diff --git a/.github/workflows/build-cloudberry.yml b/.github/workflows/build-cloudberry.yml index c00dcde0486..0d76fa0da51 100644 --- a/.github/workflows/build-cloudberry.yml +++ b/.github/workflows/build-cloudberry.yml @@ -302,6 +302,7 @@ jobs: "contrib/formatter_fixedwidth:installcheck", "contrib/hstore:installcheck", "contrib/indexscan:installcheck", + "contrib/interconnect:installcheck", "contrib/pg_trgm:installcheck", "contrib/indexscan:installcheck", "contrib/pgcrypto:installcheck", diff --git a/contrib/btree_gist/btree_utils_var.c b/contrib/btree_gist/btree_utils_var.c index 0c0e952f736..63137c5c85c 100644 --- a/contrib/btree_gist/btree_utils_var.c +++ b/contrib/btree_gist/btree_utils_var.c @@ -116,36 +116,47 @@ gbt_var_leaf2node(GBT_VARKEY *leaf, const gbtree_vinfo *tinfo, FmgrInfo *flinfo) /* * returns the common prefix length of a node key + * + * If the underlying type is character data, the prefix length may point in + * the middle of a multibyte character. */ static int32 gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo) { GBT_VARKEY_R r = gbt_var_key_readable(node); int32 i = 0; - int32 l = 0; + int32 l_left_to_match = 0; + int32 l_total = 0; int32 t1len = VARSIZE(r.lower) - VARHDRSZ; int32 t2len = VARSIZE(r.upper) - VARHDRSZ; int32 ml = Min(t1len, t2len); char *p1 = VARDATA(r.lower); char *p2 = VARDATA(r.upper); + const char *end1 = p1 + t1len; + const char *end2 = p2 + t2len; if (ml == 0) return 0; while (i < ml) { - if (tinfo->eml > 1 && l == 0) + if (tinfo->eml > 1 && l_left_to_match == 0) { - if ((l = pg_mblen(p1)) != pg_mblen(p2)) + l_total = pg_mblen_range(p1, end1); + if (l_total != pg_mblen_range(p2, end2)) { return i; } + l_left_to_match = l_total; } if (*p1 != *p2) { if (tinfo->eml > 1) { - return (i - l + 1); + int32 l_matched_subset = l_total - l_left_to_match; + + /* end common prefix at final byte of last matching char */ + return i - l_matched_subset; } else { @@ -155,7 +166,7 @@ gbt_var_node_cp_len(const GBT_VARKEY *node, const gbtree_vinfo *tinfo) p1++; p2++; - l--; + l_left_to_match--; i++; } return ml; /* lower == upper */ diff --git a/contrib/dict_xsyn/dict_xsyn.c b/contrib/dict_xsyn/dict_xsyn.c index e538928aba4..19db56f0352 100644 --- a/contrib/dict_xsyn/dict_xsyn.c +++ b/contrib/dict_xsyn/dict_xsyn.c @@ -48,15 +48,15 @@ find_word(char *in, char **end) char *start; *end = NULL; - while (*in && t_isspace(in)) - in += pg_mblen(in); + while (*in && t_isspace_cstr(in)) + in += pg_mblen_cstr(in); if (!*in || *in == '#') return NULL; start = in; - while (*in && !t_isspace(in)) - in += pg_mblen(in); + while (*in && !t_isspace_cstr(in)) + in += pg_mblen_cstr(in); *end = in; diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c index 999ddad76d9..08c1b216aa2 100644 --- a/contrib/hstore/hstore_io.c +++ b/contrib/hstore/hstore_io.c @@ -64,7 +64,7 @@ prssyntaxerror(HSParser *state) errsave(state->escontext, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("syntax error in hstore, near \"%.*s\" at position %d", - pg_mblen(state->ptr), state->ptr, + pg_mblen_cstr(state->ptr), state->ptr, (int) (state->ptr - state->begin)))); /* In soft error situation, return false as convenience for caller */ return false; diff --git a/contrib/intarray/_int_selfuncs.c b/contrib/intarray/_int_selfuncs.c index d4793b0b638..015649ab334 100644 --- a/contrib/intarray/_int_selfuncs.c +++ b/contrib/intarray/_int_selfuncs.c @@ -19,6 +19,7 @@ #include "catalog/pg_operator.h" #include "catalog/pg_statistic.h" #include "catalog/pg_type.h" +#include "commands/extension.h" #include "miscadmin.h" #include "utils/builtins.h" #include "utils/lsyscache.h" @@ -171,7 +172,18 @@ _int_matchsel(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(0.0); } - /* The caller made sure the const is a query, so get it now */ + /* + * Verify that the Const is a query_int, else return a default estimate. + * (This could only fail if someone attached this estimator to the wrong + * operator.) + */ + if (((Const *) other)->consttype != + get_function_sibling_type(fcinfo->flinfo->fn_oid, "query_int")) + { + ReleaseVariableStats(vardata); + PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); + } + query = DatumGetQueryTypeP(((Const *) other)->constvalue); /* Empty query matches nothing */ diff --git a/contrib/interconnect/Makefile b/contrib/interconnect/Makefile index 31489fa9148..5bb0cd188e4 100644 --- a/contrib/interconnect/Makefile +++ b/contrib/interconnect/Makefile @@ -7,6 +7,10 @@ include $(top_builddir)/contrib/interconnect/Makefile.interconnect MODULE_big = interconnect PGFILEDESC = "interconnect - inter connection module" +EXTENSION = interconnect +EXTENSION_VERSION = 1.0 +DATA = interconnect--$(EXTENSION_VERSION).sql + OBJS = \ $(WIN32RES) \ ic_common.o \ @@ -33,6 +37,8 @@ OBJS += proxy/ic_proxy_iobuf.o SHLIB_LINK += $(filter -luv, $(LIBS)) endif # enable_ic_proxy +REGRESS = interconnect + ifdef USE_PGXS PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) diff --git a/contrib/interconnect/README.md b/contrib/interconnect/README.md index fd9615d89c9..5c47d1f713a 100644 --- a/contrib/interconnect/README.md +++ b/contrib/interconnect/README.md @@ -271,3 +271,35 @@ udpifc result: Notice that: Lower TPS does not mean the protocol is slower, might means that the cpu time taken by the protocol is low. For the udpifc, it satisfies the highest tps required by `cbdb`. at the same time it occupies a lower cpu than other types of interconnect. +# interconnect statistics + +This extension provides cumulative interconnect statistics for Cloudberry Database, including queue sizes, buffer usage, retransmits, packet errors, and other UDPIFC‑related metrics. + +It exposes three views with statistics at different aggregation levels: +gp_interconnect_stats — total cluster‑wide stats; +gp_interconnect_stats_per_segment — stats per segment; +gp_interconnect_stats_per_host — stats grouped by host. + +## How to create the extension + +Add interconnect to shared_preload_libraries and restart the cluster. + +``` +gpconfig -c shared_preload_libraries -v \ + "$(psql -At -c \ + "SELECT array_to_string( \ + array_append( \ + string_to_array( \ + current_setting('shared_preload_libraries'), \ + ','), \ + 'interconnect'), \ + ',')" \ + postgres)" +gpstop -ra +``` + +Create the extension in your database. + +``` +CREATE EXTENSION interconnect; +``` diff --git a/contrib/interconnect/expected/interconnect.out b/contrib/interconnect/expected/interconnect.out new file mode 100644 index 00000000000..924a532e180 --- /dev/null +++ b/contrib/interconnect/expected/interconnect.out @@ -0,0 +1,82 @@ +-- Capture current interconnect stats as baseline for future comparisons +SELECT * FROM gp_interconnect_stats \gset prev_ +-- Verify that all baseline interconnect statistics are >= 0 (no negative values) +SELECT + :prev_total_recv_queue_size >= 0, + :prev_recv_queue_conting_time >= 0, + :prev_total_capacity >= 0, + :prev_capacity_counting_time >= 0, + :prev_total_buffers >= 0, + :prev_buffer_counting_time >= 0, + :prev_retransmits >= 0, + :prev_startup_cached_pkts >= 0, + :prev_mismatches >= 0, + :prev_crs_errors >= 0, + :prev_snd_pkt_num >= 0, + :prev_recv_pkt_num >= 0, + :prev_disordered_pkt_num >= 0, + :prev_duplicate_pkt_num >= 0, + :prev_recv_ack_num >= 0, + :prev_status_query_msg_num >= 0; + ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? +----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+---------- + t | t | t | t | t | t | t | t | t | t | t | t | t | t | t | t +(1 row) + +-- Create test table to generate interconnect traffic +CREATE TABLE test_ic_data +AS SELECT generate_series(1, 1000) AS id +DISTRIBUTED RANDOMLY; +-- Re-capture current state: overwrite prev with latest values +SELECT * FROM gp_interconnect_stats \gset prev2_ +-- Check if current statistics are >= baseline values after first data insertion +SELECT + :prev2_total_recv_queue_size >= :prev_total_recv_queue_size, + :prev2_recv_queue_conting_time >= :prev_recv_queue_conting_time, + :prev2_total_capacity >= :prev_total_capacity, + :prev2_capacity_counting_time >= :prev_capacity_counting_time, + :prev2_total_buffers >= :prev_total_buffers, + :prev2_buffer_counting_time >= :prev_buffer_counting_time, + :prev2_retransmits >= :prev_retransmits, + :prev2_startup_cached_pkts >= :prev_startup_cached_pkts, + :prev2_mismatches >= :prev_mismatches, + :prev2_crs_errors >= :prev_crs_errors, + :prev2_snd_pkt_num >= :prev_snd_pkt_num, + :prev2_recv_pkt_num >= :prev_recv_pkt_num, + :prev2_disordered_pkt_num >= :prev_disordered_pkt_num, + :prev2_duplicate_pkt_num >= :prev_duplicate_pkt_num, + :prev2_recv_ack_num >= :prev_recv_ack_num, + :prev2_status_query_msg_num >= :prev_status_query_msg_num; + ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? +----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+---------- + t | t | t | t | t | t | t | t | t | t | t | t | t | t | t | t +(1 row) + +-- Insert additional data to further test interconnect statistics changes under load +INSERT INTO test_ic_data SELECT generate_series(1001, 2000); +-- Re‑check if current statistics remain >= baseline after second data insertion +SELECT + total_recv_queue_size >= :prev2_total_recv_queue_size, + recv_queue_conting_time >= :prev2_recv_queue_conting_time, + total_capacity >= :prev2_total_capacity, + capacity_counting_time >= :prev2_capacity_counting_time, + total_buffers >= :prev2_total_buffers, + buffer_counting_time >= :prev2_buffer_counting_time, + retransmits >= :prev2_retransmits, + startup_cached_pkts >= :prev2_startup_cached_pkts, + mismatches >= :prev2_mismatches, + crs_errors >= :prev2_crs_errors, + snd_pkt_num >= :prev2_snd_pkt_num, + recv_pkt_num >= :prev2_recv_pkt_num, + disordered_pkt_num >= :prev2_disordered_pkt_num, + duplicate_pkt_num >= :prev2_duplicate_pkt_num, + recv_ack_num >= :prev2_recv_ack_num, + status_query_msg_num >= :prev2_status_query_msg_num +FROM gp_interconnect_stats; + ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? | ?column? +----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+----------+---------- + t | t | t | t | t | t | t | t | t | t | t | t | t | t | t | t +(1 row) + +DROP TABLE test_ic_data; +DROP EXTENSION interconnect; diff --git a/contrib/interconnect/ic_modules.c b/contrib/interconnect/ic_modules.c index b582e8bdbe0..85b7bac3ffe 100644 --- a/contrib/interconnect/ic_modules.c +++ b/contrib/interconnect/ic_modules.c @@ -16,6 +16,7 @@ #include "ic_common.h" #include "tcp/ic_tcp.h" #include "udp/ic_udpifc.h" +#include "storage/ipc.h" #ifdef ENABLE_IC_PROXY #include "proxy/ic_proxy_server.h" @@ -23,6 +24,8 @@ PG_MODULE_MAGIC; +shmem_startup_hook_type prev_shmem_startup_hook = NULL; + MotionIPCLayer tcp_ipc_layer = { .ic_type = INTERCONNECT_TYPE_TCP, .type_name = "tcp", @@ -141,6 +144,16 @@ MotionIPCLayer udpifc_ipc_layer = { .GetMotionSentRecordTypmod = GetMotionSentRecordTypmod, }; +static void +InterconnectShmemInit(void) +{ + if (prev_shmem_startup_hook) + prev_shmem_startup_hook(); + + if (Gp_interconnect_type == INTERCONNECT_TYPE_UDPIFC) + InterconnectShmemInitUDPIFC(); +} + void _PG_init(void) { @@ -153,4 +166,16 @@ _PG_init(void) RegisterIPCLayerImpl(&tcp_ipc_layer); RegisterIPCLayerImpl(&udpifc_ipc_layer); RegisterIPCLayerImpl(&proxy_ipc_layer); + + if (Gp_interconnect_type == INTERCONNECT_TYPE_UDPIFC) + RequestAddinShmemSpace(sizeof(ICStatisticsShmem)); + + prev_shmem_startup_hook = shmem_startup_hook; + shmem_startup_hook = InterconnectShmemInit; +} + +void +_PG_fini(void) +{ + shmem_startup_hook = prev_shmem_startup_hook; } diff --git a/contrib/interconnect/ic_modules.h b/contrib/interconnect/ic_modules.h index a381d279fdd..e73b1998f3a 100644 --- a/contrib/interconnect/ic_modules.h +++ b/contrib/interconnect/ic_modules.h @@ -18,5 +18,6 @@ extern MotionIPCLayer proxy_ipc_layer; extern MotionIPCLayer udpifc_ipc_layer; extern void _PG_init(void); +extern void _PG_fini(void); #endif // INTER_CONNECT_H diff --git a/contrib/interconnect/interconnect--1.0.sql b/contrib/interconnect/interconnect--1.0.sql new file mode 100644 index 00000000000..f4aa635583e --- /dev/null +++ b/contrib/interconnect/interconnect--1.0.sql @@ -0,0 +1,110 @@ +/* contrib/interconnect/interconnect--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION interconnect" to load this file. \quit + +CREATE FUNCTION __gp_interconnect_get_stats_f_on_master( + OUT gp_segment_id smallint, + OUT total_recv_queue_size bigint, + OUT recv_queue_conting_time bigint, + OUT total_capacity bigint, + OUT capacity_counting_time bigint, + OUT total_buffers bigint, + OUT buffer_counting_time bigint, + OUT retransmits bigint, + OUT startup_cached_pkts bigint, + OUT mismatches bigint, + OUT crs_errors bigint, + OUT snd_pkt_num bigint, + OUT recv_pkt_num bigint, + OUT disordered_pkt_num bigint, + OUT duplicate_pkt_num bigint, + OUT recv_ack_num bigint, + OUT status_query_msg_num bigint +) +RETURNS SETOF record +LANGUAGE C VOLATILE EXECUTE ON MASTER +AS '$libdir/interconnect', 'gp_interconnect_get_stats'; + +CREATE FUNCTION __gp_interconnect_get_stats_f_on_segments( + OUT gp_segment_id smallint, + OUT total_recv_queue_size bigint, + OUT recv_queue_conting_time bigint, + OUT total_capacity bigint, + OUT capacity_counting_time bigint, + OUT total_buffers bigint, + OUT buffer_counting_time bigint, + OUT retransmits bigint, + OUT startup_cached_pkts bigint, + OUT mismatches bigint, + OUT crs_errors bigint, + OUT snd_pkt_num bigint, + OUT recv_pkt_num bigint, + OUT disordered_pkt_num bigint, + OUT duplicate_pkt_num bigint, + OUT recv_ack_num bigint, + OUT status_query_msg_num bigint +) +RETURNS SETOF record LANGUAGE C VOLATILE EXECUTE ON ALL SEGMENTS +AS '$libdir/interconnect', 'gp_interconnect_get_stats'; + + +-- Cummulative interconnect statistics per segment +CREATE VIEW gp_interconnect_stats_per_segment AS + SELECT c.hostname, s.* FROM ( + SELECT * FROM __gp_interconnect_get_stats_f_on_master() + UNION ALL + SELECT * FROM __gp_interconnect_get_stats_f_on_segments() + ) s + JOIN pg_catalog.gp_segment_configuration AS c + ON s.gp_segment_id = c.content AND c.role = 'p'; + +GRANT SELECT ON gp_interconnect_stats_per_segment TO public; + +-- Cummulative interconnect statistics +CREATE VIEW gp_interconnect_stats AS + SELECT + sum(total_recv_queue_size) as total_recv_queue_size + , sum(recv_queue_conting_time) as recv_queue_conting_time + , sum(total_capacity) as total_capacity + , sum(capacity_counting_time) as capacity_counting_time + , sum(total_buffers) as total_buffers + , sum(buffer_counting_time) as buffer_counting_time + , sum(retransmits) as retransmits + , sum(startup_cached_pkts) as startup_cached_pkts + , sum(mismatches) as mismatches + , sum(crs_errors) as crs_errors + , sum(snd_pkt_num) as snd_pkt_num + , sum(recv_pkt_num) as recv_pkt_num + , sum(disordered_pkt_num) as disordered_pkt_num + , sum(duplicate_pkt_num) as duplicate_pkt_num + , sum(recv_ack_num) as recv_ack_num + , sum(status_query_msg_num) as status_query_msg_num + FROM gp_interconnect_stats_per_segment; + +GRANT SELECT ON gp_interconnect_stats TO public; + +-- Cummulative interconnect statistics grouped by host +CREATE VIEW gp_interconnect_stats_per_host AS + SELECT + hostname + , sum(total_recv_queue_size) as total_recv_queue_size + , sum(recv_queue_conting_time) as recv_queue_conting_time + , sum(total_capacity) as total_capacity + , sum(capacity_counting_time) as capacity_counting_time + , sum(total_buffers) as total_buffers + , sum(buffer_counting_time) as buffer_counting_time + , sum(retransmits) as retransmits + , sum(startup_cached_pkts) as startup_cached_pkts + , sum(mismatches) as mismatches + , sum(crs_errors) as crs_errors + , sum(snd_pkt_num) as snd_pkt_num + , sum(recv_pkt_num) as recv_pkt_num + , sum(disordered_pkt_num) as disordered_pkt_num + , sum(duplicate_pkt_num) as duplicate_pkt_num + , sum(recv_ack_num) as recv_ack_num + , sum(status_query_msg_num) as status_query_msg_num + FROM gp_interconnect_stats_per_segment + GROUP BY hostname; + +GRANT SELECT ON gp_interconnect_stats_per_host TO public; diff --git a/contrib/interconnect/interconnect.control b/contrib/interconnect/interconnect.control new file mode 100644 index 00000000000..cfa3c890677 --- /dev/null +++ b/contrib/interconnect/interconnect.control @@ -0,0 +1,3 @@ +comment = 'Cummulative statistics from UDPIFC interconnect protocol' +default_version = '1.0' +relocatable = false diff --git a/contrib/interconnect/sql/interconnect.sql b/contrib/interconnect/sql/interconnect.sql new file mode 100644 index 00000000000..4e6555b6b82 --- /dev/null +++ b/contrib/interconnect/sql/interconnect.sql @@ -0,0 +1,87 @@ +-- start_ignore +\! gpconfig -c shared_preload_libraries -v "interconnect" +\! gpstop -raiq +\c +DROP TABLE IF EXISTS test_ic_data; +CREATE EXTENSION IF NOT EXISTS interconnect; +-- end_ignore + +-- Capture current interconnect stats as baseline for future comparisons +SELECT * FROM gp_interconnect_stats \gset prev_ + +-- Verify that all baseline interconnect statistics are >= 0 (no negative values) +SELECT + :prev_total_recv_queue_size >= 0, + :prev_recv_queue_conting_time >= 0, + :prev_total_capacity >= 0, + :prev_capacity_counting_time >= 0, + :prev_total_buffers >= 0, + :prev_buffer_counting_time >= 0, + :prev_retransmits >= 0, + :prev_startup_cached_pkts >= 0, + :prev_mismatches >= 0, + :prev_crs_errors >= 0, + :prev_snd_pkt_num >= 0, + :prev_recv_pkt_num >= 0, + :prev_disordered_pkt_num >= 0, + :prev_duplicate_pkt_num >= 0, + :prev_recv_ack_num >= 0, + :prev_status_query_msg_num >= 0; + +-- Create test table to generate interconnect traffic +CREATE TABLE test_ic_data +AS SELECT generate_series(1, 1000) AS id +DISTRIBUTED RANDOMLY; + +-- Re-capture current state: overwrite prev with latest values +SELECT * FROM gp_interconnect_stats \gset prev2_ + +-- Check if current statistics are >= baseline values after first data insertion +SELECT + :prev2_total_recv_queue_size >= :prev_total_recv_queue_size, + :prev2_recv_queue_conting_time >= :prev_recv_queue_conting_time, + :prev2_total_capacity >= :prev_total_capacity, + :prev2_capacity_counting_time >= :prev_capacity_counting_time, + :prev2_total_buffers >= :prev_total_buffers, + :prev2_buffer_counting_time >= :prev_buffer_counting_time, + :prev2_retransmits >= :prev_retransmits, + :prev2_startup_cached_pkts >= :prev_startup_cached_pkts, + :prev2_mismatches >= :prev_mismatches, + :prev2_crs_errors >= :prev_crs_errors, + :prev2_snd_pkt_num >= :prev_snd_pkt_num, + :prev2_recv_pkt_num >= :prev_recv_pkt_num, + :prev2_disordered_pkt_num >= :prev_disordered_pkt_num, + :prev2_duplicate_pkt_num >= :prev_duplicate_pkt_num, + :prev2_recv_ack_num >= :prev_recv_ack_num, + :prev2_status_query_msg_num >= :prev_status_query_msg_num; + +-- Insert additional data to further test interconnect statistics changes under load +INSERT INTO test_ic_data SELECT generate_series(1001, 2000); + +-- Re‑check if current statistics remain >= baseline after second data insertion +SELECT + total_recv_queue_size >= :prev2_total_recv_queue_size, + recv_queue_conting_time >= :prev2_recv_queue_conting_time, + total_capacity >= :prev2_total_capacity, + capacity_counting_time >= :prev2_capacity_counting_time, + total_buffers >= :prev2_total_buffers, + buffer_counting_time >= :prev2_buffer_counting_time, + retransmits >= :prev2_retransmits, + startup_cached_pkts >= :prev2_startup_cached_pkts, + mismatches >= :prev2_mismatches, + crs_errors >= :prev2_crs_errors, + snd_pkt_num >= :prev2_snd_pkt_num, + recv_pkt_num >= :prev2_recv_pkt_num, + disordered_pkt_num >= :prev2_disordered_pkt_num, + duplicate_pkt_num >= :prev2_duplicate_pkt_num, + recv_ack_num >= :prev2_recv_ack_num, + status_query_msg_num >= :prev2_status_query_msg_num +FROM gp_interconnect_stats; + +DROP TABLE test_ic_data; +DROP EXTENSION interconnect; + +-- start_ignore +\! gpconfig -r shared_preload_libraries +\! gpstop -raiq +-- end_ignore diff --git a/contrib/interconnect/udp/ic_udpifc.c b/contrib/interconnect/udp/ic_udpifc.c index d11e4577cd6..23de982d89b 100644 --- a/contrib/interconnect/udp/ic_udpifc.c +++ b/contrib/interconnect/udp/ic_udpifc.c @@ -41,6 +41,7 @@ #include "access/transam.h" #include "access/xact.h" #include "common/ip.h" +#include "funcapi.h" #include "nodes/execnodes.h" #include "nodes/pg_list.h" #include "nodes/print.h" @@ -51,6 +52,8 @@ #include "pgstat.h" #include "postmaster/postmaster.h" #include "storage/latch.h" +#include "storage/lock.h" +#include "storage/pg_shmem.h" #include "storage/pmsignal.h" #include "utils/builtins.h" #include "utils/guc.h" @@ -714,6 +717,8 @@ typedef struct ICStatistics /* Statistics for UDP interconnect. */ static ICStatistics ic_statistics; +static ICStatisticsShmem *pICStatisticsShmem = NULL; + /* UDP listen fd */ int UDP_listenerFd; @@ -1814,6 +1819,27 @@ ic_reset_pthread_sigmasks(sigset_t *sigs) return; } +void +InterconnectShmemInitUDPIFC(void) +{ + bool found; + pICStatisticsShmem = ShmemInitStruct("global interconnect statistics", + sizeof(ICStatisticsShmem), &found); + if (pICStatisticsShmem == NULL) + { + ereport(FATAL, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("not enough shared memory for global interconnect statistics"))); + } + + if (!found) + memset(pICStatisticsShmem, 0, sizeof(*pICStatisticsShmem)); + + int tranche_id = LWLockNewTrancheId(); + LWLockRegisterTranche(tranche_id, "IC Statistics"); + LWLockInitialize(&pICStatisticsShmem->lock, tranche_id); +} + /* * InitMotionUDPIFC * Initialize UDP specific comms, and create rx-thread. @@ -3901,6 +3927,30 @@ chunkTransportStateEntryInitialized(ChunkTransportState *transportStates, return pEntry->valid; } +/* Append local interconnect stats to global cummulative stats. */ +static void +updateGlobalInterconnectStats(void) +{ + LWLockAcquire(&pICStatisticsShmem->lock, LW_EXCLUSIVE); + pICStatisticsShmem->totalRecvQueueSize += ic_statistics.totalRecvQueueSize; + pICStatisticsShmem->recvQueueSizeCountingTime += ic_statistics.recvQueueSizeCountingTime; + pICStatisticsShmem->totalCapacity += ic_statistics.totalCapacity; + pICStatisticsShmem->capacityCountingTime += ic_statistics.capacityCountingTime; + pICStatisticsShmem->totalBuffers += ic_statistics.totalBuffers; + pICStatisticsShmem->bufferCountingTime += ic_statistics.bufferCountingTime; + pICStatisticsShmem->retransmits += ic_statistics.retransmits; + pICStatisticsShmem->startupCachedPktNum += ic_statistics.startupCachedPktNum; + pICStatisticsShmem->mismatchNum += ic_statistics.mismatchNum; + pICStatisticsShmem->crcErrors += ic_statistics.crcErrors; + pICStatisticsShmem->sndPktNum += ic_statistics.sndPktNum; + pICStatisticsShmem->recvPktNum += ic_statistics.recvPktNum; + pICStatisticsShmem->disorderedPktNum += ic_statistics.disorderedPktNum; + pICStatisticsShmem->duplicatedPktNum += ic_statistics.duplicatedPktNum; + pICStatisticsShmem->recvAckNum += ic_statistics.recvAckNum; + pICStatisticsShmem->statusQueryMsgNum += ic_statistics.statusQueryMsgNum; + LWLockRelease(&pICStatisticsShmem->lock); +} + /* * computeNetworkStatistics * Compute the max/min/avg network statistics. @@ -4207,6 +4257,7 @@ TeardownUDPIFCInterconnect_Internal(ChunkTransportState *transportStates, (minRtt == ~((uint64) 0) ? 0 : minRtt), (minDev == ~((uint64) 0) ? 0 : minDev), avgRtt, avgDev, maxRtt, maxDev, snd_control_info.cwnd, ic_statistics.statusQueryMsgNum); + updateGlobalInterconnectStats(); ic_control_info.isSender = false; memset(&ic_statistics, 0, sizeof(ICStatistics)); @@ -8224,3 +8275,72 @@ MlPutRxBufferIFC(ChunkTransportState *transportStates, int motNodeID, int route) if (param.msg.len != 0) sendAckWithParam(¶m); } + +PG_FUNCTION_INFO_V1(gp_interconnect_get_stats); + +Datum +gp_interconnect_get_stats(PG_FUNCTION_ARGS) +{ + if (Gp_interconnect_type != INTERCONNECT_TYPE_UDPIFC) + { + ereport(WARNING, + (errcode(ERRCODE_WARNING_GP_INTERCONNECTION), + errmsg("Interconnect statistics are collected only for UDPIFC protocol"))); + PG_RETURN_NULL(); + } + + /* + * Build a tuple descriptor for our result type + * The number and type of attributes have to match the definition of the + * view gp_interconnect_stats_per_segment + */ + enum {NUM_IC_STATS_ELEM = 17}; + TupleDesc tupdesc = CreateTemplateTupleDesc(NUM_IC_STATS_ELEM); + + TupleDescInitEntry(tupdesc, 1, "segid", INT2OID, -1, 0); + TupleDescInitEntry(tupdesc, 2, "total_recv_queue_size", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 3, "recv_queue_conting_time", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 4, "total_capacity", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 5, "capacity_counting_time", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 6, "total_buffers", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 7, "buffer_counting_time", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 8, "retransmits", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 9, "startup_cached_pkts", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 10, "mismatches", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 11, "crs_errors", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 12, "snd_pkt_num", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 13, "recv_pkt_num", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 14, "disordered_pkt_num", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 15, "duplicate_pkt_num", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 16, "recv_ack_num", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, 17, "status_query_msg_num", INT8OID, -1, 0); + tupdesc = BlessTupleDesc(tupdesc); + + Datum values[NUM_IC_STATS_ELEM]; + bool nulls[NUM_IC_STATS_ELEM] = {0}; + + LWLockAcquire(&pICStatisticsShmem->lock, LW_SHARED); + values[0] = Int32GetDatum(GpIdentity.segindex); + values[1] = Int64GetDatum(pICStatisticsShmem->totalRecvQueueSize); + values[2] = Int64GetDatum(pICStatisticsShmem->recvQueueSizeCountingTime); + values[3] = Int64GetDatum(pICStatisticsShmem->totalCapacity); + values[4] = Int64GetDatum(pICStatisticsShmem->capacityCountingTime); + values[5] = Int64GetDatum(pICStatisticsShmem->totalBuffers); + values[6] = Int64GetDatum(pICStatisticsShmem->bufferCountingTime); + values[7] = Int64GetDatum(pICStatisticsShmem->retransmits); + values[8] = Int64GetDatum(pICStatisticsShmem->startupCachedPktNum); + values[9] = Int64GetDatum(pICStatisticsShmem->mismatchNum); + values[10] = Int64GetDatum(pICStatisticsShmem->crcErrors); + values[11] = Int64GetDatum(pICStatisticsShmem->sndPktNum); + values[12] = Int64GetDatum(pICStatisticsShmem->recvPktNum); + values[13] = Int64GetDatum(pICStatisticsShmem->disorderedPktNum); + values[14] = Int64GetDatum(pICStatisticsShmem->duplicatedPktNum); + values[15] = Int64GetDatum(pICStatisticsShmem->recvAckNum); + values[16] = Int64GetDatum(pICStatisticsShmem->statusQueryMsgNum); + LWLockRelease(&pICStatisticsShmem->lock); + + HeapTuple tuple = heap_form_tuple(tupdesc, values, nulls); + Datum result = HeapTupleGetDatum(tuple); + + PG_RETURN_DATUM(result); +} diff --git a/contrib/interconnect/udp/ic_udpifc.h b/contrib/interconnect/udp/ic_udpifc.h index af3ca72ba3b..346f6ea85cb 100644 --- a/contrib/interconnect/udp/ic_udpifc.h +++ b/contrib/interconnect/udp/ic_udpifc.h @@ -17,6 +17,7 @@ #include "nodes/execnodes.h" /* ExecSlice, SliceTable */ #include "miscadmin.h" #include "libpq/libpq-be.h" +#include "storage/lwlock.h" #include "utils/builtins.h" #include "utils/memutils.h" @@ -212,4 +213,31 @@ extern void dumpICBufferList(ICBufferList * list, const char *fname); extern void dumpUnackQueueRing(const char *fname); extern void dumpConnections(ChunkTransportStateEntry * pEntry, const char *fname); +/* + * Keeps various statistics about interconnect internal. + * Also those numbers are expected to grow big, hence uint64. + */ +typedef struct ICStatisticsShmem +{ + LWLock lock; /* mutex for synchronizing access to statistics data */ + uint64 totalRecvQueueSize; /* receive queue size sum when main thread is trying to get a packet */ + uint64 recvQueueSizeCountingTime; /* counting times when computing totalRecvQueueSize */ + uint64 totalCapacity; /* the capacity sum when packets are tried to be sent */ + uint64 capacityCountingTime; /* counting times used to compute totalCapacity */ + uint64 totalBuffers; /* total buffers available when sending packets */ + uint64 bufferCountingTime; /* counting times when compute totalBuffers */ + uint64 retransmits; /* the number of packet retransmits */ + uint64 startupCachedPktNum; /* number of packets cached during connection startup */ + uint64 mismatchNum; /* the number of mismatched packets received */ + uint64 crcErrors; /* the number of crc errors */ + uint64 sndPktNum; /* the number of packets sent by sender */ + uint64 recvPktNum; /* the number of packets received by receiver */ + uint64 disorderedPktNum; /* disordered packet number */ + uint64 duplicatedPktNum; /* duplicate packet number */ + uint64 recvAckNum; /* the number of Acks received */ + uint64 statusQueryMsgNum; /* the number of status query messages sent */ +} ICStatisticsShmem; + +void InterconnectShmemInitUDPIFC(void); + #endif // IC_UDP_INTERFACE_H diff --git a/contrib/ltree/lquery_op.c b/contrib/ltree/lquery_op.c index a6466f575fd..bbc1f9f5c72 100644 --- a/contrib/ltree/lquery_op.c +++ b/contrib/ltree/lquery_op.c @@ -27,14 +27,14 @@ getlexeme(char *start, char *end, int *len) char *ptr; while (start < end && t_iseq(start, '_')) - start += pg_mblen(start); + start += pg_mblen_range(start, end); ptr = start; if (ptr >= end) return NULL; while (ptr < end && !t_iseq(ptr, '_')) - ptr += pg_mblen(ptr); + ptr += pg_mblen_range(ptr, end); *len = ptr - start; return start; diff --git a/contrib/ltree/ltree.h b/contrib/ltree/ltree.h index 5e0761641d3..cf24add69f7 100644 --- a/contrib/ltree/ltree.h +++ b/contrib/ltree/ltree.h @@ -127,7 +127,7 @@ typedef struct #define LQUERY_HASNOT 0x01 /* valid label chars are alphanumerics, underscores and hyphens */ -#define ISLABEL(x) ( t_isalnum(x) || t_iseq(x, '_') || t_iseq(x, '-') ) +#define ISLABEL(x) ( t_isalnum_cstr(x) || t_iseq(x, '_') || t_iseq(x, '-') ) /* full text query */ diff --git a/contrib/ltree/ltree_io.c b/contrib/ltree/ltree_io.c index 3a0a4266870..24d2bf67def 100644 --- a/contrib/ltree/ltree_io.c +++ b/contrib/ltree/ltree_io.c @@ -56,7 +56,7 @@ parse_ltree(const char *buf, struct Node *escontext) ptr = buf; while (*ptr) { - charlen = pg_mblen(ptr); + charlen = pg_mblen_cstr(ptr); if (t_iseq(ptr, '.')) num++; ptr += charlen; @@ -71,7 +71,7 @@ parse_ltree(const char *buf, struct Node *escontext) ptr = buf; while (*ptr) { - charlen = pg_mblen(ptr); + charlen = pg_mblen_cstr(ptr); switch (state) { @@ -293,7 +293,7 @@ parse_lquery(const char *buf, struct Node *escontext) ptr = buf; while (*ptr) { - charlen = pg_mblen(ptr); + charlen = pg_mblen_cstr(ptr); if (t_iseq(ptr, '.')) num++; @@ -313,7 +313,7 @@ parse_lquery(const char *buf, struct Node *escontext) ptr = buf; while (*ptr) { - charlen = pg_mblen(ptr); + charlen = pg_mblen_cstr(ptr); switch (state) { @@ -418,7 +418,7 @@ parse_lquery(const char *buf, struct Node *escontext) case LQPRS_WAITFNUM: if (t_iseq(ptr, ',')) state = LQPRS_WAITSNUM; - else if (t_isdigit(ptr)) + else if (t_isdigit_cstr(ptr)) { int low = atoi(ptr); @@ -436,7 +436,7 @@ parse_lquery(const char *buf, struct Node *escontext) UNCHAR; break; case LQPRS_WAITSNUM: - if (t_isdigit(ptr)) + if (t_isdigit_cstr(ptr)) { int high = atoi(ptr); @@ -467,7 +467,7 @@ parse_lquery(const char *buf, struct Node *escontext) case LQPRS_WAITCLOSE: if (t_iseq(ptr, '}')) state = LQPRS_WAITEND; - else if (!t_isdigit(ptr)) + else if (!t_isdigit_cstr(ptr)) UNCHAR; break; case LQPRS_WAITND: @@ -478,7 +478,7 @@ parse_lquery(const char *buf, struct Node *escontext) } else if (t_iseq(ptr, ',')) state = LQPRS_WAITSNUM; - else if (!t_isdigit(ptr)) + else if (!t_isdigit_cstr(ptr)) UNCHAR; break; case LQPRS_WAITEND: diff --git a/contrib/ltree/ltxtquery_io.c b/contrib/ltree/ltxtquery_io.c index 2c27ebd180f..0f2954f31ba 100644 --- a/contrib/ltree/ltxtquery_io.c +++ b/contrib/ltree/ltxtquery_io.c @@ -64,7 +64,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint for (;;) { - charlen = pg_mblen(state->buf); + charlen = pg_mblen_cstr(state->buf); switch (state->state) { @@ -88,7 +88,7 @@ gettoken_query(QPRS_STATE *state, int32 *val, int32 *lenval, char **strval, uint *lenval = charlen; *flag = 0; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) ereturn(state->escontext, ERR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("operand syntax error"))); diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c index e8f43b8bdbf..803472e0928 100644 --- a/contrib/pageinspect/heapfuncs.c +++ b/contrib/pageinspect/heapfuncs.c @@ -101,7 +101,7 @@ text_to_bits(char *str, int len) ereport(ERROR, (errcode(ERRCODE_DATA_CORRUPTED), errmsg("invalid character \"%.*s\" in t_bits string", - pg_mblen(str + off), str + off))); + pg_mblen_cstr(str + off), str + off))); if (off % 8 == 7) bits[off / 8] = byte; diff --git a/contrib/pg_trgm/Makefile b/contrib/pg_trgm/Makefile index f8ecb34a2d2..faae60f8869 100644 --- a/contrib/pg_trgm/Makefile +++ b/contrib/pg_trgm/Makefile @@ -14,7 +14,7 @@ DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \ pg_trgm--1.0--1.1.sql PGFILEDESC = "pg_trgm - trigram matching" -REGRESS = pg_trgm pg_word_trgm pg_strict_word_trgm +REGRESS = pg_trgm pg_utf8_trgm pg_word_trgm pg_strict_word_trgm REGRESS_OPTS += --init-file=$(top_srcdir)/src/test/regress/init_file ifdef USE_PGXS diff --git a/contrib/pg_trgm/data/trgm_utf8.data b/contrib/pg_trgm/data/trgm_utf8.data new file mode 100644 index 00000000000..713856e76a6 --- /dev/null +++ b/contrib/pg_trgm/data/trgm_utf8.data @@ -0,0 +1,50 @@ +Mathematics +数学 +गणित +Matemáticas +رياضيات +Mathématiques +গণিত +Matemática +Математика +ریاضی +Matematika +Mathematik +数学 +Mathematics +गणित +గణితం +Matematik +கணிதம் +數學 +Toán học +Matematika +数学 +수학 +ریاضی +Lissafi +Hisabati +Matematika +Matematica +ریاضی +ಗಣಿತ +ગણિત +คณิตศาสตร์ +ሂሳብ +गणित +ਗਣਿਤ +數學 +数学 +Iṣiro +數學 +သင်္ချာ +Herrega +رياضي +गणित +Математика +Matematyka +ഗണിതം +Matematika +رياضي +Matematika +Matematică diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm.out b/contrib/pg_trgm/expected/pg_utf8_trgm.out new file mode 100644 index 00000000000..0768e7d6a83 --- /dev/null +++ b/contrib/pg_trgm/expected/pg_utf8_trgm.out @@ -0,0 +1,8 @@ +SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset +\if :skip_test +\quit +\endif +-- Index 50 translations of the word "Mathematics" +CREATE TEMP TABLE mb (s text); +\copy mb from 'data/trgm_utf8.data' +CREATE INDEX ON mb USING gist(s gist_trgm_ops); diff --git a/contrib/pg_trgm/expected/pg_utf8_trgm_1.out b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out new file mode 100644 index 00000000000..8505c4fa552 --- /dev/null +++ b/contrib/pg_trgm/expected/pg_utf8_trgm_1.out @@ -0,0 +1,3 @@ +SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset +\if :skip_test +\quit diff --git a/contrib/pg_trgm/meson.build b/contrib/pg_trgm/meson.build index 093ac18400c..bd3a34f2557 100644 --- a/contrib/pg_trgm/meson.build +++ b/contrib/pg_trgm/meson.build @@ -39,6 +39,7 @@ tests += { 'regress': { 'sql': [ 'pg_trgm', + 'pg_utf8_trgm', 'pg_word_trgm', 'pg_strict_word_trgm', ], diff --git a/contrib/pg_trgm/sql/pg_utf8_trgm.sql b/contrib/pg_trgm/sql/pg_utf8_trgm.sql new file mode 100644 index 00000000000..0dd962ced83 --- /dev/null +++ b/contrib/pg_trgm/sql/pg_utf8_trgm.sql @@ -0,0 +1,9 @@ +SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset +\if :skip_test +\quit +\endif + +-- Index 50 translations of the word "Mathematics" +CREATE TEMP TABLE mb (s text); +\copy mb from 'data/trgm_utf8.data' +CREATE INDEX ON mb USING gist(s gist_trgm_ops); diff --git a/contrib/pg_trgm/trgm.h b/contrib/pg_trgm/trgm.h index afb0adb222b..1bd41fa1901 100644 --- a/contrib/pg_trgm/trgm.h +++ b/contrib/pg_trgm/trgm.h @@ -52,10 +52,10 @@ typedef char trgm[3]; } while(0) #ifdef KEEPONLYALNUM -#define ISWORDCHR(c) (t_isalnum(c)) +#define ISWORDCHR(c, len) (t_isalnum_with_len(c, len)) #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && (isalnum( *(unsigned char*)(a) ) || *(unsigned char*)(a)==' ') ) #else -#define ISWORDCHR(c) (!t_isspace(c)) +#define ISWORDCHR(c, len) (!t_isspace_with_len(c, len)) #define ISPRINTABLECHAR(a) ( isascii( *(unsigned char*)(a) ) && isprint( *(unsigned char*)(a) ) ) #endif #define ISPRINTABLETRGM(t) ( ISPRINTABLECHAR( ((char*)(t)) ) && ISPRINTABLECHAR( ((char*)(t))+1 ) && ISPRINTABLECHAR( ((char*)(t))+2 ) ) diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c index 49d4497b4f3..32c390257b3 100644 --- a/contrib/pg_trgm/trgm_op.c +++ b/contrib/pg_trgm/trgm_op.c @@ -173,18 +173,29 @@ static char * find_word(char *str, int lenstr, char **endword, int *charlen) { char *beginword = str; + const char *endstr = str + lenstr; - while (beginword - str < lenstr && !ISWORDCHR(beginword)) - beginword += pg_mblen(beginword); + while (beginword < endstr) + { + int clen = pg_mblen_range(beginword, endstr); - if (beginword - str >= lenstr) + if (ISWORDCHR(beginword, clen)) + break; + beginword += clen; + } + + if (beginword >= endstr) return NULL; *endword = beginword; *charlen = 0; - while (*endword - str < lenstr && ISWORDCHR(*endword)) + while (*endword < endstr) { - *endword += pg_mblen(*endword); + int clen = pg_mblen_range(*endword, endstr); + + if (!ISWORDCHR(*endword, clen)) + break; + *endword += clen; (*charlen)++; } @@ -232,9 +243,9 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen) if (bytelen > charlen) { /* Find multibyte character boundaries and apply compact_trigram */ - int lenfirst = pg_mblen(str), - lenmiddle = pg_mblen(str + lenfirst), - lenlast = pg_mblen(str + lenfirst + lenmiddle); + int lenfirst = pg_mblen_unbounded(str), + lenmiddle = pg_mblen_unbounded(str + lenfirst), + lenlast = pg_mblen_unbounded(str + lenfirst + lenmiddle); while ((ptr - str) + lenfirst + lenmiddle + lenlast <= bytelen) { @@ -245,7 +256,7 @@ make_trigrams(trgm *tptr, char *str, int bytelen, int charlen) lenfirst = lenmiddle; lenmiddle = lenlast; - lenlast = pg_mblen(ptr + lenfirst + lenmiddle); + lenlast = pg_mblen_unbounded(ptr + lenfirst + lenmiddle); } } else @@ -730,6 +741,7 @@ get_wildcard_part(const char *str, int lenstr, { const char *beginword = str; const char *endword; + const char *endstr = str + lenstr; char *s = buf; bool in_leading_wildcard_meta = false; bool in_trailing_wildcard_meta = false; @@ -742,11 +754,13 @@ get_wildcard_part(const char *str, int lenstr, * from this loop to the next one, since we may exit at a word character * that is in_escape. */ - while (beginword - str < lenstr) + while (beginword < endstr) { + clen = pg_mblen_range(beginword, endstr); + if (in_escape) { - if (ISWORDCHR(beginword)) + if (ISWORDCHR(beginword, clen)) break; in_escape = false; in_leading_wildcard_meta = false; @@ -757,12 +771,12 @@ get_wildcard_part(const char *str, int lenstr, in_escape = true; else if (ISWILDCARDCHAR(beginword)) in_leading_wildcard_meta = true; - else if (ISWORDCHR(beginword)) + else if (ISWORDCHR(beginword, clen)) break; else in_leading_wildcard_meta = false; } - beginword += pg_mblen(beginword); + beginword += clen; } /* @@ -795,12 +809,12 @@ get_wildcard_part(const char *str, int lenstr, * string boundary. Strip escapes during copy. */ endword = beginword; - while (endword - str < lenstr) + while (endword < endstr) { - clen = pg_mblen(endword); + clen = pg_mblen_range(endword, endstr); if (in_escape) { - if (ISWORDCHR(endword)) + if (ISWORDCHR(endword, clen)) { memcpy(s, endword, clen); (*charlen)++; @@ -828,7 +842,7 @@ get_wildcard_part(const char *str, int lenstr, in_trailing_wildcard_meta = true; break; } - else if (ISWORDCHR(endword)) + else if (ISWORDCHR(endword, clen)) { memcpy(s, endword, clen); (*charlen)++; diff --git a/contrib/pg_trgm/trgm_regexp.c b/contrib/pg_trgm/trgm_regexp.c index 1d369460671..6d797c72203 100644 --- a/contrib/pg_trgm/trgm_regexp.c +++ b/contrib/pg_trgm/trgm_regexp.c @@ -481,7 +481,7 @@ static TRGM *createTrgmNFAInternal(regex_t *regex, TrgmPackedGraph **graph, static void RE_compile(regex_t *regex, text *text_re, int cflags, Oid collation); static void getColorInfo(regex_t *regex, TrgmNFA *trgmNFA); -static bool convertPgWchar(pg_wchar c, trgm_mb_char *result); +static int convertPgWchar(pg_wchar c, trgm_mb_char *result); static void transformGraph(TrgmNFA *trgmNFA); static void processState(TrgmNFA *trgmNFA, TrgmState *state); static void addKey(TrgmNFA *trgmNFA, TrgmState *state, TrgmStateKey *key); @@ -806,10 +806,11 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA) for (j = 0; j < charsCount; j++) { trgm_mb_char c; + int clen = convertPgWchar(chars[j], &c); - if (!convertPgWchar(chars[j], &c)) + if (!clen) continue; /* ok to ignore it altogether */ - if (ISWORDCHR(c.bytes)) + if (ISWORDCHR(c.bytes, clen)) colorInfo->wordChars[colorInfo->wordCharsCount++] = c; else colorInfo->containsNonWord = true; @@ -821,13 +822,15 @@ getColorInfo(regex_t *regex, TrgmNFA *trgmNFA) /* * Convert pg_wchar to multibyte format. - * Returns false if the character should be ignored completely. + * Returns 0 if the character should be ignored completely, else returns its + * byte length. */ -static bool +static int convertPgWchar(pg_wchar c, trgm_mb_char *result) { /* "s" has enough space for a multibyte character and a trailing NUL */ char s[MAX_MULTIBYTE_CHAR_LEN + 1]; + int clen; /* * We can ignore the NUL character, since it can never appear in a PG text @@ -835,11 +838,11 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result) * reconstructing trigrams. */ if (c == 0) - return false; + return 0; /* Do the conversion, making sure the result is NUL-terminated */ memset(s, 0, sizeof(s)); - pg_wchar2mb_with_len(&c, s, 1); + clen = pg_wchar2mb_with_len(&c, s, 1); /* * In IGNORECASE mode, we can ignore uppercase characters. We assume that @@ -861,7 +864,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result) if (strcmp(lowerCased, s) != 0) { pfree(lowerCased); - return false; + return 0; } pfree(lowerCased); } @@ -869,7 +872,7 @@ convertPgWchar(pg_wchar c, trgm_mb_char *result) /* Fill result with exactly MAX_MULTIBYTE_CHAR_LEN bytes */ memcpy(result->bytes, s, MAX_MULTIBYTE_CHAR_LEN); - return true; + return clen; } diff --git a/contrib/pgcrypto/Makefile b/contrib/pgcrypto/Makefile index 647952af597..8b6dabc9fe0 100644 --- a/contrib/pgcrypto/Makefile +++ b/contrib/pgcrypto/Makefile @@ -45,7 +45,8 @@ REGRESS = init md5 sha1 hmac-md5 hmac-sha1 blowfish rijndael sm4\ sha2 des 3des cast5 \ crypt-des crypt-md5 crypt-blowfish crypt-xdes \ pgp-armor pgp-decrypt pgp-encrypt $(CF_PGP_TESTS) \ - pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-info \ + pgp-pubkey-decrypt pgp-pubkey-encrypt pgp-pubkey-session \ + pgp-info \ setup_fips EXTRA_CLEAN = gen-rtab diff --git a/contrib/pgcrypto/expected/pgp-decrypt.out b/contrib/pgcrypto/expected/pgp-decrypt.out index eb049ba9d44..1db89e8c00a 100644 --- a/contrib/pgcrypto/expected/pgp-decrypt.out +++ b/contrib/pgcrypto/expected/pgp-decrypt.out @@ -315,7 +315,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ== \xda39a3ee5e6b4b0d3255bfef95601890afd80709 (1 row) -select digest(pgp_sym_decrypt(dearmor(' +select digest(pgp_sym_decrypt_bytea(dearmor(' -----BEGIN PGP MESSAGE----- Comment: dat3.aes.sha1.mdc.s2k3.z0 @@ -387,6 +387,27 @@ ERROR: Wrong key or corrupt data select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1'); NOTICE: dbg: parse_literal_data: data type=b ERROR: Not text data +-- NUL byte in text decrypt. Ciphertext source: +-- printf 'a\x00\xc' | gpg --homedir /nonexistent --textmode \ +-- --personal-cipher-preferences aes --no-emit-version --batch \ +-- --symmetric --passphrase key --armor +do $$ +begin + perform pgp_sym_decrypt(dearmor(' +-----BEGIN PGP MESSAGE----- + +jA0EBwMCLd9OvySmZNZg0jgBe7vGTmnje5HGXI+zsIQ99WPZu4Zs/P6pQcZ+HZ4n +SZQHOfE8tagjB6Rqow82QpSBiOfWn4qjhQ== +=c2cz +-----END PGP MESSAGE----- +'), 'key', 'debug=1'); +exception when others then + raise '%', + regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]'); +end +$$; +ERROR: invalid byte sequence for encoding [REDACTED]: 0x00 +CONTEXT: PL/pgSQL function inline_code_block line 12 at RAISE -- Decryption with a certain incorrect key yields an apparent BZip2-compressed -- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key') -- until the random prefix gave rise to that property. diff --git a/contrib/pgcrypto/expected/pgp-decrypt_1.out b/contrib/pgcrypto/expected/pgp-decrypt_1.out index 80a4c48613d..d214e0bc0e0 100644 --- a/contrib/pgcrypto/expected/pgp-decrypt_1.out +++ b/contrib/pgcrypto/expected/pgp-decrypt_1.out @@ -311,7 +311,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ== \xda39a3ee5e6b4b0d3255bfef95601890afd80709 (1 row) -select digest(pgp_sym_decrypt(dearmor(' +select digest(pgp_sym_decrypt_bytea(dearmor(' -----BEGIN PGP MESSAGE----- Comment: dat3.aes.sha1.mdc.s2k3.z0 @@ -383,6 +383,27 @@ ERROR: Wrong key or corrupt data select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1'); NOTICE: dbg: parse_literal_data: data type=b ERROR: Not text data +-- NUL byte in text decrypt. Ciphertext source: +-- printf 'a\x00\xc' | gpg --homedir /nonexistent --textmode \ +-- --personal-cipher-preferences aes --no-emit-version --batch \ +-- --symmetric --passphrase key --armor +do $$ +begin + perform pgp_sym_decrypt(dearmor(' +-----BEGIN PGP MESSAGE----- + +jA0EBwMCLd9OvySmZNZg0jgBe7vGTmnje5HGXI+zsIQ99WPZu4Zs/P6pQcZ+HZ4n +SZQHOfE8tagjB6Rqow82QpSBiOfWn4qjhQ== +=c2cz +-----END PGP MESSAGE----- +'), 'key', 'debug=1'); +exception when others then + raise '%', + regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]'); +end +$$; +ERROR: invalid byte sequence for encoding [REDACTED]: 0x00 +CONTEXT: PL/pgSQL function inline_code_block line 12 at RAISE -- Decryption with a certain incorrect key yields an apparent BZip2-compressed -- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key') -- until the random prefix gave rise to that property. diff --git a/contrib/pgcrypto/expected/pgp-pubkey-session.out b/contrib/pgcrypto/expected/pgp-pubkey-session.out new file mode 100644 index 00000000000..f724d98eb24 --- /dev/null +++ b/contrib/pgcrypto/expected/pgp-pubkey-session.out @@ -0,0 +1,47 @@ +-- Test for overflow with session key at decrypt. +-- Data automatically generated by scripts/pgp_session_data.py. +-- See this file for details explaining how this data is generated. +SELECT pgp_pub_decrypt_bytea( +'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1 +da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30 +94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd +0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616 +3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10 +a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7 +b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d +8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc +0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494 +57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599 +ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3 +67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5 +060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56 +2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175 +5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d +135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea, +'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad +9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f +f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12 +07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1 +23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709 +f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c +138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4 +c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5 +18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847 +e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9 +de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0 +239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0 +ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9 +9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e +74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c +3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8 +58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549 +507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd +183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302 +25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45 +3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103 +cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03 +ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8 +7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8 +487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75 +9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea); +ERROR: Public key too big diff --git a/contrib/pgcrypto/meson.build b/contrib/pgcrypto/meson.build index df7dd50dbc3..57ebfd7ae6d 100644 --- a/contrib/pgcrypto/meson.build +++ b/contrib/pgcrypto/meson.build @@ -50,6 +50,7 @@ pgcrypto_regress = [ 'pgp-encrypt', 'pgp-pubkey-decrypt', 'pgp-pubkey-encrypt', + 'pgp-pubkey-session', 'pgp-info', ] diff --git a/contrib/pgcrypto/pgp-pgsql.c b/contrib/pgcrypto/pgp-pgsql.c index d9b15b07b0f..838a7c381fc 100644 --- a/contrib/pgcrypto/pgp-pgsql.c +++ b/contrib/pgcrypto/pgp-pgsql.c @@ -631,6 +631,7 @@ pgp_sym_decrypt_text(PG_FUNCTION_ARGS) arg = PG_GETARG_BYTEA_PP(2); res = decrypt_internal(0, 1, data, key, NULL, arg); + pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false); PG_FREE_IF_COPY(data, 0); PG_FREE_IF_COPY(key, 1); @@ -732,6 +733,7 @@ pgp_pub_decrypt_text(PG_FUNCTION_ARGS) arg = PG_GETARG_BYTEA_PP(3); res = decrypt_internal(1, 1, data, key, psw, arg); + pg_verifymbstr(VARDATA_ANY(res), VARSIZE_ANY_EXHDR(res), false); PG_FREE_IF_COPY(data, 0); PG_FREE_IF_COPY(key, 1); diff --git a/contrib/pgcrypto/pgp-pubdec.c b/contrib/pgcrypto/pgp-pubdec.c index a0a5738a40e..2a13aa3e6ad 100644 --- a/contrib/pgcrypto/pgp-pubdec.c +++ b/contrib/pgcrypto/pgp-pubdec.c @@ -157,6 +157,7 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt) uint8 *msg; int msglen; PGP_MPI *m; + unsigned sess_key_len; pk = ctx->pub_key; if (pk == NULL) @@ -220,11 +221,19 @@ pgp_parse_pubenc_sesskey(PGP_Context *ctx, PullFilter *pkt) if (res < 0) goto out; + sess_key_len = msglen - 3; + if (sess_key_len > PGP_MAX_KEY) + { + px_debug("incorrect session key length=%u", sess_key_len); + res = PXE_PGP_KEY_TOO_BIG; + goto out; + } + /* * got sesskey */ ctx->cipher_algo = *msg; - ctx->sess_key_len = msglen - 3; + ctx->sess_key_len = sess_key_len; memcpy(ctx->sess_key, msg + 1, ctx->sess_key_len); out: diff --git a/contrib/pgcrypto/px.c b/contrib/pgcrypto/px.c index d35ccca7774..a7cb248f6b7 100644 --- a/contrib/pgcrypto/px.c +++ b/contrib/pgcrypto/px.c @@ -65,6 +65,7 @@ static const struct error_desc px_err_list[] = { {PXE_PGP_UNEXPECTED_PKT, "Unexpected packet in key data"}, {PXE_PGP_MATH_FAILED, "Math operation failed"}, {PXE_PGP_SHORT_ELGAMAL_KEY, "Elgamal keys must be at least 1024 bits long"}, + {PXE_PGP_KEY_TOO_BIG, "Public key too big"}, {PXE_PGP_UNKNOWN_PUBALGO, "Unknown public-key encryption algorithm"}, {PXE_PGP_WRONG_KEY, "Wrong key"}, {PXE_PGP_MULTIPLE_KEYS, diff --git a/contrib/pgcrypto/px.h b/contrib/pgcrypto/px.h index 222d8b1ad2f..bda0524a04a 100644 --- a/contrib/pgcrypto/px.h +++ b/contrib/pgcrypto/px.h @@ -75,7 +75,7 @@ /* -108 is unused */ #define PXE_PGP_MATH_FAILED -109 #define PXE_PGP_SHORT_ELGAMAL_KEY -110 -/* -111 is unused */ +#define PXE_PGP_KEY_TOO_BIG -111 #define PXE_PGP_UNKNOWN_PUBALGO -112 #define PXE_PGP_WRONG_KEY -113 #define PXE_PGP_MULTIPLE_KEYS -114 diff --git a/contrib/pgcrypto/scripts/pgp_session_data.py b/contrib/pgcrypto/scripts/pgp_session_data.py new file mode 100644 index 00000000000..999350bb2bc --- /dev/null +++ b/contrib/pgcrypto/scripts/pgp_session_data.py @@ -0,0 +1,491 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +# +# Generate PGP data to check the session key length of the input data provided +# to pgp_pub_decrypt_bytea(). +# +# First, the crafted data is generated from valid RSA data, freshly generated +# by this script each time it is run, see generate_rsa_keypair(). +# Second, the crafted PGP data is built, see build_message_data() and +# build_key_data(). Finally, the resulting SQL script is generated. +# +# This script generates in stdout the SQL file that is used in the regression +# tests of pgcrypto. The following command can be used to regenerate the file +# which should never be manually manipulated: +# python3 scripts/pgp_session_data.py > sql/pgp-pubkey-session.sql + +import os +import re +import struct +import secrets +import sys +import time + +# pwn for binary manipulation (p32, p64) +from pwn import * + +# Cryptographic libraries, to craft the PGP data. +from Crypto.Cipher import AES +from Crypto.PublicKey import RSA +from Crypto.Util.number import inverse + +# AES key used for session key encryption (16 bytes for AES-128) +AES_KEY = b'\x01' * 16 + +def generate_rsa_keypair(key_size: int = 2048) -> dict: + """ + Generate a fresh RSA key pair. + + The generated key includes all components needed for PGP operations: + - n: public modulus (p * q) + - e: public exponent (typically 65537) + - d: private exponent (e^-1 mod phi(n)) + - p, q: prime factors of n + - u: coefficient (p^-1 mod q) for CRT optimization + + The caller can pass the wanted key size in input, for a default of 2048 + bytes. This function returns the RSA key components, after performing + some validation on them. + """ + + start_time = time.time() + + # Generate RSA key + key = RSA.generate(key_size) + + # Extract all key components + rsa_components = { + 'n': key.n, # Public modulus (p * q) + 'e': key.e, # Public exponent (typically 65537) + 'd': key.d, # Private exponent (e^-1 mod phi(n)) + 'p': key.p, # First prime factor + 'q': key.q, # Second prime factor + 'u': inverse(key.p, key.q) # Coefficient for CRT: p^-1 mod q + } + + # Validate key components for correctness + validate_rsa_key(rsa_components) + + return rsa_components + +def validate_rsa_key(rsa: dict) -> None: + """ + Validate a generated RSA key. + + This function performs basic validation to ensure the RSA key is properly + constructed and all components are consistent, at least mathematically. + + Validations performed: + 1. n = p * q (modulus is product of primes) + 2. gcd(e, phi(n)) = 1 (public exponent is coprime to phi(n)) + 3. (d * e) mod(phi(n)) = 1 (private exponent is multiplicative inverse) + 4. (u * p) (mod q) = 1 (coefficient is correct for CRT) + """ + + n, e, d, p, q, u = rsa['n'], rsa['e'], rsa['d'], rsa['p'], rsa['q'], rsa['u'] + + # Check that n = p * q + if n != p * q: + raise ValueError("RSA validation failed: n <> p * q") + + # Check that p and q are different + if p == q: + raise ValueError("RSA validation failed: p = q (not allowed)") + + # Calculate phi(n) = (p-1)(q-1) + phi_n = (p - 1) * (q - 1) + + # Check that gcd(e, phi(n)) = 1 + def gcd(a, b): + while b: + a, b = b, a % b + return a + + if gcd(e, phi_n) != 1: + raise ValueError("RSA validation failed: gcd(e, phi(n)) <> 1") + + # Check that (d * e) mod(phi(n)) = 1 + if (d * e) % phi_n != 1: + raise ValueError("RSA validation failed: d * e <> 1 (mod phi(n))") + + # Check that (u * p) (mod q) = 1 + if (u * p) % q != 1: + raise ValueError("RSA validation failed: u * p <> 1 (mod q)") + +def mpi_encode(x: int) -> bytes: + """ + Encode an integer as an OpenPGP Multi-Precision Integer (MPI). + + Format (RFC 4880, Section 3.2): + - 2 bytes: bit length of the integer (big-endian) + - N bytes: the integer in big-endian format + + This is used to encode RSA key components (n, e, d, p, q, u) in PGP + packets. + + The integer to encode is given in input, returning an MPI-encoded + integer. + + For example: + mpi_encode(65537) -> b'\x00\x11\x01\x00\x01' + (17 bits, value 0x010001) + """ + if x < 0: + raise ValueError("MPI cannot encode negative integers") + + if x == 0: + # Special case: zero has 0 bits and empty magnitude + bits = 0 + mag = b"" + else: + # Calculate bit length and convert to bytes + bits = x.bit_length() + mag = x.to_bytes((bits + 7) // 8, 'big') + + # Pack: 2-byte bit length + magnitude bytes + return struct.pack('>H', bits) + mag + +def new_packet(tag: int, payload: bytes) -> bytes: + """ + Create a new OpenPGP packet with a proper header. + + OpenPGP packet format (RFC 4880, Section 4.2): + - New packet format: 0xC0 | tag + - Length encoding depends on payload size: + * 0-191: single byte + * 192-8383: two bytes (192 + ((length - 192) >> 8), (length - 192) & 0xFF) + * 8384+: five bytes (0xFF + 4-byte big-endian length) + + The packet is built from a "tag" (1-63) and some "payload" data. The + result generated is a complete OpenPGP packet. + + For example: + new_packet(1, b'data') -> b'\xC1\x04data' + (Tag 1, length 4, payload 'data') + """ + # New packet format: set bit 7 and 6, clear bit 5, tag in bits 0-5 + first = 0xC0 | (tag & 0x3F) + ln = len(payload) + + # Encode length according to OpenPGP specification + if ln <= 191: + # Single byte length for small packets + llen = bytes([ln]) + elif ln <= 8383: + # Two-byte length for medium packets + ln2 = ln - 192 + llen = bytes([192 + (ln2 >> 8), ln2 & 0xFF]) + else: + # Five-byte length for large packets + llen = bytes([255]) + struct.pack('>I', ln) + + return bytes([first]) + llen + payload + +def build_key_data(rsa: dict) -> bytes: + """ + Build the key data, containing an RSA private key. + + The RSA contents should have been generated previously. + + Format (see RFC 4880, Section 5.5.3): + - 1 byte: version (4) + - 4 bytes: creation time (current Unix timestamp) + - 1 byte: public key algorithm (2 = RSA encrypt) + - MPI: RSA public modulus n + - MPI: RSA public exponent e + - 1 byte: string-to-key usage (0 = no encryption) + - MPI: RSA private exponent d + - MPI: RSA prime p + - MPI: RSA prime q + - MPI: RSA coefficient u = p^-1 mod q + - 2 bytes: checksum of private key material + + This function takes a set of RSA key components in input (n, e, d, p, q, u) + and returns a secret key packet. + """ + + # Public key portion + ver = bytes([4]) # Version 4 key + ctime = struct.pack('>I', int(time.time())) # Current Unix timestamp + algo = bytes([2]) # RSA encrypt algorithm + n_mpi = mpi_encode(rsa['n']) # Public modulus + e_mpi = mpi_encode(rsa['e']) # Public exponent + pub = ver + ctime + algo + n_mpi + e_mpi + + # Private key portion + hide_type = bytes([0]) # No string-to-key encryption + d_mpi = mpi_encode(rsa['d']) # Private exponent + p_mpi = mpi_encode(rsa['p']) # Prime p + q_mpi = mpi_encode(rsa['q']) # Prime q + u_mpi = mpi_encode(rsa['u']) # Coefficient u = p^-1 mod q + + # Calculate checksum of private key material (simple sum mod 65536) + private_data = d_mpi + p_mpi + q_mpi + u_mpi + cksum = sum(private_data) & 0xFFFF + + secret = hide_type + private_data + struct.pack('>H', cksum) + payload = pub + secret + + return new_packet(7, payload) + +def pgp_cfb_encrypt_resync(key, plaintext): + """ + Implement OpenPGP CFB mode with resync. + + OpenPGP CFB mode is a variant of standard CFB with a resync operation + after the first two blocks. + + Algorithm (RFC 4880, Section 13.9): + 1. Block 1: FR=zeros, encrypt full block_size bytes + 2. Block 2: FR=block1, encrypt only 2 bytes + 3. Resync: FR = block1[2:] + block2 + 4. Remaining blocks: standard CFB mode + + This function uses the following arguments: + - key: AES encryption key (16 bytes for AES-128) + - plaintext: Data to encrypt + """ + block_size = 16 # AES block size + cipher = AES.new(key[:16], AES.MODE_ECB) # Use ECB for manual CFB + ciphertext = b'' + + # Block 1: FR=zeros, encrypt full 16 bytes + FR = b'\x00' * block_size + FRE = cipher.encrypt(FR) # Encrypt the feedback register + block1 = bytes(a ^ b for a, b in zip(FRE, plaintext[0:16])) + ciphertext += block1 + + # Block 2: FR=block1, encrypt only 2 bytes + FR = block1 + FRE = cipher.encrypt(FR) + block2 = bytes(a ^ b for a, b in zip(FRE[0:2], plaintext[16:18])) + ciphertext += block2 + + # Resync: FR = block1[2:16] + block2[0:2] + # This is the key difference from standard CFB mode + FR = block1[2:] + block2 + + # Block 3+: Continue with standard CFB mode + pos = 18 + while pos < len(plaintext): + FRE = cipher.encrypt(FR) + chunk_len = min(block_size, len(plaintext) - pos) + chunk = plaintext[pos:pos+chunk_len] + enc_chunk = bytes(a ^ b for a, b in zip(FRE[:chunk_len], chunk)) + ciphertext += enc_chunk + + # Update feedback register for next iteration + if chunk_len == block_size: + FR = enc_chunk + else: + # Partial block: pad with old FR bytes + FR = enc_chunk + FR[chunk_len:] + pos += chunk_len + + return ciphertext + +def build_literal_data_packet(data: bytes) -> bytes: + """ + Build a literal data packet containing a message. + + Format (RFC 4880, Section 5.9): + - 1 byte: data format ('b' = binary, 't' = text, 'u' = UTF-8 text) + - 1 byte: filename length (0 = no filename) + - N bytes: filename (empty in this case) + - 4 bytes: date (current Unix timestamp) + - M bytes: literal data + + The data used to build the packet is given in input, with the generated + result returned. + """ + body = bytes([ + ord('b'), # Binary data format + 0, # Filename length (0 = no filename) + ]) + struct.pack('>I', int(time.time())) + data # Current timestamp + data + + return new_packet(11, body) + +def build_symenc_data_packet(sess_key: bytes, cipher_algo: int, payload: bytes) -> bytes: + """ + Build a symmetrically-encrypted data packet using AES-128-CFB. + + This packet contains encrypted data using the session key. The format + includes a random prefix, for security (see RFC 4880, Section 5.7). + + Packet structure: + - Random prefix (block_size bytes) + - Prefix repeat (last 2 bytes of prefix repeated) + - Encrypted literal data packet + + This function uses the following set of arguments: + - sess_key: Session key for encryption + - cipher_algo: Cipher algorithm identifier (7 = AES-128) + - payload: Data to encrypt (wrapped in literal data packet) + """ + block_size = 16 # AES-128 block size + key = sess_key[:16] # Use first 16 bytes for AES-128 + + # Create random prefix + repeat last 2 bytes (total 18 bytes) + # This is required by OpenPGP for integrity checking + prefix_random = secrets.token_bytes(block_size) + prefix = prefix_random + prefix_random[-2:] # 18 bytes total + + # Wrap payload in literal data packet + literal_pkt = build_literal_data_packet(payload) + + # Plaintext = prefix + literal data packet + plaintext = prefix + literal_pkt + + # Encrypt using OpenPGP CFB mode with resync + ciphertext = pgp_cfb_encrypt_resync(key, plaintext) + + return new_packet(9, ciphertext) + +def build_tag1_packet(rsa: dict, sess_key: bytes) -> bytes: + """ + Build a public-key encrypted key. + + This is a very important function, as it is able to create the packet + triggering the overflow check. This function can also be used to create + "legit" packet data. + + Format (RFC 4880, Section 5.1): + - 1 byte: version (3) + - 8 bytes: key ID (0 = any key accepted) + - 1 byte: public key algorithm (2 = RSA encrypt) + - MPI: RSA-encrypted session key + + This uses in arguments the generated RSA key pair, and the session key + to encrypt. The latter is manipulated to trigger the overflow. + + This function returns a complete packet encrypted by a session key. + """ + + # Calculate RSA modulus size in bytes + n_bytes = (rsa['n'].bit_length() + 7) // 8 + + # Session key message format: + # - 1 byte: symmetric cipher algorithm (7 = AES-128) + # - N bytes: session key + # - 2 bytes: checksum (simple sum of session key bytes) + algo_byte = bytes([7]) # AES-128 algorithm identifier + cksum = sum(sess_key) & 0xFFFF # 16-bit checksum + M = algo_byte + sess_key + struct.pack('>H', cksum) + + # PKCS#1 v1.5 padding construction + # Format: 0x02 || PS || 0x00 || M + # Total padded message must be exactly n_bytes long. + total_len = n_bytes # Total length must equal modulus size in bytes + ps_len = total_len - len(M) - 2 # Subtract 2 for 0x02 and 0x00 bytes + + if ps_len < 8: + raise ValueError(f"Padding string too short ({ps_len} bytes); need at least 8 bytes. " + f"Message length: {len(M)}, Modulus size: {n_bytes} bytes") + + # Create padding string with *ALL* bytes being 0xFF (no zero separator!) + PS = bytes([0xFF]) * ps_len + + # Construct the complete padded message + # Normal PKCS#1 v1.5 padding: 0x02 || PS || 0x00 || M + padded = bytes([0x02]) + PS + bytes([0x00]) + M + + # Verify padding construction + if len(padded) != n_bytes: + raise ValueError(f"Padded message length ({len(padded)}) doesn't match RSA modulus size ({n_bytes})") + + # Convert padded message to integer and encrypt with RSA + m_int = int.from_bytes(padded, 'big') + + # Ensure message is smaller than modulus (required for RSA) + if m_int >= rsa['n']: + raise ValueError("Padded message is larger than RSA modulus") + + # RSA encryption: c = m^e mod n + c_int = pow(m_int, rsa['e'], rsa['n']) + + # Encode encrypted result as MPI + c_mpi = mpi_encode(c_int) + + # Build complete packet + ver = bytes([3]) # Version 3 packet + key_id = b"\x00" * 8 # Key ID (0 = any key accepted) + algo = bytes([2]) # RSA encrypt algorithm + payload = ver + key_id + algo + c_mpi + + return new_packet(1, payload) + +def build_message_data(rsa: dict) -> bytes: + """ + This function creates a crafted message, with a long session key + length. + + This takes in input the RSA key components generated previously, + returning a concatenated set of PGP packets crafted for the purpose + of this test. + """ + + # Base prefix for session key (AES key + padding + size). + # Note that the crafted size is the important part for this test. + prefix = AES_KEY + b"\x00" * 16 + p32(0x10) + + # Build encrypted data packet, legit. + sedata = build_symenc_data_packet(AES_KEY, cipher_algo=7, payload=b"\x0a\x00") + + # Build multiple packets + packets = [ + # First packet, legit. + build_tag1_packet(rsa, prefix), + + # Encrypted data packet, legit. + sedata, + + # Second packet: information payload. + # + # This packet contains a longer-crafted session key, able to trigger + # the overflow check in pgcrypto. This is the critical part, and + # and you are right to pay a lot of attention here if you are + # reading this code. + build_tag1_packet(rsa, prefix) + ] + + return b"".join(packets) + +def main(): + # Default key size. + # This number can be set to a higher number if wanted, like 4096. We + # just do not need to do that here. + key_size = 2048 + + # Generate fresh RSA key pair + rsa = generate_rsa_keypair(key_size) + + # Generate the message data. + print("### Building message data", file=sys.stderr) + message_data = build_message_data(rsa) + + # Build the key containing the RSA private key + print("### Building key data", file=sys.stderr) + key_data = build_key_data(rsa) + + # Convert to hexadecimal, for the bytea used in the SQL file. + message_data = message_data.hex() + key_data = key_data.hex() + + # Split each value into lines of 72 characters, for readability. + message_data = re.sub("(.{72})", "\\1\n", message_data, 0, re.DOTALL) + key_data = re.sub("(.{72})", "\\1\n", key_data, 0, re.DOTALL) + + # Get the script filename for documentation + file_basename = os.path.basename(__file__) + + # Output the SQL test case + print(f'''-- Test for overflow with session key at decrypt. +-- Data automatically generated by scripts/{file_basename}. +-- See this file for details explaining how this data is generated. +SELECT pgp_pub_decrypt_bytea( +'\\x{message_data}'::bytea, +'\\x{key_data}'::bytea);''', + file=sys.stdout) + +if __name__ == "__main__": + main() diff --git a/contrib/pgcrypto/sql/pgp-decrypt.sql b/contrib/pgcrypto/sql/pgp-decrypt.sql index 49a0267bbcb..2fe498f2f02 100644 --- a/contrib/pgcrypto/sql/pgp-decrypt.sql +++ b/contrib/pgcrypto/sql/pgp-decrypt.sql @@ -228,7 +228,7 @@ SaV9L04ky1qECNDx3XjnoKLC+H7IOQ== -----END PGP MESSAGE----- '), '0123456789abcdefghij'), 'sha1'); -select digest(pgp_sym_decrypt(dearmor(' +select digest(pgp_sym_decrypt_bytea(dearmor(' -----BEGIN PGP MESSAGE----- Comment: dat3.aes.sha1.mdc.s2k3.z0 @@ -282,6 +282,26 @@ VsxxqLSPzNLAeIspJk5G -- Routine text/binary mismatch. select pgp_sym_decrypt(pgp_sym_encrypt_bytea('P', 'key'), 'key', 'debug=1'); +-- NUL byte in text decrypt. Ciphertext source: +-- printf 'a\x00\xc' | gpg --homedir /nonexistent --textmode \ +-- --personal-cipher-preferences aes --no-emit-version --batch \ +-- --symmetric --passphrase key --armor +do $$ +begin + perform pgp_sym_decrypt(dearmor(' +-----BEGIN PGP MESSAGE----- + +jA0EBwMCLd9OvySmZNZg0jgBe7vGTmnje5HGXI+zsIQ99WPZu4Zs/P6pQcZ+HZ4n +SZQHOfE8tagjB6Rqow82QpSBiOfWn4qjhQ== +=c2cz +-----END PGP MESSAGE----- +'), 'key', 'debug=1'); +exception when others then + raise '%', + regexp_replace(sqlerrm, 'encoding "[^"]*"', 'encoding [REDACTED]'); +end +$$; + -- Decryption with a certain incorrect key yields an apparent BZip2-compressed -- plaintext. Ciphertext source: iterative pgp_sym_encrypt('secret', 'key') -- until the random prefix gave rise to that property. diff --git a/contrib/pgcrypto/sql/pgp-pubkey-session.sql b/contrib/pgcrypto/sql/pgp-pubkey-session.sql new file mode 100644 index 00000000000..51792f1f4d8 --- /dev/null +++ b/contrib/pgcrypto/sql/pgp-pubkey-session.sql @@ -0,0 +1,46 @@ +-- Test for overflow with session key at decrypt. +-- Data automatically generated by scripts/pgp_session_data.py. +-- See this file for details explaining how this data is generated. +SELECT pgp_pub_decrypt_bytea( +'\xc1c04c030000000000000000020800a46f5b9b1905b49457a6485474f71ed9b46c2527e1 +da08e1f7871e12c3d38828f2076b984a595bf60f616599ca5729d547de06a258bfbbcd30 +94a321e4668cd43010f0ca8ecf931e5d39bda1152c50c367b11c723f270729245d3ebdbd +0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5060af7603cfd9ed186ebadd616 +3b50ae42bea5f6d14dda24e6d4687b434c175084515d562e896742b0ba9a1c87d5642e10 +a5550379c71cc490a052ada483b5d96526c0a600fc51755052aa77fdf72f7b4989b920e7 +b90f4b30787a46482670d5caecc7a515a926055ad5509d135702ce51a0e4c1033f2d939d +8f0075ec3428e17310da37d3d2d7ad1ce99adcc91cd446c366c402ae1ee38250343a7fcc +0f8bc28020e603d7a4795ef0dcc1c04c030000000000000000020800a46f5b9b1905b494 +57a6485474f71ed9b46c2527e1da08e1f7871e12c3d38828f2076b984a595bf60f616599 +ca5729d547de06a258bfbbcd3094a321e4668cd43010f0ca8ecf931e5d39bda1152c50c3 +67b11c723f270729245d3ebdbd0694d320c5a5aa6a405fb45182acb3d7973cbce398e0c5 +060af7603cfd9ed186ebadd6163b50ae42bea5f6d14dda24e6d4687b434c175084515d56 +2e896742b0ba9a1c87d5642e10a5550379c71cc490a052ada483b5d96526c0a600fc5175 +5052aa77fdf72f7b4989b920e7b90f4b30787a46482670d5caecc7a515a926055ad5509d +135702ce51a0e4c1033f2d939d8f0075ec3428e17310da37d3d2d7ad1ce99adc'::bytea, +'\xc7c2d8046965d657020800eef8bf1515adb1a3ee7825f75c668ea8dd3e3f9d13e958f6ad +9c55adc0c931a4bb00abe1d52cf7bb0c95d537949d277a5292ede375c6b2a67a3bf7d19f +f975bb7e7be35c2d8300dacba360a0163567372f7dc24000cc7cb6170bedc8f3b1f98c12 +07a6cb4de870a4bc61319b139dcc0e20c368fd68f8fd346d2c0b69c5aed560504e2ec6f1 +23086fe3c5540dc4dd155c0c67257c4ada862f90fe172ace344089da8135e92aca5c2709 +f1c1bc521798bb8c0365841496e709bd184132d387e0c9d5f26dc00fd06c3a76ef66a75c +138285038684707a847b7bd33cfbefbf1d336be954a8048946af97a66352adef8e8b5ae4 +c4748c6f2510265b7a8267bc370dbb00110100010007ff7e72d4f95d2d39901ac12ca5c5 +18e767e719e72340c3fab51c8c5ab1c40f31db8eaffe43533fa61e2dbca2c3f4396c0847 +e5434756acbb1f68128f4136bb135710c89137d74538908dac77967de9e821c559700dd9 +de5a2727eec1f5d12d5d74869dd1de45ed369d94a8814d23861dd163f8c27744b26b98f0 +239c2e6dd1e3493b8cc976fdc8f9a5e250f715aa4c3d7d5f237f8ee15d242e8fa941d1a0 +ed9550ab632d992a97518d142802cb0a97b251319bf5742db8d9d8cbaa06cdfba2d75bc9 +9d77a51ff20bd5ba7f15d7af6e85b904de2855d19af08d45f39deb85403033c69c767a8e +74a343b1d6c8911d34ea441ac3850e57808ed3d885835cbe6c79d10400ef16256f3d5c4c +3341516a2d2aa888df81b603f48a27f3666b40f992a857c1d11ff639cd764a9b42d5a1f8 +58b4aeee36b85508bb5e8b91ef88a7737770b330224479d9b44eae8c631bc43628b69549 +507c0a1af0be0dd7696015abea722b571eb35eefc4ab95595378ec12814727443f625fcd +183bb9b3bccf53b54dd0e5e7a50400ffe08537b2d4e6074e4a1727b658cfccdec8962302 +25e300c05690de45f7065c3d40d86f544a64d51a3e94424f9851a16d1322ebdb41fa8a45 +3131f3e2dc94e858e6396722643df382680f815e53bcdcde5da622f50530a83b217f1103 +cdd6e5e9babe1e415bbff28d44bd18c95f43bbd04afeb2a2a99af38a571c7540de21df03 +ff62c0a33d9143dd3f639893f47732c11c5a12c6052d1935f4d507b7ae1f76ab0e9a69b8 +7305a7f7c19bd509daf4903bff614bc26d118f03e461469c72c12d3a2bb4f78e4d342ce8 +487723649a01ed2b9eb11c662134502c098d55dfcd361939d8370873422c3da75a515a75 +9ffedfe7df44fb3c20f81650801a30d43b5c90b98b3eee'::bytea); diff --git a/contrib/unaccent/unaccent.c b/contrib/unaccent/unaccent.c index 64c879e5470..554843b3548 100644 --- a/contrib/unaccent/unaccent.c +++ b/contrib/unaccent/unaccent.c @@ -149,9 +149,9 @@ initTrie(const char *filename) state = 0; for (ptr = line; *ptr; ptr += ptrlen) { - ptrlen = pg_mblen(ptr); + ptrlen = pg_mblen_cstr(ptr); /* ignore whitespace, but end src or trg */ - if (t_isspace(ptr)) + if (t_isspace_cstr(ptr)) { if (state == 1) state = 2; @@ -315,6 +315,7 @@ unaccent_lexize(PG_FUNCTION_ARGS) char *srcchar = (char *) PG_GETARG_POINTER(1); int32 len = PG_GETARG_INT32(2); char *srcstart = srcchar; + const char *srcend = srcstart + len; TSLexeme *res; StringInfoData buf; @@ -342,7 +343,7 @@ unaccent_lexize(PG_FUNCTION_ARGS) } else { - matchlen = pg_mblen(srcchar); + matchlen = pg_mblen_range(srcchar, srcend); if (buf.data != NULL) appendBinaryStringInfo(&buf, srcchar, matchlen); } diff --git a/pom.xml b/pom.xml index 43c583614a6..0b461661554 100644 --- a/pom.xml +++ b/pom.xml @@ -1802,6 +1802,7 @@ code or new licensing patterns. contrib/btree_gist/btree_bool.c contrib/basic_archive/basic_archive.conf contrib/pg_freespacemap/pg_freespacemap.conf + contrib/pgcrypto/scripts/pgp_session_data.py contrib/pg_walinspect/walinspect.conf contrib/pgrowlocks/specs/pgrowlocks.spec contrib/tcn/specs/tcn.spec diff --git a/src/backend/access/hash/hashfunc.c b/src/backend/access/hash/hashfunc.c index ac21884162f..933aa42ffc9 100644 --- a/src/backend/access/hash/hashfunc.c +++ b/src/backend/access/hash/hashfunc.c @@ -235,6 +235,7 @@ hashoidvector(PG_FUNCTION_ARGS) { oidvector *key = (oidvector *) PG_GETARG_POINTER(0); + check_valid_oidvector(key); return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid)); } @@ -243,6 +244,7 @@ hashoidvectorextended(PG_FUNCTION_ARGS) { oidvector *key = (oidvector *) PG_GETARG_POINTER(0); + check_valid_oidvector(key); return hash_any_extended((unsigned char *) key->values, key->dim1 * sizeof(Oid), PG_GETARG_INT64(1)); diff --git a/src/backend/access/nbtree/nbtcompare.c b/src/backend/access/nbtree/nbtcompare.c index 976a2cc6447..720733b75d2 100644 --- a/src/backend/access/nbtree/nbtcompare.c +++ b/src/backend/access/nbtree/nbtcompare.c @@ -299,6 +299,9 @@ btoidvectorcmp(PG_FUNCTION_ARGS) oidvector *b = (oidvector *) PG_GETARG_POINTER(1); int i; + check_valid_oidvector(a); + check_valid_oidvector(b); + /* We arbitrarily choose to sort first by vector length */ if (a->dim1 != b->dim1) PG_RETURN_INT32(a->dim1 - b->dim1); diff --git a/src/backend/catalog/pg_depend.c b/src/backend/catalog/pg_depend.c index 02e0ce71a07..b3d1c2fba99 100644 --- a/src/backend/catalog/pg_depend.c +++ b/src/backend/catalog/pg_depend.c @@ -23,11 +23,13 @@ #include "catalog/pg_constraint.h" #include "catalog/pg_depend.h" #include "catalog/pg_extension.h" +#include "catalog/pg_type.h" #include "commands/extension.h" #include "miscadmin.h" #include "utils/fmgroids.h" #include "utils/lsyscache.h" #include "utils/rel.h" +#include "utils/syscache.h" static bool isObjectPinned(const ObjectAddress *object); @@ -812,6 +814,77 @@ getAutoExtensionsOfObject(Oid classId, Oid objectId) return result; } +/* + * Look up a type belonging to an extension. + * + * Returns the type's OID, or InvalidOid if not found. + * + * Notice that the type is specified by name only, without a schema. + * That's because this will typically be used by relocatable extensions + * which can't make a-priori assumptions about which schema their objects + * are in. As long as the extension only defines one type of this name, + * the answer is unique anyway. + * + * We might later add the ability to look up functions, operators, etc. + */ +Oid +getExtensionType(Oid extensionOid, const char *typname) +{ + Oid result = InvalidOid; + Relation depRel; + ScanKeyData key[3]; + SysScanDesc scan; + HeapTuple tup; + + depRel = table_open(DependRelationId, AccessShareLock); + + ScanKeyInit(&key[0], + Anum_pg_depend_refclassid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(ExtensionRelationId)); + ScanKeyInit(&key[1], + Anum_pg_depend_refobjid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(extensionOid)); + ScanKeyInit(&key[2], + Anum_pg_depend_refobjsubid, + BTEqualStrategyNumber, F_INT4EQ, + Int32GetDatum(0)); + + scan = systable_beginscan(depRel, DependReferenceIndexId, true, + NULL, 3, key); + + while (HeapTupleIsValid(tup = systable_getnext(scan))) + { + Form_pg_depend depform = (Form_pg_depend) GETSTRUCT(tup); + + if (depform->classid == TypeRelationId && + depform->deptype == DEPENDENCY_EXTENSION) + { + Oid typoid = depform->objid; + HeapTuple typtup; + + typtup = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typoid)); + if (!HeapTupleIsValid(typtup)) + continue; /* should we throw an error? */ + if (strcmp(NameStr(((Form_pg_type) GETSTRUCT(typtup))->typname), + typname) == 0) + { + result = typoid; + ReleaseSysCache(typtup); + break; /* no need to keep searching */ + } + ReleaseSysCache(typtup); + } + } + + systable_endscan(scan); + + table_close(depRel, AccessShareLock); + + return result; +} + /* * Detect whether a sequence is marked as "owned" by a column * diff --git a/src/backend/catalog/pg_proc.c b/src/backend/catalog/pg_proc.c index 6285ce06f66..f46e854dd81 100644 --- a/src/backend/catalog/pg_proc.c +++ b/src/backend/catalog/pg_proc.c @@ -1227,7 +1227,7 @@ match_prosrc_to_literal(const char *prosrc, const char *literal, if (cursorpos > 0) newcp++; } - chlen = pg_mblen(prosrc); + chlen = pg_mblen_cstr(prosrc); if (strncmp(prosrc, literal, chlen) != 0) goto fail; prosrc += chlen; diff --git a/src/backend/commands/extension.c b/src/backend/commands/extension.c index 41e06471f4e..8ec1ffdf209 100644 --- a/src/backend/commands/extension.c +++ b/src/backend/commands/extension.c @@ -46,6 +46,7 @@ #include "catalog/pg_depend.h" #include "catalog/pg_extension.h" #include "catalog/pg_namespace.h" +#include "catalog/pg_proc.h" #include "catalog/pg_type.h" #include "cdb/cdbgang.h" #include "commands/alter.h" @@ -63,10 +64,12 @@ #include "utils/builtins.h" #include "utils/conffiles.h" #include "utils/fmgroids.h" +#include "utils/inval.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" #include "utils/snapmgr.h" +#include "utils/syscache.h" #include "utils/varlena.h" #include "catalog/oid_dispatch.h" @@ -114,7 +117,26 @@ typedef struct ExtensionVersionInfo struct ExtensionVersionInfo *previous; /* current best predecessor */ } ExtensionVersionInfo; +/* + * Cache structure for get_function_sibling_type (and maybe later, + * allied lookup functions). + */ +typedef struct ExtensionSiblingCache +{ + struct ExtensionSiblingCache *next; /* list link */ + /* lookup key: requesting function's OID and type name */ + Oid reqfuncoid; + const char *typname; + bool valid; /* is entry currently valid? */ + uint32 exthash; /* cache hash of owning extension's OID */ + Oid typeoid; /* OID associated with typname */ +} ExtensionSiblingCache; + +/* Head of linked list of ExtensionSiblingCache structs */ +static ExtensionSiblingCache *ext_sibling_list = NULL; + /* Local functions */ +static void ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue); static List *find_update_path(List *evi_list, ExtensionVersionInfo *evi_start, ExtensionVersionInfo *evi_target, @@ -264,6 +286,114 @@ get_extension_schema(Oid ext_oid) return result; } +/* + * get_function_sibling_type - find a type belonging to same extension as func + * + * Returns the type's OID, or InvalidOid if not found. + * + * This is useful in extensions, which won't have fixed object OIDs. + * We work from the calling function's own OID, which it can get from its + * FunctionCallInfo parameter, and look up the owning extension and thence + * a type belonging to the same extension. + * + * Notice that the type is specified by name only, without a schema. + * That's because this will typically be used by relocatable extensions + * which can't make a-priori assumptions about which schema their objects + * are in. As long as the extension only defines one type of this name, + * the answer is unique anyway. + * + * We might later add the ability to look up functions, operators, etc. + * + * This code is simply a frontend for some pg_depend lookups. Those lookups + * are fairly expensive, so we provide a simple cache facility. We assume + * that the passed typname is actually a C constant, or at least permanently + * allocated, so that we need not copy that string. + */ +Oid +get_function_sibling_type(Oid funcoid, const char *typname) +{ + ExtensionSiblingCache *cache_entry; + Oid extoid; + Oid typeoid; + + /* + * See if we have the answer cached. Someday there may be enough callers + * to justify a hash table, but for now, a simple linked list is fine. + */ + for (cache_entry = ext_sibling_list; cache_entry != NULL; + cache_entry = cache_entry->next) + { + if (funcoid == cache_entry->reqfuncoid && + strcmp(typname, cache_entry->typname) == 0) + break; + } + if (cache_entry && cache_entry->valid) + return cache_entry->typeoid; + + /* + * Nope, so do the expensive lookups. We do not expect failures, so we do + * not cache negative results. + */ + extoid = getExtensionOfObject(ProcedureRelationId, funcoid); + if (!OidIsValid(extoid)) + return InvalidOid; + typeoid = getExtensionType(extoid, typname); + if (!OidIsValid(typeoid)) + return InvalidOid; + + /* + * Build, or revalidate, cache entry. + */ + if (cache_entry == NULL) + { + /* Register invalidation hook if this is first entry */ + if (ext_sibling_list == NULL) + CacheRegisterSyscacheCallback(EXTENSIONOID, + ext_sibling_callback, + (Datum) 0); + + /* Momentarily zero the space to ensure valid flag is false */ + cache_entry = (ExtensionSiblingCache *) + MemoryContextAllocZero(CacheMemoryContext, + sizeof(ExtensionSiblingCache)); + cache_entry->next = ext_sibling_list; + ext_sibling_list = cache_entry; + } + + cache_entry->reqfuncoid = funcoid; + cache_entry->typname = typname; + cache_entry->exthash = GetSysCacheHashValue1(EXTENSIONOID, + ObjectIdGetDatum(extoid)); + cache_entry->typeoid = typeoid; + /* Mark it valid only once it's fully populated */ + cache_entry->valid = true; + + return typeoid; +} + +/* + * ext_sibling_callback + * Syscache inval callback function for EXTENSIONOID cache + * + * It seems sufficient to invalidate ExtensionSiblingCache entries when + * the owning extension's pg_extension entry is modified or deleted. + * Neither a requesting function's OID, nor the OID of the object it's + * looking for, could change without an extension update or drop/recreate. + */ +static void +ext_sibling_callback(Datum arg, int cacheid, uint32 hashvalue) +{ + ExtensionSiblingCache *cache_entry; + + for (cache_entry = ext_sibling_list; cache_entry != NULL; + cache_entry = cache_entry->next) + { + if (hashvalue == 0 || + cache_entry->exthash == hashvalue) + cache_entry->valid = false; + } +} + /* * Utility functions to check validity of extension and version names */ diff --git a/src/backend/commands/operatorcmds.c b/src/backend/commands/operatorcmds.c index 51530eb2f56..47e4a02edb9 100644 --- a/src/backend/commands/operatorcmds.c +++ b/src/backend/commands/operatorcmds.c @@ -309,7 +309,6 @@ ValidateRestrictionEstimator(List *restrictionName) { Oid typeId[4]; Oid restrictionOid; - AclResult aclresult; typeId[0] = INTERNALOID; /* PlannerInfo */ typeId[1] = OIDOID; /* operator OID */ @@ -325,11 +324,33 @@ ValidateRestrictionEstimator(List *restrictionName) errmsg("restriction estimator function %s must return type %s", NameListToString(restrictionName), "float8"))); - /* Require EXECUTE rights for the estimator */ - aclresult = object_aclcheck(ProcedureRelationId, restrictionOid, GetUserId(), ACL_EXECUTE); - if (aclresult != ACLCHECK_OK) - aclcheck_error(aclresult, OBJECT_FUNCTION, - NameListToString(restrictionName)); + /* + * If the estimator is not a built-in function, require superuser + * privilege to install it. This protects against using something that is + * not a restriction estimator or has hard-wired assumptions about what + * data types it is working with. (Built-in estimators are required to + * defend themselves adequately against unexpected data type choices, but + * it seems impractical to expect that of extensions' estimators.) + * + * If it is built-in, only require EXECUTE rights. + */ + if (restrictionOid >= FirstGenbkiObjectId) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to specify a non-built-in restriction estimator function"))); + } + else + { + AclResult aclresult; + + aclresult = object_aclcheck(ProcedureRelationId, restrictionOid, + GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(restrictionName)); + } return restrictionOid; } @@ -345,7 +366,6 @@ ValidateJoinEstimator(List *joinName) Oid typeId[5]; Oid joinOid; Oid joinOid2; - AclResult aclresult; typeId[0] = INTERNALOID; /* PlannerInfo */ typeId[1] = OIDOID; /* operator OID */ @@ -383,11 +403,24 @@ ValidateJoinEstimator(List *joinName) errmsg("join estimator function %s must return type %s", NameListToString(joinName), "float8"))); - /* Require EXECUTE rights for the estimator */ - aclresult = object_aclcheck(ProcedureRelationId, joinOid, GetUserId(), ACL_EXECUTE); - if (aclresult != ACLCHECK_OK) - aclcheck_error(aclresult, OBJECT_FUNCTION, - NameListToString(joinName)); + /* privilege checks are the same as in ValidateRestrictionEstimator */ + if (joinOid >= FirstGenbkiObjectId) + { + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to specify a non-built-in join estimator function"))); + } + else + { + AclResult aclresult; + + aclresult = object_aclcheck(ProcedureRelationId, joinOid, + GetUserId(), ACL_EXECUTE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_FUNCTION, + NameListToString(joinName)); + } return joinOid; } diff --git a/src/backend/commands/statscmds.c b/src/backend/commands/statscmds.c index d3220849f6a..db0e2a5f74c 100644 --- a/src/backend/commands/statscmds.c +++ b/src/backend/commands/statscmds.c @@ -65,7 +65,7 @@ compare_int16(const void *a, const void *b) * CREATE STATISTICS */ ObjectAddress -CreateStatistics(CreateStatsStmt *stmt) +CreateStatistics(CreateStatsStmt *stmt, bool check_rights) { int16 attnums[STATS_MAX_DIMENSIONS]; int nattnums = 0; @@ -176,6 +176,21 @@ CreateStatistics(CreateStatsStmt *stmt) } namestrcpy(&stxname, namestr); + /* + * Check we have creation rights in target namespace. Skip check if + * caller doesn't want it. + */ + if (check_rights) + { + AclResult aclresult; + + aclresult = object_aclcheck(NamespaceRelationId, namespaceId, + GetUserId(), ACL_CREATE); + if (aclresult != ACLCHECK_OK) + aclcheck_error(aclresult, OBJECT_SCHEMA, + get_namespace_name(namespaceId)); + } + /* * Deal with the possibility that the statistics object already exists. */ diff --git a/src/backend/commands/tablecmds.c b/src/backend/commands/tablecmds.c index 3548786fe41..b8b69336b89 100644 --- a/src/backend/commands/tablecmds.c +++ b/src/backend/commands/tablecmds.c @@ -11038,7 +11038,7 @@ ATExecAddStatistics(AlteredTableInfo *tab, Relation rel, Assert(stmt->transformed); HOLD_DISPATCH(); - address = CreateStatistics(stmt); + address = CreateStatistics(stmt, !is_rebuild); RESUME_DISPATCH(); return address; diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c index 08756e59c2a..021c69ad031 100644 --- a/src/backend/tcop/utility.c +++ b/src/backend/tcop/utility.c @@ -2538,7 +2538,7 @@ ProcessUtilitySlow(ParseState *pstate, /* Run parse analysis ... */ stmt = transformStatsStmt(relid, stmt, queryString); - address = CreateStatistics(stmt); + address = CreateStatistics(stmt, true); } break; diff --git a/src/backend/tsearch/dict_synonym.c b/src/backend/tsearch/dict_synonym.c index c7cf7c04b60..fe7ca30ce97 100644 --- a/src/backend/tsearch/dict_synonym.c +++ b/src/backend/tsearch/dict_synonym.c @@ -47,8 +47,8 @@ findwrd(char *in, char **end, uint16 *flags) char *lastchar; /* Skip leading spaces */ - while (*in && t_isspace(in)) - in += pg_mblen(in); + while (*in && t_isspace_cstr(in)) + in += pg_mblen_cstr(in); /* Return NULL on empty lines */ if (*in == '\0') @@ -60,10 +60,10 @@ findwrd(char *in, char **end, uint16 *flags) lastchar = start = in; /* Find end of word */ - while (*in && !t_isspace(in)) + while (*in && !t_isspace_cstr(in)) { lastchar = in; - in += pg_mblen(in); + in += pg_mblen_cstr(in); } if (in - lastchar == 1 && t_iseq(lastchar, '*') && flags) diff --git a/src/backend/tsearch/dict_thesaurus.c b/src/backend/tsearch/dict_thesaurus.c index 80402e99375..dcee060ba19 100644 --- a/src/backend/tsearch/dict_thesaurus.c +++ b/src/backend/tsearch/dict_thesaurus.c @@ -190,8 +190,8 @@ thesaurusRead(const char *filename, DictThesaurus *d) ptr = line; /* is it a comment? */ - while (*ptr && t_isspace(ptr)) - ptr += pg_mblen(ptr); + while (*ptr && t_isspace_cstr(ptr)) + ptr += pg_mblen_cstr(ptr); if (t_iseq(ptr, '#') || *ptr == '\0' || t_iseq(ptr, '\n') || t_iseq(ptr, '\r')) @@ -212,7 +212,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) errmsg("unexpected delimiter"))); state = TR_WAITSUBS; } - else if (!t_isspace(ptr)) + else if (!t_isspace_cstr(ptr)) { beginwrd = ptr; state = TR_INLEX; @@ -225,7 +225,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITSUBS; } - else if (t_isspace(ptr)) + else if (t_isspace_cstr(ptr)) { newLexeme(d, beginwrd, ptr, idsubst, posinsubst++); state = TR_WAITLEX; @@ -237,15 +237,15 @@ thesaurusRead(const char *filename, DictThesaurus *d) { useasis = true; state = TR_INSUBS; - beginwrd = ptr + pg_mblen(ptr); + beginwrd = ptr + pg_mblen_cstr(ptr); } else if (t_iseq(ptr, '\\')) { useasis = false; state = TR_INSUBS; - beginwrd = ptr + pg_mblen(ptr); + beginwrd = ptr + pg_mblen_cstr(ptr); } - else if (!t_isspace(ptr)) + else if (!t_isspace_cstr(ptr)) { useasis = false; beginwrd = ptr; @@ -254,7 +254,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) } else if (state == TR_INSUBS) { - if (t_isspace(ptr)) + if (t_isspace_cstr(ptr)) { if (ptr == beginwrd) ereport(ERROR, @@ -267,7 +267,7 @@ thesaurusRead(const char *filename, DictThesaurus *d) else elog(ERROR, "unrecognized thesaurus state: %d", state); - ptr += pg_mblen(ptr); + ptr += pg_mblen_cstr(ptr); } if (state == TR_INSUBS) diff --git a/src/backend/tsearch/regis.c b/src/backend/tsearch/regis.c index 0c74c6d0c1c..ee5bc378350 100644 --- a/src/backend/tsearch/regis.c +++ b/src/backend/tsearch/regis.c @@ -37,7 +37,7 @@ RS_isRegis(const char *str) { if (state == RS_IN_WAIT) { - if (t_isalpha(c)) + if (t_isalpha_cstr(c)) /* okay */ ; else if (t_iseq(c, '[')) state = RS_IN_ONEOF; @@ -48,14 +48,14 @@ RS_isRegis(const char *str) { if (t_iseq(c, '^')) state = RS_IN_NONEOF; - else if (t_isalpha(c)) + else if (t_isalpha_cstr(c)) state = RS_IN_ONEOF_IN; else return false; } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { - if (t_isalpha(c)) + if (t_isalpha_cstr(c)) /* okay */ ; else if (t_iseq(c, ']')) state = RS_IN_WAIT; @@ -64,7 +64,7 @@ RS_isRegis(const char *str) } else elog(ERROR, "internal error in RS_isRegis: state %d", state); - c += pg_mblen(c); + c += pg_mblen_cstr(c); } return (state == RS_IN_WAIT); @@ -96,15 +96,14 @@ RS_compile(Regis *r, bool issuffix, const char *str) { if (state == RS_IN_WAIT) { - if (t_isalpha(c)) + if (t_isalpha_cstr(c)) { if (ptr) ptr = newRegisNode(ptr, len); else ptr = r->node = newRegisNode(NULL, len); - COPYCHAR(ptr->data, c); ptr->type = RSF_ONEOF; - ptr->len = pg_mblen(c); + ptr->len = ts_copychar_cstr(ptr->data, c); } else if (t_iseq(c, '[')) { @@ -125,10 +124,9 @@ RS_compile(Regis *r, bool issuffix, const char *str) ptr->type = RSF_NONEOF; state = RS_IN_NONEOF; } - else if (t_isalpha(c)) + else if (t_isalpha_cstr(c)) { - COPYCHAR(ptr->data, c); - ptr->len = pg_mblen(c); + ptr->len = ts_copychar_cstr(ptr->data, c); state = RS_IN_ONEOF_IN; } else /* shouldn't get here */ @@ -136,11 +134,8 @@ RS_compile(Regis *r, bool issuffix, const char *str) } else if (state == RS_IN_ONEOF_IN || state == RS_IN_NONEOF) { - if (t_isalpha(c)) - { - COPYCHAR(ptr->data + ptr->len, c); - ptr->len += pg_mblen(c); - } + if (t_isalpha_cstr(c)) + ptr->len += ts_copychar_cstr(ptr->data + ptr->len, c); else if (t_iseq(c, ']')) state = RS_IN_WAIT; else /* shouldn't get here */ @@ -148,7 +143,7 @@ RS_compile(Regis *r, bool issuffix, const char *str) } else elog(ERROR, "internal error in RS_compile: state %d", state); - c += pg_mblen(c); + c += pg_mblen_cstr(c); } if (state != RS_IN_WAIT) /* shouldn't get here */ @@ -187,10 +182,10 @@ mb_strchr(char *str, char *c) char *ptr = str; bool res = false; - clen = pg_mblen(c); + clen = pg_mblen_cstr(c); while (*ptr && !res) { - plen = pg_mblen(ptr); + plen = pg_mblen_cstr(ptr); if (plen == clen) { i = plen; @@ -219,7 +214,7 @@ RS_execute(Regis *r, char *str) while (*c) { len++; - c += pg_mblen(c); + c += pg_mblen_cstr(c); } if (len < r->nchar) @@ -230,7 +225,7 @@ RS_execute(Regis *r, char *str) { len -= r->nchar; while (len-- > 0) - c += pg_mblen(c); + c += pg_mblen_cstr(c); } @@ -250,7 +245,7 @@ RS_execute(Regis *r, char *str) elog(ERROR, "unrecognized regis node type: %d", ptr->type); } ptr = ptr->next; - c += pg_mblen(c); + c += pg_mblen_cstr(c); } return true; diff --git a/src/backend/tsearch/spell.c b/src/backend/tsearch/spell.c index 8d1d81501f2..8f4fdf2782c 100644 --- a/src/backend/tsearch/spell.c +++ b/src/backend/tsearch/spell.c @@ -232,7 +232,7 @@ findchar(char *str, int c) { if (t_iseq(str, c)) return str; - str += pg_mblen(str); + str += pg_mblen_cstr(str); } return NULL; @@ -245,7 +245,7 @@ findchar2(char *str, int c1, int c2) { if (t_iseq(str, c1) || t_iseq(str, c2)) return str; - str += pg_mblen(str); + str += pg_mblen_cstr(str); } return NULL; @@ -352,6 +352,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) char *next, *sbuf = *sflagset; int maxstep; + int clen; bool stop = false; bool met_comma = false; @@ -363,11 +364,11 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) { case FM_LONG: case FM_CHAR: - COPYCHAR(sflag, *sflagset); - sflag += pg_mblen(*sflagset); + clen = ts_copychar_cstr(sflag, *sflagset); + sflag += clen; /* Go to start of the next flag */ - *sflagset += pg_mblen(*sflagset); + *sflagset += clen; /* Check if we get all characters of flag */ maxstep--; @@ -391,7 +392,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) *sflagset = next; while (**sflagset) { - if (t_isdigit(*sflagset)) + if (t_isdigit_cstr(*sflagset)) { if (!met_comma) ereport(ERROR, @@ -409,7 +410,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) *sflagset))); met_comma = true; } - else if (!t_isspace(*sflagset)) + else if (!t_isspace_cstr(*sflagset)) { ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), @@ -417,7 +418,7 @@ getNextFlagFromString(IspellDict *Conf, char **sflagset, char *sflag) *sflagset))); } - *sflagset += pg_mblen(*sflagset); + *sflagset += pg_mblen_cstr(*sflagset); } stop = true; break; @@ -543,7 +544,7 @@ NIImportDictionary(IspellDict *Conf, const char *filename) while (*s) { /* we allow only single encoded flags for faster works */ - if (pg_mblen(s) == 1 && t_isprint(s) && !t_isspace(s)) + if (pg_mblen_cstr(s) == 1 && t_isprint_unbounded(s) && !t_isspace_unbounded(s)) s++; else { @@ -559,12 +560,12 @@ NIImportDictionary(IspellDict *Conf, const char *filename) s = line; while (*s) { - if (t_isspace(s)) + if (t_isspace_cstr(s)) { *s = '\0'; break; } - s += pg_mblen(s); + s += pg_mblen_cstr(s); } pstr = lowerstr_ctx(Conf, line); @@ -796,17 +797,17 @@ get_nextfield(char **str, char *next) while (**str) { + int clen = pg_mblen_cstr(*str); + if (state == PAE_WAIT_MASK) { if (t_iseq(*str, '#')) return false; - else if (!t_isspace(*str)) + else if (!t_isspace_cstr(*str)) { - int clen = pg_mblen(*str); - if (clen < avail) { - COPYCHAR(next, *str); + ts_copychar_with_len(next, *str, clen); next += clen; avail -= clen; } @@ -815,24 +816,22 @@ get_nextfield(char **str, char *next) } else /* state == PAE_INMASK */ { - if (t_isspace(*str)) + if (t_isspace_cstr(*str)) { *next = '\0'; return true; } else { - int clen = pg_mblen(*str); - if (clen < avail) { - COPYCHAR(next, *str); + ts_copychar_with_len(next, *str, clen); next += clen; avail -= clen; } } } - *str += pg_mblen(*str); + *str += clen; } *next = '\0'; @@ -922,14 +921,15 @@ parse_affentry(char *str, char *mask, char *find, char *repl) while (*str) { + int clen = pg_mblen_cstr(str); + if (state == PAE_WAIT_MASK) { if (t_iseq(str, '#')) return false; - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) { - COPYCHAR(pmask, str); - pmask += pg_mblen(str); + pmask += ts_copychar_with_len(pmask, str, clen); state = PAE_INMASK; } } @@ -940,10 +940,9 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *pmask = '\0'; state = PAE_WAIT_FIND; } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) { - COPYCHAR(pmask, str); - pmask += pg_mblen(str); + pmask += ts_copychar_with_len(pmask, str, clen); } } else if (state == PAE_WAIT_FIND) @@ -952,13 +951,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl) { state = PAE_INFIND; } - else if (t_isalpha(str) || t_iseq(str, '\'') /* english 's */ ) + else if (t_isalpha_cstr(str) || t_iseq(str, '\'') /* english 's */ ) { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); + prepl += ts_copychar_with_len(prepl, str, clen); state = PAE_INREPL; } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -970,12 +968,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *pfind = '\0'; state = PAE_WAIT_REPL; } - else if (t_isalpha(str)) + else if (t_isalpha_cstr(str)) { - COPYCHAR(pfind, str); - pfind += pg_mblen(str); + pfind += ts_copychar_with_len(pfind, str, clen); } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -986,13 +983,12 @@ parse_affentry(char *str, char *mask, char *find, char *repl) { break; /* void repl */ } - else if (t_isalpha(str)) + else if (t_isalpha_cstr(str)) { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); + prepl += ts_copychar_with_len(prepl, str, clen); state = PAE_INREPL; } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -1004,12 +1000,11 @@ parse_affentry(char *str, char *mask, char *find, char *repl) *prepl = '\0'; break; } - else if (t_isalpha(str)) + else if (t_isalpha_cstr(str)) { - COPYCHAR(prepl, str); - prepl += pg_mblen(str); + prepl += ts_copychar_with_len(prepl, str, clen); } - else if (!t_isspace(str)) + else if (!t_isspace_cstr(str)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("syntax error"))); @@ -1017,7 +1012,7 @@ parse_affentry(char *str, char *mask, char *find, char *repl) else elog(ERROR, "unrecognized state in parse_affentry: %d", state); - str += pg_mblen(str); + str += clen; } *pmask = *pfind = *prepl = '\0'; @@ -1070,10 +1065,9 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) CompoundAffixFlag *newValue; char sbuf[BUFSIZ]; char *sflag; - int clen; - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); if (!*s) ereport(ERROR, @@ -1082,10 +1076,10 @@ addCompoundAffixFlagValue(IspellDict *Conf, char *s, uint32 val) /* Get flag without \n */ sflag = sbuf; - while (*s && !t_isspace(s) && *s != '\n') + while (*s && !t_isspace_cstr(s) && *s != '\n') { - clen = pg_mblen(s); - COPYCHAR(sflag, s); + int clen = ts_copychar_cstr(sflag, s); + sflag += clen; s += clen; } @@ -1228,7 +1222,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) while ((recoded = tsearch_readline(&trst)) != NULL) { - if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#')) { pfree(recoded); continue; @@ -1265,8 +1259,8 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) { char *s = recoded + strlen("FLAG"); - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); if (*s) { @@ -1301,7 +1295,7 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename) { int fields_read; - if (*recoded == '\0' || t_isspace(recoded) || t_iseq(recoded, '#')) + if (*recoded == '\0' || t_isspace_cstr(recoded) || t_iseq(recoded, '#')) goto nextline; fields_read = parse_ooaffentry(recoded, type, sflag, find, repl, mask); @@ -1464,12 +1458,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s = findchar2(recoded, 'l', 'L'); if (s) { - while (*s && !t_isspace(s)) - s += pg_mblen(s); - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && !t_isspace_cstr(s)) + s += pg_mblen_cstr(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); - if (*s && pg_mblen(s) == 1) + if (*s && pg_mblen_cstr(s) == 1) { addCompoundAffixFlagValue(Conf, s, FF_COMPOUNDFLAG); Conf->usecompound = true; @@ -1497,8 +1491,8 @@ NIImportAffixes(IspellDict *Conf, const char *filename) s = recoded + 4; /* we need non-lowercased string */ flagflags = 0; - while (*s && t_isspace(s)) - s += pg_mblen(s); + while (*s && t_isspace_cstr(s)) + s += pg_mblen_cstr(s); if (*s == '*') { @@ -1519,14 +1513,13 @@ NIImportAffixes(IspellDict *Conf, const char *filename) * be followed by EOL, whitespace, or ':'. Otherwise this is a * new-format flag command. */ - if (*s && pg_mblen(s) == 1) + if (*s && pg_mblen_cstr(s) == 1) { - COPYCHAR(flag, s); + flag[0] = *s++; flag[1] = '\0'; - s++; if (*s == '\0' || *s == '#' || *s == '\n' || *s == ':' || - t_isspace(s)) + t_isspace_cstr(s)) { oldformat = true; goto nextline; @@ -1750,7 +1743,7 @@ NISortDictionary(IspellDict *Conf) (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", Conf->Spell[i]->p.flag))); - if (*end != '\0' && !t_isdigit(end) && !t_isspace(end)) + if (*end != '\0' && !t_isdigit_cstr(end) && !t_isspace_cstr(end)) ereport(ERROR, (errcode(ERRCODE_CONFIG_FILE_ERROR), errmsg("invalid affix alias \"%s\"", diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index 537ad288f53..4a01b65d577 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -33,92 +33,44 @@ static void tsearch_readline_callback(void *arg); */ #define WC_BUF_LEN 3 -/* - * The reason these functions use a 3-wchar_t output buffer, not 2 as you - * might expect, is that on Windows "wchar_t" is 16 bits and what we'll be - * getting from char2wchar() is UTF16 not UTF32. A single input character - * may therefore produce a surrogate pair rather than just one wchar_t; - * we also need room for a trailing null. When we do get a surrogate pair, - * we pass just the first code to iswdigit() etc, so that these functions will - * always return false for characters outside the Basic Multilingual Plane. - */ -#define WC_BUF_LEN 3 - -int -t_isdigit(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isdigit(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswdigit((wint_t) character[0]); -} - -int -t_isspace(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isspace(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswspace((wint_t) character[0]); -} - -int -t_isalpha(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isalpha(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswalpha((wint_t) character[0]); -} - -int -t_isalnum(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isalnum(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswalnum((wint_t) character[0]); -} - -int -t_isprint(const char *ptr) -{ - int clen = pg_mblen(ptr); - wchar_t character[WC_BUF_LEN]; - pg_locale_t mylocale = 0; /* TODO */ - - if (clen == 1 || database_ctype_is_c) - return isprint(TOUCHAR(ptr)); - - char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); - - return iswprint((wint_t) character[0]); +#define GENERATE_T_ISCLASS_DEF(character_class) \ +/* mblen shall be that of the first character */ \ +int \ +t_is##character_class##_with_len(const char *ptr, int mblen) \ +{ \ + int clen = pg_mblen_with_len(ptr, mblen); \ + wchar_t character[WC_BUF_LEN]; \ + pg_locale_t mylocale = 0; /* TODO */ \ + if (clen == 1 || database_ctype_is_c) \ + return is##character_class(TOUCHAR(ptr)); \ + char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); \ + return isw##character_class((wint_t) character[0]); \ +} \ +\ +/* ptr shall point to a NUL-terminated string */ \ +int \ +t_is##character_class##_cstr(const char *ptr) \ +{ \ + return t_is##character_class##_with_len(ptr, pg_mblen_cstr(ptr)); \ +} \ +/* ptr shall point to a string with pre-validated encoding */ \ +int \ +t_is##character_class##_unbounded(const char *ptr) \ +{ \ + return t_is##character_class##_with_len(ptr, pg_mblen_unbounded(ptr)); \ +} \ +/* historical name for _unbounded */ \ +int \ +t_is##character_class(const char *ptr) \ +{ \ + return t_is##character_class##_unbounded(ptr); \ } +GENERATE_T_ISCLASS_DEF(alnum) +GENERATE_T_ISCLASS_DEF(alpha) +GENERATE_T_ISCLASS_DEF(digit) +GENERATE_T_ISCLASS_DEF(print) +GENERATE_T_ISCLASS_DEF(space) /* * Set up to read a file using tsearch_readline(). This facility is diff --git a/src/backend/tsearch/ts_selfuncs.c b/src/backend/tsearch/ts_selfuncs.c index 92afc67a5c4..511ba6be7ff 100644 --- a/src/backend/tsearch/ts_selfuncs.c +++ b/src/backend/tsearch/ts_selfuncs.c @@ -109,12 +109,14 @@ tsmatchsel(PG_FUNCTION_ARGS) * OK, there's a Var and a Const we're dealing with here. We need the * Const to be a TSQuery, else we can't do anything useful. We have to * check this because the Var might be the TSQuery not the TSVector. + * + * Also check that the Var really is a TSVector, in case this estimator is + * mistakenly attached to some other operator. */ - if (((Const *) other)->consttype == TSQUERYOID) + if (((Const *) other)->consttype == TSQUERYOID && + vardata.vartype == TSVECTOROID) { /* tsvector @@ tsquery or the other way around */ - Assert(vardata.vartype == TSVECTOROID); - selec = tsquerysel(&vardata, ((Const *) other)->constvalue); } else diff --git a/src/backend/tsearch/ts_utils.c b/src/backend/tsearch/ts_utils.c index 7c4c2a91123..463e5253558 100644 --- a/src/backend/tsearch/ts_utils.c +++ b/src/backend/tsearch/ts_utils.c @@ -88,8 +88,8 @@ readstoplist(const char *fname, StopList *s, char *(*wordop) (const char *)) char *pbuf = line; /* Trim trailing space */ - while (*pbuf && !t_isspace(pbuf)) - pbuf += pg_mblen(pbuf); + while (*pbuf && !t_isspace_cstr(pbuf)) + pbuf += pg_mblen_cstr(pbuf); *pbuf = '\0'; /* Skip empty lines */ diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 9130b148366..05d605ade51 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -1727,7 +1727,8 @@ TParserGet(TParser *prs) prs->state->charlen = 0; else prs->state->charlen = (prs->charmaxlen == 1) ? prs->charmaxlen : - pg_mblen(prs->str + prs->state->posbyte); + pg_mblen_range(prs->str + prs->state->posbyte, + prs->str + prs->lenstr); Assert(prs->state->posbyte + prs->state->charlen <= prs->lenstr); Assert(prs->state->state >= TPS_Base && prs->state->state < TPS_Null); diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index ff3e1af0a1d..ec28adc92c6 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3803,6 +3803,12 @@ deconstruct_array_builtin(ArrayType *array, elmalign = TYPALIGN_SHORT; break; + case INT4OID: + elmlen = sizeof(int32); + elmbyval = true; + elmalign = TYPALIGN_INT; + break; + case OIDOID: elmlen = sizeof(Oid); elmbyval = true; diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c index e5ac3ad23df..a20fbf18c24 100644 --- a/src/backend/utils/adt/encode.c +++ b/src/backend/utils/adt/encode.c @@ -215,7 +215,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) ereturn(escontext, 0, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal digit: \"%.*s\"", - pg_mblen(s), s))); + pg_mblen_range(s, srcend), s))); s++; if (s >= srcend) ereturn(escontext, 0, @@ -225,7 +225,7 @@ hex_decode_safe(const char *src, size_t len, char *dst, Node *escontext) ereturn(escontext, 0, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid hexadecimal digit: \"%.*s\"", - pg_mblen(s), s))); + pg_mblen_range(s, srcend), s))); s++; *p++ = (v1 << 4) | v2; } @@ -354,7 +354,7 @@ pg_base64_decode(const char *src, size_t len, char *dst) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence", - pg_mblen(s - 1), s - 1))); + pg_mblen_range(s - 1, srcend), s - 1))); } /* add it to buffer */ buf = (buf << 6) + b; diff --git a/src/backend/utils/adt/format_type.c b/src/backend/utils/adt/format_type.c index 12402a06379..a9054d11b0c 100644 --- a/src/backend/utils/adt/format_type.c +++ b/src/backend/utils/adt/format_type.c @@ -444,11 +444,15 @@ oidvectortypes(PG_FUNCTION_ARGS) { oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0); char *result; - int numargs = oidArray->dim1; + int numargs; int num; size_t total; size_t left; + /* validate input before fetching dim1 */ + check_valid_oidvector(oidArray); + numargs = oidArray->dim1; + total = 20 * numargs + 1; result = palloc(total); result[0] = '\0'; diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 738cdf81a7f..35e64caaaa6 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -1385,7 +1385,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, ereport(ERROR, (errcode(ERRCODE_INVALID_DATETIME_FORMAT), errmsg("invalid datetime format separator: \"%s\"", - pnstrdup(str, pg_mblen(str))))); + pnstrdup(str, pg_mblen_cstr(str))))); if (*str == ' ') n->type = NODE_TYPE_SPACE; @@ -1415,7 +1415,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, /* backslash quotes the next character, if any */ if (*str == '\\' && *(str + 1)) str++; - chlen = pg_mblen(str); + chlen = pg_mblen_cstr(str); n->type = NODE_TYPE_CHAR; memcpy(n->character, str, chlen); n->character[chlen] = '\0'; @@ -1433,7 +1433,7 @@ parse_format(FormatNode *node, const char *str, const KeyWord *kw, */ if (*str == '\\' && *(str + 1) == '"') str++; - chlen = pg_mblen(str); + chlen = pg_mblen_cstr(str); if ((flags & DCH_FLAG) && is_separator_char(str)) n->type = NODE_TYPE_SEPARATOR; @@ -2138,8 +2138,8 @@ asc_toupper_z(const char *buff) do { \ if (S_THth(_suf)) \ { \ - if (*(ptr)) (ptr) += pg_mblen(ptr); \ - if (*(ptr)) (ptr) += pg_mblen(ptr); \ + if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \ + if (*(ptr)) (ptr) += pg_mblen_cstr(ptr); \ } \ } while (0) @@ -3345,7 +3345,7 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, * insist that the consumed character match the format's * character. */ - s += pg_mblen(s); + s += pg_mblen_cstr(s); } continue; } @@ -3367,11 +3367,11 @@ DCH_from_char(FormatNode *node, const char *in, TmFromChar *out, if (extra_skip > 0) extra_skip--; else - s += pg_mblen(s); + s += pg_mblen_cstr(s); } else { - int chlen = pg_mblen(s); + int chlen = pg_mblen_cstr(s); /* * Standard mode requires strict match of format characters. @@ -5615,13 +5615,15 @@ NUM_numpart_to_char(NUMProc *Np, int id) static void NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len) { + const char *end = Np->inout + input_len; + while (n-- > 0) { if (OVERLOAD_TEST) break; /* end of input */ if (strchr("0123456789.,+-", *Np->inout_p) != NULL) break; /* it's a data character */ - Np->inout_p += pg_mblen(Np->inout_p); + Np->inout_p += pg_mblen_range(Np->inout_p, end); } } @@ -6074,7 +6076,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, } else { - Np->inout_p += pg_mblen(Np->inout_p); + Np->inout_p += pg_mblen_range(Np->inout_p, Np->inout + input_len); } continue; } diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c index 44d1c7ad0c4..f9a08257ac3 100644 --- a/src/backend/utils/adt/int.c +++ b/src/backend/utils/adt/int.c @@ -134,6 +134,30 @@ buildint2vector(const int16 *int2s, int n) return result; } +/* + * validate that an array object meets the restrictions of int2vector + * + * We need this because there are pathways by which a general int2[] array can + * be cast to int2vector, allowing the type's restrictions to be violated. + * All code that receives an int2vector as a SQL parameter should check this. + */ +static void +check_valid_int2vector(const int2vector *int2Array) +{ + /* + * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because + * otherwise the array's layout will not be what calling code expects. We + * needn't be picky about the index lower bound though. Checking elemtype + * is just paranoia. + */ + if (int2Array->ndim != 1 || + int2Array->dataoffset != 0 || + int2Array->elemtype != INT2OID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array is not a valid int2vector"))); +} + /* * int2vectorin - converts "num num ..." to internal form */ @@ -208,10 +232,14 @@ int2vectorout(PG_FUNCTION_ARGS) { int2vector *int2Array = (int2vector *) PG_GETARG_POINTER(0); int num, - nnums = int2Array->dim1; + nnums; char *rp; char *result; + /* validate input before fetching dim1 */ + check_valid_int2vector(int2Array); + nnums = int2Array->dim1; + /* assumes sign, 5 digits, ' ' */ rp = result = (char *) palloc(nnums * 7 + 1); for (num = 0; num < nnums; num++) @@ -272,6 +300,7 @@ int2vectorrecv(PG_FUNCTION_ARGS) Datum int2vectorsend(PG_FUNCTION_ARGS) { + /* We don't do check_valid_int2vector, since array_send won't care */ return array_send(fcinfo); } diff --git a/src/backend/utils/adt/jsonfuncs.c b/src/backend/utils/adt/jsonfuncs.c index 70cb922e6b7..42b886c621a 100644 --- a/src/backend/utils/adt/jsonfuncs.c +++ b/src/backend/utils/adt/jsonfuncs.c @@ -682,7 +682,7 @@ report_json_context(JsonLexContext *lex) { /* Advance to next multibyte character */ if (IS_HIGHBIT_SET(*context_start)) - context_start += pg_mblen(context_start); + context_start += pg_mblen_range(context_start, context_end); else context_start++; } diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index adc259d5bf8..c1880c113b8 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -527,7 +527,8 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("invalid input syntax for type %s", "jsonpath"), errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.", - pg_mblen(flags->val + i), flags->val + i))); + pg_mblen_range(flags->val + i, flags->val + flags->len), + flags->val + i))); break; } } diff --git a/src/backend/utils/adt/levenshtein.c b/src/backend/utils/adt/levenshtein.c index 0763daf0e83..9a84c8d0fc4 100644 --- a/src/backend/utils/adt/levenshtein.c +++ b/src/backend/utils/adt/levenshtein.c @@ -83,6 +83,8 @@ varstr_levenshtein(const char *source, int slen, int *s_char_len = NULL; int j; const char *y; + const char *send = source + slen; + const char *tend = target + tlen; /* * For varstr_levenshtein_less_equal, we have real variables called @@ -183,10 +185,10 @@ varstr_levenshtein(const char *source, int slen, #endif /* - * In order to avoid calling pg_mblen() repeatedly on each character in s, - * we cache all the lengths before starting the main loop -- but if all - * the characters in both strings are single byte, then we skip this and - * use a fast-path in the main loop. If only one string contains + * In order to avoid calling pg_mblen_range() repeatedly on each character + * in s, we cache all the lengths before starting the main loop -- but if + * all the characters in both strings are single byte, then we skip this + * and use a fast-path in the main loop. If only one string contains * multi-byte characters, we still build the array, so that the fast-path * needn't deal with the case where the array hasn't been initialized. */ @@ -198,7 +200,7 @@ varstr_levenshtein(const char *source, int slen, s_char_len = (int *) palloc((m + 1) * sizeof(int)); for (i = 0; i < m; ++i) { - s_char_len[i] = pg_mblen(cp); + s_char_len[i] = pg_mblen_range(cp, send); cp += s_char_len[i]; } s_char_len[i] = 0; @@ -224,7 +226,7 @@ varstr_levenshtein(const char *source, int slen, { int *temp; const char *x = source; - int y_char_len = n != tlen + 1 ? pg_mblen(y) : 1; + int y_char_len = n != tlen + 1 ? pg_mblen_range(y, tend) : 1; int i; #ifdef LEVENSHTEIN_LESS_EQUAL diff --git a/src/backend/utils/adt/like.c b/src/backend/utils/adt/like.c index 33a2f46aab0..776112c695f 100644 --- a/src/backend/utils/adt/like.c +++ b/src/backend/utils/adt/like.c @@ -55,20 +55,20 @@ static int Generic_Text_IC_like(text *str, text *pat, Oid collation); *-------------------- */ static inline int -wchareq(const char *p1, const char *p2) +wchareq(const char *p1, int p1len, const char *p2, int p2len) { - int p1_len; + int p1clen; /* Optimization: quickly compare the first byte. */ if (*p1 != *p2) return 0; - p1_len = pg_mblen(p1); - if (pg_mblen(p2) != p1_len) + p1clen = pg_mblen_with_len(p1, p1len); + if (pg_mblen_with_len(p2, p2len) != p1clen) return 0; /* They are the same length */ - while (p1_len--) + while (p1clen--) { if (*p1++ != *p2++) return 0; @@ -107,11 +107,11 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) #define NextByte(p, plen) ((p)++, (plen)--) /* Set up to compile like_match.c for multibyte characters */ -#define CHAREQ(p1, p2) wchareq((p1), (p2)) +#define CHAREQ(p1, p1len, p2, p2len) wchareq((p1), (p1len), (p2), (p2len)) #define NextChar(p, plen) \ - do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) + do { int __l = pg_mblen_with_len((p), (plen)); (p) +=__l; (plen) -=__l; } while (0) #define CopyAdvChar(dst, src, srclen) \ - do { int __l = pg_mblen(src); \ + do { int __l = pg_mblen_with_len((src), (srclen)); \ (srclen) -= __l; \ while (__l-- > 0) \ *(dst)++ = *(src)++; \ @@ -123,7 +123,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) #include "like_match.c" /* Set up to compile like_match.c for single-byte characters */ -#define CHAREQ(p1, p2) (*(p1) == *(p2)) +#define CHAREQ(p1, p1len, p2, p2len) (*(p1) == *(p2)) #define NextChar(p, plen) NextByte((p), (plen)) #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) diff --git a/src/backend/utils/adt/like_match.c b/src/backend/utils/adt/like_match.c index 8b2dff6d6e2..e586de9efd1 100644 --- a/src/backend/utils/adt/like_match.c +++ b/src/backend/utils/adt/like_match.c @@ -294,6 +294,7 @@ do_like_escape(text *pat, text *esc) errhint("Escape string must be empty or one character."))); e = VARDATA_ANY(esc); + elen = VARSIZE_ANY_EXHDR(esc); /* * If specified escape is '\', just copy the pattern as-is. @@ -312,7 +313,7 @@ do_like_escape(text *pat, text *esc) afterescape = false; while (plen > 0) { - if (CHAREQ(p, e) && !afterescape) + if (CHAREQ(p, plen, e, elen) && !afterescape) { *r++ = '\\'; NextChar(p, plen); diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c index a8c0f954dfa..192df1c569d 100644 --- a/src/backend/utils/adt/network_selfuncs.c +++ b/src/backend/utils/adt/network_selfuncs.c @@ -43,9 +43,9 @@ /* Maximum number of items to consider in join selectivity calculations */ #define MAX_CONSIDERED_ELEMS 1024 -static Selectivity networkjoinsel_inner(Oid operator, +static Selectivity networkjoinsel_inner(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2); -static Selectivity networkjoinsel_semi(Oid operator, +static Selectivity networkjoinsel_semi(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2); static Selectivity mcv_population(float4 *mcv_numbers, int mcv_nvalues); static Selectivity inet_hist_value_sel(Datum *values, int nvalues, @@ -82,6 +82,7 @@ networksel(PG_FUNCTION_ARGS) Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); + int opr_codenum; VariableStatData vardata; Node *other; bool varonleft; @@ -95,6 +96,14 @@ networksel(PG_FUNCTION_ARGS) nullfrac; FmgrInfo proc; + /* + * Before all else, verify that the operator is one of the ones supported + * by this function, which in turn proves that the input datatypes are + * what we expect. Otherwise, attaching this selectivity function to some + * unexpected operator could cause trouble. + */ + opr_codenum = inet_opr_codenum(operator); + /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. @@ -150,13 +159,12 @@ networksel(PG_FUNCTION_ARGS) STATISTIC_KIND_HISTOGRAM, InvalidOid, ATTSTATSSLOT_VALUES)) { - int opr_codenum = inet_opr_codenum(operator); + int h_codenum; /* Commute if needed, so we can consider histogram to be on the left */ - if (!varonleft) - opr_codenum = -opr_codenum; + h_codenum = varonleft ? opr_codenum : -opr_codenum; non_mcv_selec = inet_hist_value_sel(hslot.values, hslot.nvalues, - constvalue, opr_codenum); + constvalue, h_codenum); free_attstatsslot(&hslot); } @@ -203,10 +211,19 @@ networkjoinsel(PG_FUNCTION_ARGS) #endif SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); double selec; + int opr_codenum; VariableStatData vardata1; VariableStatData vardata2; bool join_is_reversed; + /* + * Before all else, verify that the operator is one of the ones supported + * by this function, which in turn proves that the input datatypes are + * what we expect. Otherwise, attaching this selectivity function to some + * unexpected operator could cause trouble. + */ + opr_codenum = inet_opr_codenum(operator); + get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); @@ -220,16 +237,19 @@ networkjoinsel(PG_FUNCTION_ARGS) * Selectivity for left/full join is not exactly the same as inner * join, but we neglect the difference, as eqjoinsel does. */ - selec = networkjoinsel_inner(operator, &vardata1, &vardata2); + selec = networkjoinsel_inner(operator, opr_codenum, + &vardata1, &vardata2); break; case JOIN_SEMI: case JOIN_ANTI: case JOIN_LASJ_NOTIN: /* Here, it's important that we pass the outer var on the left. */ if (!join_is_reversed) - selec = networkjoinsel_semi(operator, &vardata1, &vardata2); + selec = networkjoinsel_semi(operator, opr_codenum, + &vardata1, &vardata2); else selec = networkjoinsel_semi(get_commutator(operator), + -opr_codenum, &vardata2, &vardata1); break; default: @@ -261,7 +281,7 @@ networkjoinsel(PG_FUNCTION_ARGS) * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner(). */ static Selectivity -networkjoinsel_inner(Oid operator, +networkjoinsel_inner(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2) { Form_pg_statistic stats; @@ -274,7 +294,6 @@ networkjoinsel_inner(Oid operator, mcv2_exists = false, hist1_exists = false, hist2_exists = false; - int opr_codenum; int mcv1_length = 0, mcv2_length = 0; AttStatsSlot mcv1_slot; @@ -326,8 +345,6 @@ networkjoinsel_inner(Oid operator, memset(&hist2_slot, 0, sizeof(hist2_slot)); } - opr_codenum = inet_opr_codenum(operator); - /* * Calculate selectivity for MCV vs MCV matches. */ @@ -388,7 +405,7 @@ networkjoinsel_inner(Oid operator, * histogram selectivity for semi/anti join cases. */ static Selectivity -networkjoinsel_semi(Oid operator, +networkjoinsel_semi(Oid operator, int opr_codenum, VariableStatData *vardata1, VariableStatData *vardata2) { Form_pg_statistic stats; @@ -402,7 +419,6 @@ networkjoinsel_semi(Oid operator, mcv2_exists = false, hist1_exists = false, hist2_exists = false; - int opr_codenum; FmgrInfo proc; int i, mcv1_length = 0, @@ -456,7 +472,6 @@ networkjoinsel_semi(Oid operator, memset(&hist2_slot, 0, sizeof(hist2_slot)); } - opr_codenum = inet_opr_codenum(operator); fmgr_info(get_opcode(operator), &proc); /* Estimate number of input rows represented by RHS histogram. */ @@ -828,6 +843,9 @@ inet_semi_join_sel(Datum lhs_value, /* * Assign useful code numbers for the subnet inclusion/overlap operators * + * This will throw an error if the operator is not one of the ones we + * support in networksel() and networkjoinsel(). + * * Only inet_masklen_inclusion_cmp() and inet_hist_match_divider() depend * on the exact codes assigned here; but many other places in this file * know that they can negate a code to obtain the code for the commutator diff --git a/src/backend/utils/adt/oid.c b/src/backend/utils/adt/oid.c index 3f7af5b3a06..066511443cf 100644 --- a/src/backend/utils/adt/oid.c +++ b/src/backend/utils/adt/oid.c @@ -106,6 +106,30 @@ buildoidvector(const Oid *oids, int n) return result; } +/* + * validate that an array object meets the restrictions of oidvector + * + * We need this because there are pathways by which a general oid[] array can + * be cast to oidvector, allowing the type's restrictions to be violated. + * All code that receives an oidvector as a SQL parameter should check this. + */ +void +check_valid_oidvector(const oidvector *oidArray) +{ + /* + * We insist on ndim == 1 and dataoffset == 0 (that is, no nulls) because + * otherwise the array's layout will not be what calling code expects. We + * needn't be picky about the index lower bound though. Checking elemtype + * is just paranoia. + */ + if (oidArray->ndim != 1 || + oidArray->dataoffset != 0 || + oidArray->elemtype != OIDOID) + ereport(ERROR, + (errcode(ERRCODE_DATATYPE_MISMATCH), + errmsg("array is not a valid oidvector"))); +} + /* * oidvectorin - converts "num num ..." to internal form */ @@ -158,10 +182,14 @@ oidvectorout(PG_FUNCTION_ARGS) { oidvector *oidArray = (oidvector *) PG_GETARG_POINTER(0); int num, - nnums = oidArray->dim1; + nnums; char *rp; char *result; + /* validate input before fetching dim1 */ + check_valid_oidvector(oidArray); + nnums = oidArray->dim1; + /* assumes sign, 10 digits, ' ' */ rp = result = (char *) palloc(nnums * 12 + 1); for (num = 0; num < nnums; num++) @@ -224,6 +252,7 @@ oidvectorrecv(PG_FUNCTION_ARGS) Datum oidvectorsend(PG_FUNCTION_ARGS) { + /* We don't do check_valid_oidvector, since array_send won't care */ return array_send(fcinfo); } diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c index 3b5b794afb3..8d025011e2f 100644 --- a/src/backend/utils/adt/oracle_compat.c +++ b/src/backend/utils/adt/oracle_compat.c @@ -153,8 +153,8 @@ lpad(PG_FUNCTION_ARGS) char *ptr1, *ptr2, *ptr2start, - *ptr2end, *ptr_ret; + const char *ptr2end; int m, s1len, s2len; @@ -199,7 +199,7 @@ lpad(PG_FUNCTION_ARGS) while (m--) { - int mlen = pg_mblen(ptr2); + int mlen = pg_mblen_range(ptr2, ptr2end); memcpy(ptr_ret, ptr2, mlen); ptr_ret += mlen; @@ -212,7 +212,7 @@ lpad(PG_FUNCTION_ARGS) while (s1len--) { - int mlen = pg_mblen(ptr1); + int mlen = pg_mblen_unbounded(ptr1); memcpy(ptr_ret, ptr1, mlen); ptr_ret += mlen; @@ -251,8 +251,8 @@ rpad(PG_FUNCTION_ARGS) char *ptr1, *ptr2, *ptr2start, - *ptr2end, *ptr_ret; + const char *ptr2end; int m, s1len, s2len; @@ -292,11 +292,12 @@ rpad(PG_FUNCTION_ARGS) m = len - s1len; ptr1 = VARDATA_ANY(string1); + ptr_ret = VARDATA(ret); while (s1len--) { - int mlen = pg_mblen(ptr1); + int mlen = pg_mblen_unbounded(ptr1); memcpy(ptr_ret, ptr1, mlen); ptr_ret += mlen; @@ -308,7 +309,7 @@ rpad(PG_FUNCTION_ARGS) while (m--) { - int mlen = pg_mblen(ptr2); + int mlen = pg_mblen_range(ptr2, ptr2end); memcpy(ptr_ret, ptr2, mlen); ptr_ret += mlen; @@ -393,6 +394,7 @@ dotrim(const char *string, int stringlen, */ const char **stringchars; const char **setchars; + const char *setend; int *stringmblen; int *setmblen; int stringnchars; @@ -400,6 +402,7 @@ dotrim(const char *string, int stringlen, int resultndx; int resultnchars; const char *p; + const char *pend; int len; int mblen; const char *str_pos; @@ -410,10 +413,11 @@ dotrim(const char *string, int stringlen, stringnchars = 0; p = string; len = stringlen; + pend = p + len; while (len > 0) { stringchars[stringnchars] = p; - stringmblen[stringnchars] = mblen = pg_mblen(p); + stringmblen[stringnchars] = mblen = pg_mblen_range(p, pend); stringnchars++; p += mblen; len -= mblen; @@ -424,10 +428,11 @@ dotrim(const char *string, int stringlen, setnchars = 0; p = set; len = setlen; + setend = set + setlen; while (len > 0) { setchars[setnchars] = p; - setmblen[setnchars] = mblen = pg_mblen(p); + setmblen[setnchars] = mblen = pg_mblen_range(p, setend); setnchars++; p += mblen; len -= mblen; @@ -805,6 +810,8 @@ translate(PG_FUNCTION_ARGS) *to_end; char *source, *target; + const char *source_end; + const char *from_end; int m, fromlen, tolen, @@ -819,9 +826,11 @@ translate(PG_FUNCTION_ARGS) if (m <= 0) PG_RETURN_TEXT_P(string); source = VARDATA_ANY(string); + source_end = source + m; fromlen = VARSIZE_ANY_EXHDR(from); from_ptr = VARDATA_ANY(from); + from_end = from_ptr + fromlen; tolen = VARSIZE_ANY_EXHDR(to); to_ptr = VARDATA_ANY(to); to_end = to_ptr + tolen; @@ -845,12 +854,12 @@ translate(PG_FUNCTION_ARGS) while (m > 0) { - source_len = pg_mblen(source); + source_len = pg_mblen_range(source, source_end); from_index = 0; for (i = 0; i < fromlen; i += len) { - len = pg_mblen(&from_ptr[i]); + len = pg_mblen_range(&from_ptr[i], from_end); if (len == source_len && memcmp(source, &from_ptr[i], len) == 0) break; @@ -866,11 +875,11 @@ translate(PG_FUNCTION_ARGS) { if (p >= to_end) break; - p += pg_mblen(p); + p += pg_mblen_range(p, to_end); } if (p < to_end) { - len = pg_mblen(p); + len = pg_mblen_range(p, to_end); memcpy(target, p, len); target += len; retlen += len; diff --git a/src/backend/utils/adt/regexp.c b/src/backend/utils/adt/regexp.c index 702cd52b6d4..d43a0577ee4 100644 --- a/src/backend/utils/adt/regexp.c +++ b/src/backend/utils/adt/regexp.c @@ -444,7 +444,7 @@ parse_re_flags(pg_re_flags *flags, text *opts) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regular expression option: \"%.*s\"", - pg_mblen(opt_p + i), opt_p + i))); + pg_mblen_range(opt_p + i, opt_p + opt_len), opt_p + i))); break; } } @@ -674,12 +674,13 @@ textregexreplace(PG_FUNCTION_ARGS) if (VARSIZE_ANY_EXHDR(opt) > 0) { char *opt_p = VARDATA_ANY(opt); + const char *end_p = opt_p + VARSIZE_ANY_EXHDR(opt); if (*opt_p >= '0' && *opt_p <= '9') ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regular expression option: \"%.*s\"", - pg_mblen(opt_p), opt_p), + pg_mblen_range(opt_p, end_p), opt_p), errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly."))); } @@ -773,12 +774,14 @@ similar_escape_internal(text *pat_text, text *esc_text) *r; int plen, elen; + const char *pend; bool afterescape = false; bool incharclass = false; int nquotes = 0; p = VARDATA_ANY(pat_text); plen = VARSIZE_ANY_EXHDR(pat_text); + pend = p + plen; if (esc_text == NULL) { /* No ESCAPE clause provided; default to backslash as escape */ @@ -867,7 +870,7 @@ similar_escape_internal(text *pat_text, text *esc_text) if (elen > 1) { - int mblen = pg_mblen(p); + int mblen = pg_mblen_range(p, pend); if (mblen > 1) { diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index 001f2cc0299..244187e474b 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -120,7 +120,7 @@ get_modifiers(char *buf, int16 *weight, bool *prefix) return buf; buf++; - while (*buf && pg_mblen(buf) == 1) + while (*buf && pg_mblen_cstr(buf) == 1) { switch (*buf) { @@ -197,7 +197,7 @@ parse_phrase_operator(TSQueryParserState pstate, int16 *distance) continue; } - if (!t_isdigit(ptr)) + if (!t_isdigit_cstr(ptr)) return false; errno = 0; @@ -259,12 +259,12 @@ parse_or_operator(TSQueryParserState pstate) return false; /* it shouldn't be a part of any word */ - if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum(ptr)) + if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalnum_cstr(ptr)) return false; for (;;) { - ptr += pg_mblen(ptr); + ptr += pg_mblen_cstr(ptr); if (*ptr == '\0') /* got end of string without operand */ return false; @@ -274,7 +274,7 @@ parse_or_operator(TSQueryParserState pstate) * So we still treat OR literal as operation with possibly incorrect * operand and will not search it as lexeme */ - if (!t_isspace(ptr)) + if (!t_isspace_cstr(ptr)) break; } @@ -315,7 +315,7 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, /* generic syntax error message is fine */ return PT_ERR; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { /* * We rely on the tsvector parser to parse the value for @@ -383,14 +383,14 @@ gettoken_query_standard(TSQueryParserState state, int8 *operator, { return (state->count) ? PT_ERR : PT_END; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { return PT_ERR; } break; } - state->buf += pg_mblen(state->buf); + state->buf += pg_mblen_cstr(state->buf); } } @@ -444,7 +444,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->state = WAITOPERAND; continue; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { /* * We rely on the tsvector parser to parse the value for @@ -492,7 +492,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, state->buf++; continue; } - else if (!t_isspace(state->buf)) + else if (!t_isspace_cstr(state->buf)) { /* insert implicit AND between operands */ state->state = WAITOPERAND; @@ -502,7 +502,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, break; } - state->buf += pg_mblen(state->buf); + state->buf += pg_mblen_cstr(state->buf); } } @@ -1014,9 +1014,8 @@ infix(INFIX *in, int parentPriority, bool rightPhraseOp) *(in->cur) = '\\'; in->cur++; } - COPYCHAR(in->cur, op); - clen = pg_mblen(op); + clen = ts_copychar_cstr(in->cur, op); op += clen; in->cur += clen; } diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c index 85c492d122a..39e16f8a7cd 100644 --- a/src/backend/utils/adt/tsvector.c +++ b/src/backend/utils/adt/tsvector.c @@ -320,9 +320,9 @@ tsvectorout(PG_FUNCTION_ARGS) lenbuf = 0, pp; WordEntry *ptr = ARRPTR(out); - char *curbegin, - *curin, + char *curin, *curout; + const char *curend; lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ; for (i = 0; i < out->size; i++) @@ -335,13 +335,14 @@ tsvectorout(PG_FUNCTION_ARGS) curout = outbuf = (char *) palloc(lenbuf); for (i = 0; i < out->size; i++) { - curbegin = curin = STRPTR(out) + ptr->pos; + curin = STRPTR(out) + ptr->pos; + curend = curin + ptr->len; if (i != 0) *curout++ = ' '; *curout++ = '\''; - while (curin - curbegin < ptr->len) + while (curin < curend) { - int len = pg_mblen(curin); + int len = pg_mblen_range(curin, curend); if (t_iseq(curin, '\'')) *curout++ = '\''; diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c index f511a28bb04..ae90e750604 100644 --- a/src/backend/utils/adt/tsvector_op.c +++ b/src/backend/utils/adt/tsvector_op.c @@ -2615,11 +2615,15 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws) if (ws) { char *buf; + const char *end; buf = VARDATA_ANY(ws); - while (buf - VARDATA_ANY(ws) < VARSIZE_ANY_EXHDR(ws)) + end = buf + VARSIZE_ANY_EXHDR(ws); + while (buf < end) { - if (pg_mblen(buf) == 1) + int len = pg_mblen_range(buf, end); + + if (len == 1) { switch (*buf) { @@ -2643,7 +2647,7 @@ ts_stat_sql(MemoryContext persistentContext, text *txt, text *ws) stat->weight |= 0; } } - buf += pg_mblen(buf); + buf += len; } } diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c index 13e075831fe..e4b91f8d3c4 100644 --- a/src/backend/utils/adt/tsvector_parser.c +++ b/src/backend/utils/adt/tsvector_parser.c @@ -206,10 +206,9 @@ gettoken_tsvector(TSVectorParseState state, else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) || (state->is_web && t_iseq(state->prsbuf, '"'))) PRSSYNTAXERROR; - else if (!t_isspace(state->prsbuf)) + else if (!t_isspace_cstr(state->prsbuf)) { - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); statecode = WAITENDWORD; } } @@ -223,8 +222,7 @@ gettoken_tsvector(TSVectorParseState state, else { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); Assert(oldstate != 0); statecode = oldstate; } @@ -236,7 +234,7 @@ gettoken_tsvector(TSVectorParseState state, statecode = WAITNEXTCHAR; oldstate = WAITENDWORD; } - else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' || + else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0' || (state->oprisdelim && ISOPERATOR(state->prsbuf)) || (state->is_web && t_iseq(state->prsbuf, '"'))) { @@ -259,8 +257,7 @@ gettoken_tsvector(TSVectorParseState state, else { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); } } else if (statecode == WAITENDCMPLX) @@ -279,8 +276,7 @@ gettoken_tsvector(TSVectorParseState state, else { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); } } else if (statecode == WAITCHARCMPLX) @@ -288,8 +284,7 @@ gettoken_tsvector(TSVectorParseState state, if (!state->is_web && t_iseq(state->prsbuf, '\'')) { RESIZEPRSBUF; - COPYCHAR(curpos, state->prsbuf); - curpos += pg_mblen(state->prsbuf); + curpos += ts_copychar_cstr(curpos, state->prsbuf); statecode = WAITENDCMPLX; } else @@ -300,7 +295,7 @@ gettoken_tsvector(TSVectorParseState state, PRSSYNTAXERROR; if (state->oprisdelim) { - /* state->prsbuf+=pg_mblen(state->prsbuf); */ + /* state->prsbuf+=pg_mblen_cstr(state->prsbuf); */ RETURN_TOKEN; } else @@ -317,7 +312,7 @@ gettoken_tsvector(TSVectorParseState state, } else if (statecode == INPOSINFO) { - if (t_isdigit(state->prsbuf)) + if (t_isdigit_cstr(state->prsbuf)) { if (posalen == 0) { @@ -372,10 +367,10 @@ gettoken_tsvector(TSVectorParseState state, PRSSYNTAXERROR; WEP_SETWEIGHT(pos[npos - 1], 0); } - else if (t_isspace(state->prsbuf) || + else if (t_isspace_cstr(state->prsbuf) || *(state->prsbuf) == '\0') RETURN_TOKEN; - else if (!t_isdigit(state->prsbuf)) + else if (!t_isdigit_cstr(state->prsbuf)) PRSSYNTAXERROR; } else /* internal error */ @@ -383,6 +378,6 @@ gettoken_tsvector(TSVectorParseState state, statecode); /* get next char */ - state->prsbuf += pg_mblen(state->prsbuf); + state->prsbuf += pg_mblen_cstr(state->prsbuf); } } diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c index 7e1457cb9ef..c53356bbb46 100644 --- a/src/backend/utils/adt/varbit.c +++ b/src/backend/utils/adt/varbit.c @@ -233,7 +233,7 @@ bit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid binary digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); x >>= 1; if (x == 0) @@ -258,7 +258,7 @@ bit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid hexadecimal digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); if (bc) { @@ -534,7 +534,7 @@ varbit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid binary digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); x >>= 1; if (x == 0) @@ -559,7 +559,7 @@ varbit_in(PG_FUNCTION_ARGS) ereturn(escontext, (Datum) 0, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("\"%.*s\" is not a valid hexadecimal digit", - pg_mblen(sp), sp))); + pg_mblen_cstr(sp), sp))); if (bc) { diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index b5f0018e8f3..1f8fcd1f406 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -797,8 +797,11 @@ text_catenate(text *t1, text *t2) * charlen_to_bytelen() * Compute the number of bytes occupied by n characters starting at *p * - * It is caller's responsibility that there actually are n characters; - * the string need not be null-terminated. + * The caller shall ensure there are n complete characters. Callers achieve + * this by deriving "n" from regmatch_t findings from searching a wchar array. + * pg_mb2wchar_with_len() skips any trailing incomplete character, so regex + * matches will end no later than the last complete character. (The string + * need not be null-terminated.) */ static int charlen_to_bytelen(const char *p, int n) @@ -813,7 +816,7 @@ charlen_to_bytelen(const char *p, int n) const char *s; for (s = p; n > 0; n--) - s += pg_mblen(s); + s += pg_mblen_unbounded(s); /* caller verified encoding */ return s - p; } @@ -946,6 +949,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) int32 slice_start; int32 slice_size; int32 slice_strlen; + int32 slice_len; text *slice; int32 E1; int32 i; @@ -1015,7 +1019,8 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) slice = (text *) DatumGetPointer(str); /* see if we got back an empty string */ - if (VARSIZE_ANY_EXHDR(slice) == 0) + slice_len = VARSIZE_ANY_EXHDR(slice); + if (slice_len == 0) { if (slice != (text *) DatumGetPointer(str)) pfree(slice); @@ -1024,7 +1029,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) /* Now we can get the actual length of the slice in MB characters */ slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice), - VARSIZE_ANY_EXHDR(slice)); + slice_len); /* * Check that the start position wasn't > slice_strlen. If so, SQL99 @@ -1051,7 +1056,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) */ p = VARDATA_ANY(slice); for (i = 0; i < S1 - 1; i++) - p += pg_mblen(p); + p += pg_mblen_unbounded(p); /* hang onto a pointer to our start position */ s = p; @@ -1061,7 +1066,7 @@ text_substring(Datum str, int32 start, int32 length, bool length_not_specified) * length. */ for (i = S1; i < E1; i++) - p += pg_mblen(p); + p += pg_mblen_unbounded(p); ret = (text *) palloc(VARHDRSZ + (p - s)); SET_VARSIZE(ret, VARHDRSZ + (p - s)); @@ -1359,6 +1364,8 @@ text_position_next(TextPositionState *state) */ if (state->is_multibyte_char_in_char) { + const char *haystack_end = state->str1 + state->len1; + /* Walk one character at a time, until we reach the match. */ /* the search should never move backwards. */ @@ -1367,7 +1374,7 @@ text_position_next(TextPositionState *state) while (state->refpoint < matchptr) { /* step to next character. */ - state->refpoint += pg_mblen(state->refpoint); + state->refpoint += pg_mblen_range(state->refpoint, haystack_end); state->refpos++; /* @@ -4682,6 +4689,8 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate) } else { + const char *end_ptr; + /* * When fldsep is NULL, each character in the input string becomes a * separate element in the result set. The separator is effectively @@ -4690,10 +4699,11 @@ split_text(FunctionCallInfo fcinfo, SplitTextOutputData *tstate) inputstring_len = VARSIZE_ANY_EXHDR(inputstring); start_ptr = VARDATA_ANY(inputstring); + end_ptr = start_ptr + inputstring_len; while (inputstring_len > 0) { - int chunk_len = pg_mblen(start_ptr); + int chunk_len = pg_mblen_range(start_ptr, end_ptr); CHECK_FOR_INTERRUPTS(); @@ -5524,7 +5534,7 @@ text_reverse(PG_FUNCTION_ARGS) { int sz; - sz = pg_mblen(p); + sz = pg_mblen_range(p, endp); dst -= sz; memcpy(dst, p, sz); p += sz; @@ -5685,7 +5695,7 @@ text_format(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized format() type specifier \"%.*s\"", - pg_mblen(cp), cp), + pg_mblen_range(cp, end_ptr), cp), errhint("For a single \"%%\" use \"%%%%\"."))); /* If indirect width was specified, get its value */ @@ -5806,7 +5816,7 @@ text_format(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("unrecognized format() type specifier \"%.*s\"", - pg_mblen(cp), cp), + pg_mblen_range(cp, end_ptr), cp), errhint("For a single \"%%\" use \"%%%%\"."))); break; } diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c index 1537adfb7bf..45132fcc0fa 100644 --- a/src/backend/utils/adt/xml.c +++ b/src/backend/utils/adt/xml.c @@ -2329,8 +2329,7 @@ sqlchar_to_unicode(const char *s) char *utf8string; pg_wchar ret[2]; /* need space for trailing zero */ - /* note we're not assuming s is null-terminated */ - utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8); + utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8); pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret, pg_encoding_mblen(PG_UTF8, utf8string)); @@ -2383,7 +2382,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, initStringInfo(&buf); - for (p = ident; *p; p += pg_mblen(p)) + for (p = ident; *p; p += pg_mblen_cstr(p)) { if (*p == ':' && (p == ident || fully_escaped)) appendStringInfoString(&buf, "_x003A_"); @@ -2408,7 +2407,7 @@ map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped, : !is_valid_xml_namechar(u)) appendStringInfo(&buf, "_x%04X_", (unsigned int) u); else - appendBinaryStringInfo(&buf, p, pg_mblen(p)); + appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p)); } } @@ -2431,7 +2430,7 @@ map_xml_name_to_sql_identifier(const char *name) initStringInfo(&buf); - for (p = name; *p; p += pg_mblen(p)) + for (p = name; *p; p += pg_mblen_cstr(p)) { if (*p == '_' && *(p + 1) == 'x' && isxdigit((unsigned char) *(p + 2)) @@ -2449,7 +2448,7 @@ map_xml_name_to_sql_identifier(const char *name) p += 6; } else - appendBinaryStringInfo(&buf, p, pg_mblen(p)); + appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p)); } return buf.data; diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c index 65c20bde39e..a8901a957eb 100644 --- a/src/backend/utils/cache/syscache.c +++ b/src/backend/utils/cache/syscache.c @@ -43,6 +43,7 @@ #include "catalog/pg_directory_table.h" #include "catalog/pg_enum.h" #include "catalog/pg_event_trigger.h" +#include "catalog/pg_extension.h" #include "catalog/pg_foreign_data_wrapper.h" #include "catalog/pg_foreign_server.h" #include "catalog/pg_foreign_table.h" @@ -823,6 +824,13 @@ static const struct cachedesc cacheinfo[] = { 0 }, 128 + }, + /* intentionally out of alphabetical order, to avoid an ABI break: */ + [EXTENSIONOID] = { + ExtensionRelationId, + ExtensionOidIndexId, + KEY(Anum_pg_extension_oid), + 2 } }; diff --git a/src/backend/utils/mb/mbutils.c b/src/backend/utils/mb/mbutils.c index 87ed364aab4..0477acc1e08 100644 --- a/src/backend/utils/mb/mbutils.c +++ b/src/backend/utils/mb/mbutils.c @@ -38,6 +38,7 @@ #include "catalog/namespace.h" #include "mb/pg_wchar.h" #include "utils/builtins.h" +#include "utils/memdebug.h" #include "utils/memutils.h" #include "utils/syscache.h" #include "varatt.h" @@ -100,6 +101,13 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_ FmgrInfo *custom_encoding_proc); static int cliplen(const char *str, int len, int limit); +pg_attribute_noreturn() +static void report_invalid_encoding_int(int encoding, const char *mbstr, + int mblen, int len); + +pg_attribute_noreturn() +static void report_invalid_encoding_db(const char *mbstr, int mblen, int len); + /* * Prepare for a future call to SetClientEncoding. Success should mean @@ -1149,11 +1157,126 @@ pg_encoding_wchar2mb_with_len(int encoding, return pg_wchar_table[encoding].wchar2mb_with_len(from, (unsigned char *) to, len); } -/* returns the byte length of a multibyte character */ +/* + * Returns the byte length of a multibyte character sequence in a + * null-terminated string. Raises an illegal byte sequence error if the + * sequence would hit a null terminator. + * + * The caller is expected to have checked for a terminator at *mbstr == 0 + * before calling, but some callers want 1 in that case, so this function + * continues that tradition. + * + * This must only be used for strings that have a null-terminator to enable + * bounds detection. + */ +int +pg_mblen_cstr(const char *mbstr) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + /* + * The .mblen functions return 1 when given a pointer to a terminator. + * Some callers depend on that, so we tolerate it for now. Well-behaved + * callers check the leading byte for a terminator *before* calling. + */ + for (int i = 1; i < length; ++i) + if (unlikely(mbstr[i] == 0)) + report_invalid_encoding_db(mbstr, length, i); + + /* + * String should be NUL-terminated, but checking that would make typical + * callers O(N^2), tripling Valgrind check-world time. Unless + * VALGRIND_EXPENSIVE, check 1 byte after each actual character. (If we + * found a character, not a terminator, the next byte must be a terminator + * or the start of the next character.) If the caller iterates the whole + * string, the last call will diagnose a missing terminator. + */ + if (mbstr[0] != '\0') + { +#ifdef VALGRIND_EXPENSIVE + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, strlen(mbstr)); +#else + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr + length, 1); +#endif + } + + return length; +} + +/* + * Returns the byte length of a multibyte character sequence bounded by a range + * [mbstr, end) of at least one byte in size. Raises an illegal byte sequence + * error if the sequence would exceed the range. + */ +int +pg_mblen_range(const char *mbstr, const char *end) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + Assert(end > mbstr); +#ifdef VALGRIND_EXPENSIVE + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, end - mbstr); +#else + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length); +#endif + + if (unlikely(mbstr + length > end)) + report_invalid_encoding_db(mbstr, length, end - mbstr); + + return length; +} + +/* + * Returns the byte length of a multibyte character sequence bounded by a range + * extending for 'limit' bytes, which must be at least one. Raises an illegal + * byte sequence error if the sequence would exceed the range. + */ +int +pg_mblen_with_len(const char *mbstr, int limit) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + Assert(limit >= 1); +#ifdef VALGRIND_EXPENSIVE + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, limit); +#else + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length); +#endif + + if (unlikely(length > limit)) + report_invalid_encoding_db(mbstr, length, limit); + + return length; +} + + +/* + * Returns the length of a multibyte character sequence, without any + * validation of bounds. + * + * PLEASE NOTE: This function can only be used safely if the caller has + * already verified the input string, since otherwise there is a risk of + * overrunning the buffer if the string is invalid. A prior call to a + * pg_mbstrlen* function suffices. + */ +int +pg_mblen_unbounded(const char *mbstr) +{ + int length = pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + + VALGRIND_CHECK_MEM_IS_DEFINED(mbstr, length); + + return length; +} + +/* + * Historical name for pg_mblen_unbounded(). Should not be used and will be + * removed in a later version. + */ int pg_mblen(const char *mbstr) { - return pg_wchar_table[DatabaseEncoding->encoding].mblen((const unsigned char *) mbstr); + return pg_mblen_unbounded(mbstr); } /* returns the display length of a multibyte character */ @@ -1175,14 +1298,14 @@ pg_mbstrlen(const char *mbstr) while (*mbstr) { - mbstr += pg_mblen(mbstr); + mbstr += pg_mblen_cstr(mbstr); len++; } return len; } /* returns the length (counted in wchars) of a multibyte string - * (not necessarily NULL terminated) + * (stops at the first of "limit" or a NUL) */ int pg_mbstrlen_with_len(const char *mbstr, int limit) @@ -1195,7 +1318,7 @@ pg_mbstrlen_with_len(const char *mbstr, int limit) while (limit > 0 && *mbstr) { - int l = pg_mblen(mbstr); + int l = pg_mblen_with_len(mbstr, limit); limit -= l; mbstr += l; @@ -1265,7 +1388,7 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit) while (len > 0 && *mbstr) { - l = pg_mblen(mbstr); + l = pg_mblen_with_len(mbstr, len); nch++; if (nch > limit) break; @@ -1835,12 +1958,19 @@ void report_invalid_encoding(int encoding, const char *mbstr, int len) { int l = pg_encoding_mblen_or_incomplete(encoding, mbstr, len); + + report_invalid_encoding_int(encoding, mbstr, l, len); +} + +static void +report_invalid_encoding_int(int encoding, const char *mbstr, int mblen, int len) +{ char buf[8 * 5 + 1]; char *p = buf; int j, jlimit; - jlimit = Min(l, len); + jlimit = Min(mblen, len); jlimit = Min(jlimit, 8); /* prevent buffer overrun */ for (j = 0; j < jlimit; j++) @@ -1857,6 +1987,12 @@ report_invalid_encoding(int encoding, const char *mbstr, int len) buf))); } +static void +report_invalid_encoding_db(const char *mbstr, int mblen, int len) +{ + report_invalid_encoding_int(GetDatabaseEncoding(), mbstr, mblen, len); +} + /* * report_untranslatable_char: complain about untranslatable character * diff --git a/src/common/wchar.c b/src/common/wchar.c index c0fb19b3f1a..5133e5e5b25 100644 --- a/src/common/wchar.c +++ b/src/common/wchar.c @@ -82,6 +82,9 @@ * subset to the ASCII routines to ensure consistency. */ +/* No error-reporting facility. Ignore incomplete trailing byte sequence. */ +#define MB2CHAR_NEED_AT_LEAST(len, need) if ((len) < (need)) break + /* * SQL/ASCII */ @@ -127,22 +130,24 @@ pg_euc2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (*from == SS2 && len >= 2) /* JIS X 0201 (so called "1 byte - * KANA") */ + if (*from == SS2) /* JIS X 0201 (so called "1 byte KANA") */ { + MB2CHAR_NEED_AT_LEAST(len, 2); from++; *to = (SS2 << 8) | *from++; len -= 2; } - else if (*from == SS3 && len >= 3) /* JIS X 0212 KANJI */ + else if (*from == SS3) /* JIS X 0212 KANJI */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (IS_HIGHBIT_SET(*from) && len >= 2) /* JIS X 0208 KANJI */ + else if (IS_HIGHBIT_SET(*from)) /* JIS X 0208 KANJI */ { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 8; *to |= *from++; len -= 2; @@ -254,22 +259,25 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (*from == SS2 && len >= 3) /* code set 2 (unused?) */ + if (*from == SS2) /* code set 2 (unused?) */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS2 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (*from == SS3 && len >= 3) /* code set 3 (unused ?) */ + else if (*from == SS3) /* code set 3 (unused ?) */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 1 */ + else if (IS_HIGHBIT_SET(*from)) /* code set 1 */ { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 8; *to |= *from++; len -= 2; @@ -286,12 +294,22 @@ pg_euccn2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) return cnt; } +/* + * mbverifychar does not accept SS2 or SS3 (CS2 and CS3 are not defined for + * EUC_CN), but mb2wchar_with_len does. Tell a coherent story for code that + * relies on agreement between mb2wchar_with_len and mblen. Invalid text + * datums (e.g. from shared catalogs) reach this. + */ static int pg_euccn_mblen(const unsigned char *s) { int len; - if (IS_HIGHBIT_SET(*s)) + if (*s == SS2) + len = 3; + else if (*s == SS3) + len = 3; + else if (IS_HIGHBIT_SET(*s)) len = 2; else len = 1; @@ -321,23 +339,26 @@ pg_euctw2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (*from == SS2 && len >= 4) /* code set 2 */ + if (*from == SS2) /* code set 2 */ { + MB2CHAR_NEED_AT_LEAST(len, 4); from++; *to = (((uint32) SS2) << 24) | (*from++ << 16); *to |= *from++ << 8; *to |= *from++; len -= 4; } - else if (*from == SS3 && len >= 3) /* code set 3 (unused?) */ + else if (*from == SS3) /* code set 3 (unused?) */ { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = (SS3 << 16) | (*from++ << 8); *to |= *from++; len -= 3; } - else if (IS_HIGHBIT_SET(*from) && len >= 2) /* code set 2 */ + else if (IS_HIGHBIT_SET(*from)) /* code set 2 */ { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 8; *to |= *from++; len -= 2; @@ -474,8 +495,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) } else if ((*from & 0xe0) == 0xc0) { - if (len < 2) - break; /* drop trailing incomplete char */ + MB2CHAR_NEED_AT_LEAST(len, 2); c1 = *from++ & 0x1f; c2 = *from++ & 0x3f; *to = (c1 << 6) | c2; @@ -483,8 +503,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) } else if ((*from & 0xf0) == 0xe0) { - if (len < 3) - break; /* drop trailing incomplete char */ + MB2CHAR_NEED_AT_LEAST(len, 3); c1 = *from++ & 0x0f; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; @@ -493,8 +512,7 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) } else if ((*from & 0xf8) == 0xf0) { - if (len < 4) - break; /* drop trailing incomplete char */ + MB2CHAR_NEED_AT_LEAST(len, 4); c1 = *from++ & 0x07; c2 = *from++ & 0x3f; c3 = *from++ & 0x3f; @@ -757,28 +775,32 @@ pg_mule2wchar_with_len(const unsigned char *from, pg_wchar *to, int len) while (len > 0 && *from) { - if (IS_LC1(*from) && len >= 2) + if (IS_LC1(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 2); *to = *from++ << 16; *to |= *from++; len -= 2; } - else if (IS_LCPRV1(*from) && len >= 3) + else if (IS_LCPRV1(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 3); from++; *to = *from++ << 16; *to |= *from++; len -= 3; } - else if (IS_LC2(*from) && len >= 3) + else if (IS_LC2(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 3); *to = *from++ << 16; *to |= *from++ << 8; *to |= *from++; len -= 3; } - else if (IS_LCPRV2(*from) && len >= 4) + else if (IS_LCPRV2(*from)) { + MB2CHAR_NEED_AT_LEAST(len, 4); from++; *to = *from++ << 16; *to |= *from++ << 8; @@ -2145,7 +2167,7 @@ pg_encoding_set_invalid(int encoding, char *dst) const pg_wchar_tbl pg_wchar_table[] = { {pg_ascii2wchar_with_len, pg_wchar2single_with_len, pg_ascii_mblen, pg_ascii_dsplen, pg_ascii_verifychar, pg_ascii_verifystr, 1}, /* PG_SQL_ASCII */ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JP */ - {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 2}, /* PG_EUC_CN */ + {pg_euccn2wchar_with_len, pg_wchar2euc_with_len, pg_euccn_mblen, pg_euccn_dsplen, pg_euccn_verifychar, pg_euccn_verifystr, 3}, /* PG_EUC_CN */ {pg_euckr2wchar_with_len, pg_wchar2euc_with_len, pg_euckr_mblen, pg_euckr_dsplen, pg_euckr_verifychar, pg_euckr_verifystr, 3}, /* PG_EUC_KR */ {pg_euctw2wchar_with_len, pg_wchar2euc_with_len, pg_euctw_mblen, pg_euctw_dsplen, pg_euctw_verifychar, pg_euctw_verifystr, 4}, /* PG_EUC_TW */ {pg_eucjp2wchar_with_len, pg_wchar2euc_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, pg_eucjp_verifychar, pg_eucjp_verifystr, 3}, /* PG_EUC_JIS_2004 */ diff --git a/src/include/catalog/dependency.h b/src/include/catalog/dependency.h index 0ea7b3cda81..6a7ae2abea9 100644 --- a/src/include/catalog/dependency.h +++ b/src/include/catalog/dependency.h @@ -245,6 +245,8 @@ extern long changeDependenciesOn(Oid refClassId, Oid oldRefObjectId, extern Oid getExtensionOfObject(Oid classId, Oid objectId); extern List *getAutoExtensionsOfObject(Oid classId, Oid objectId); +extern Oid getExtensionType(Oid extensionOid, const char *typname); + extern bool sequenceIsOwned(Oid seqId, char deptype, Oid *tableId, int32 *colId); extern List *getOwnedSequences(Oid relid); extern Oid getIdentitySequence(Oid relid, AttrNumber attnum, bool missing_ok); diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h index 4b00ff7cdc7..7af15a37f52 100644 --- a/src/include/commands/defrem.h +++ b/src/include/commands/defrem.h @@ -87,7 +87,7 @@ extern void RemoveOperatorById(Oid operOid); extern ObjectAddress AlterOperator(AlterOperatorStmt *stmt); /* commands/statscmds.c */ -extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt); +extern ObjectAddress CreateStatistics(CreateStatsStmt *stmt, bool check_rights); extern ObjectAddress AlterStatistics(AlterStatsStmt *stmt); extern void RemoveStatisticsById(Oid statsOid); extern void RemoveStatisticsDataById(Oid statsOid, bool inh); diff --git a/src/include/commands/extension.h b/src/include/commands/extension.h index 042ae6ba70d..f2e45cf59ea 100644 --- a/src/include/commands/extension.h +++ b/src/include/commands/extension.h @@ -50,6 +50,8 @@ extern char *get_extension_name(Oid ext_oid); extern Oid get_extension_schema(Oid ext_oid); extern bool extension_file_exists(const char *extensionName); +extern Oid get_function_sibling_type(Oid funcoid, const char *typname); + extern ObjectAddress AlterExtensionNamespace(const char *extensionName, const char *newschema, Oid *oldschema); diff --git a/src/include/mb/pg_wchar.h b/src/include/mb/pg_wchar.h index c2cc2ad0963..cd9027a444a 100644 --- a/src/include/mb/pg_wchar.h +++ b/src/include/mb/pg_wchar.h @@ -608,7 +608,14 @@ extern int pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2); extern int pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n); extern int pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n); extern size_t pg_wchar_strlen(const pg_wchar *str); +extern int pg_mblen_cstr(const char *mbstr); +extern int pg_mblen_range(const char *mbstr, const char *end); +extern int pg_mblen_with_len(const char *mbstr, int limit); +extern int pg_mblen_unbounded(const char *mbstr); + +/* deprecated */ extern int pg_mblen(const char *mbstr); + extern int pg_dsplen(const char *mbstr); extern int pg_mbstrlen(const char *mbstr); extern int pg_mbstrlen_with_len(const char *mbstr, int limit); diff --git a/src/include/tsearch/ts_locale.h b/src/include/tsearch/ts_locale.h index 58d594d4006..787ffb165d1 100644 --- a/src/include/tsearch/ts_locale.h +++ b/src/include/tsearch/ts_locale.h @@ -37,13 +37,37 @@ typedef struct /* The second argument of t_iseq() must be a plain ASCII character */ #define t_iseq(x,c) (TOUCHAR(x) == (unsigned char) (c)) -#define COPYCHAR(d,s) memcpy(d, s, pg_mblen(s)) +/* Copy multibyte character of known byte length, return byte length. */ +static inline int +ts_copychar_with_len(void *dest, const void *src, int length) +{ + memcpy(dest, src, length); + return length; +} + +/* Copy multibyte character from null-terminated string, return byte length. */ +static inline int +ts_copychar_cstr(void *dest, const void *src) +{ + return ts_copychar_with_len(dest, src, pg_mblen_cstr((const char *) src)); +} + +/* Historical macro for the above. */ +#define COPYCHAR ts_copychar_cstr + +#define GENERATE_T_ISCLASS_DECL(character_class) \ +extern int t_is##character_class##_with_len(const char *ptr, int len); \ +extern int t_is##character_class##_cstr(const char *ptr); \ +extern int t_is##character_class##_unbounded(const char *ptr); \ +\ +/* deprecated */ \ +extern int t_is##character_class(const char *ptr); -extern int t_isdigit(const char *ptr); -extern int t_isspace(const char *ptr); -extern int t_isalpha(const char *ptr); -extern int t_isalnum(const char *ptr); -extern int t_isprint(const char *ptr); +GENERATE_T_ISCLASS_DECL(alnum); +GENERATE_T_ISCLASS_DECL(alpha); +GENERATE_T_ISCLASS_DECL(digit); +GENERATE_T_ISCLASS_DECL(print); +GENERATE_T_ISCLASS_DECL(space); extern char *lowerstr(const char *str); extern char *lowerstr_with_len(const char *str, int len); diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h index d3dc8bae475..48db1b800a1 100644 --- a/src/include/tsearch/ts_utils.h +++ b/src/include/tsearch/ts_utils.h @@ -40,14 +40,12 @@ extern bool gettoken_tsvector(TSVectorParseState state, extern void close_tsvector_parser(TSVectorParseState state); /* phrase operator begins with '<' */ -#define ISOPERATOR(x) \ - ( pg_mblen(x) == 1 && ( *(x) == '!' || \ - *(x) == '&' || \ - *(x) == '|' || \ - *(x) == '(' || \ - *(x) == ')' || \ - *(x) == '<' \ - ) ) +#define ISOPERATOR(x) (*(x) == '!' || \ + *(x) == '&' || \ + *(x) == '|' || \ + *(x) == '(' || \ + *(x) == ')' || \ + *(x) == '<') /* parse_tsquery */ diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index cb309ede6ae..bcba170c327 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -70,6 +70,7 @@ extern int64 get_size_from_segDBs(const char *cmd); /* oid.c */ extern oidvector *buildoidvector(const Oid *oids, int n); +extern void check_valid_oidvector(const oidvector *oidArray); extern Oid oidparse(Node *node); /* pseudotypes.c */ diff --git a/src/include/utils/syscache.h b/src/include/utils/syscache.h index b0658a94bd9..e790dfe2af5 100644 --- a/src/include/utils/syscache.h +++ b/src/include/utils/syscache.h @@ -132,9 +132,12 @@ enum SysCacheIdentifier MVTABLESMVRELOID, USERMAPPINGOID, USERMAPPINGUSERSERVER, - ATTENCODINGNUM + ATTENCODINGNUM, -#define SysCacheSize (ATTENCODINGNUM + 1) + /* intentionally out of alphabetical order, to avoid an ABI break: */ + EXTENSIONOID + +#define SysCacheSize (EXTENSIONOID + 1) }; extern void InitCatalogCache(void); diff --git a/src/interfaces/libpq/fe-connect.c b/src/interfaces/libpq/fe-connect.c index 135af368f84..54858d15689 100644 --- a/src/interfaces/libpq/fe-connect.c +++ b/src/interfaces/libpq/fe-connect.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -1075,7 +1076,7 @@ parse_comma_separated_list(char **startptr, bool *more) char *p; char *s = *startptr; char *e; - int len; + size_t len; /* * Search for the end of the current element; a comma or end-of-string @@ -5594,7 +5595,21 @@ ldapServiceLookup(const char *purl, PQconninfoOption *options, /* concatenate values into a single string with newline terminators */ size = 1; /* for the trailing null */ for (i = 0; values[i] != NULL; i++) + { + if (values[i]->bv_len >= INT_MAX || + size > (INT_MAX - (values[i]->bv_len + 1))) + { + libpq_append_error(errorMessage, + "connection info string size exceeds the maximum allowed (%d)", + INT_MAX); + ldap_value_free_len(values); + ldap_unbind(ld); + return 3; + } + size += values[i]->bv_len + 1; + } + if ((result = malloc(size)) == NULL) { libpq_append_error(errorMessage, "out of memory"); diff --git a/src/interfaces/libpq/fe-exec.c b/src/interfaces/libpq/fe-exec.c index 2cf897cef04..e723bdf925c 100644 --- a/src/interfaces/libpq/fe-exec.c +++ b/src/interfaces/libpq/fe-exec.c @@ -532,7 +532,7 @@ PQsetvalue(PGresult *res, int tup_num, int field_num, char *value, int len) } else { - attval->value = (char *) pqResultAlloc(res, len + 1, true); + attval->value = (char *) pqResultAlloc(res, (size_t) len + 1, true); if (!attval->value) goto fail; attval->len = len; @@ -624,8 +624,13 @@ pqResultAlloc(PGresult *res, size_t nBytes, bool isBinary) */ if (nBytes >= PGRESULT_SEP_ALLOC_THRESHOLD) { - size_t alloc_size = nBytes + PGRESULT_BLOCK_OVERHEAD; + size_t alloc_size; + /* Don't wrap around with overly large requests. */ + if (nBytes > SIZE_MAX - PGRESULT_BLOCK_OVERHEAD) + return NULL; + + alloc_size = nBytes + PGRESULT_BLOCK_OVERHEAD; block = (PGresult_data *) malloc(alloc_size); if (!block) return NULL; @@ -1292,7 +1297,7 @@ pqRowProcessor(PGconn *conn, const char **errmsgp) bool isbinary = (res->attDescs[i].format != 0); char *val; - val = (char *) pqResultAlloc(res, clen + 1, isbinary); + val = (char *) pqResultAlloc(res, (size_t) clen + 1, isbinary); if (val == NULL) goto fail; @@ -4166,6 +4171,27 @@ PQescapeString(char *to, const char *from, size_t length) } +/* + * Frontend version of the backend's add_size(), intended to be API-compatible + * with the pg_add_*_overflow() helpers. Stores the result into *dst on success. + * Returns true instead if the addition overflows. + * + * TODO: move to common/int.h + */ +static bool +add_size_overflow(size_t s1, size_t s2, size_t *dst) +{ + size_t result; + + result = s1 + s2; + if (result < s1 || result < s2) + return true; + + *dst = result; + return false; +} + + /* * Escape arbitrary strings. If as_ident is true, we escape the result * as an identifier; if false, as a literal. The result is returned in @@ -4178,9 +4204,9 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) const char *s; char *result; char *rp; - int num_quotes = 0; /* single or double, depending on as_ident */ - int num_backslashes = 0; - size_t input_len = strlen(str); + size_t num_quotes = 0; /* single or double, depending on as_ident */ + size_t num_backslashes = 0; + size_t input_len = strnlen(str, len); size_t result_size; char quote_char = as_ident ? '"' : '\''; bool validated_mb = false; @@ -4245,10 +4271,21 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) } } - /* Allocate output buffer. */ - result_size = input_len + num_quotes + 3; /* two quotes, plus a NUL */ + /* + * Allocate output buffer. Protect against overflow, in case the caller + * has allocated a large fraction of the available size_t. + */ + if (add_size_overflow(input_len, num_quotes, &result_size) || + add_size_overflow(result_size, 3, &result_size)) /* two quotes plus a NUL */ + goto overflow; + if (!as_ident && num_backslashes > 0) - result_size += num_backslashes + 2; + { + if (add_size_overflow(result_size, num_backslashes, &result_size) || + add_size_overflow(result_size, 2, &result_size)) /* for " E" prefix */ + goto overflow; + } + result = rp = (char *) malloc(result_size); if (rp == NULL) { @@ -4321,6 +4358,12 @@ PQescapeInternal(PGconn *conn, const char *str, size_t len, bool as_ident) *rp = '\0'; return result; + +overflow: + libpq_append_conn_error(conn, + "escaped string size exceeds the maximum allowed (%zu)", + SIZE_MAX); + return NULL; } char * @@ -4386,16 +4429,25 @@ PQescapeByteaInternal(PGconn *conn, unsigned char *result; size_t i; size_t len; - size_t bslash_len = (std_strings ? 1 : 2); + const size_t bslash_len = (std_strings ? 1 : 2); /* - * empty string has 1 char ('\0') + * Calculate the escaped length, watching for overflow as we do with + * PQescapeInternal(). The following code relies on a small constant + * bslash_len so that small additions and multiplications don't need their + * own overflow checks. + * + * Start with the empty string, which has 1 char ('\0'). */ len = 1; if (use_hex) { - len += bslash_len + 1 + 2 * from_length; + /* We prepend "\x" and double each input character. */ + if (add_size_overflow(len, bslash_len + 1, &len) || + add_size_overflow(len, from_length, &len) || + add_size_overflow(len, from_length, &len)) + goto overflow; } else { @@ -4403,13 +4455,25 @@ PQescapeByteaInternal(PGconn *conn, for (i = from_length; i > 0; i--, vp++) { if (*vp < 0x20 || *vp > 0x7e) - len += bslash_len + 3; + { + if (add_size_overflow(len, bslash_len + 3, &len)) /* octal "\ooo" */ + goto overflow; + } else if (*vp == '\'') - len += 2; + { + if (add_size_overflow(len, 2, &len)) /* double each quote */ + goto overflow; + } else if (*vp == '\\') - len += bslash_len + bslash_len; + { + if (add_size_overflow(len, bslash_len * 2, &len)) /* double each backslash */ + goto overflow; + } else - len++; + { + if (add_size_overflow(len, 1, &len)) + goto overflow; + } } } @@ -4470,6 +4534,13 @@ PQescapeByteaInternal(PGconn *conn, *rp = '\0'; return result; + +overflow: + if (conn) + libpq_append_conn_error(conn, + "escaped bytea size exceeds the maximum allowed (%zu)", + SIZE_MAX); + return NULL; } unsigned char * diff --git a/src/interfaces/libpq/fe-print.c b/src/interfaces/libpq/fe-print.c index 40620b47e94..4b9dd7da087 100644 --- a/src/interfaces/libpq/fe-print.c +++ b/src/interfaces/libpq/fe-print.c @@ -107,6 +107,16 @@ PQprint(FILE *fout, const PGresult *res, const PQprintOpt *po) } screen_size; #endif + /* + * Quick sanity check on po->fieldSep, since we make heavy use of int + * math throughout. + */ + if (fs_len < strlen(po->fieldSep)) + { + fprintf(stderr, libpq_gettext("overlong field separator\n")); + goto exit; + } + nTups = PQntuples(res); fieldNames = (const char **) calloc(nFields, sizeof(char *)); fieldNotNum = (unsigned char *) calloc(nFields, 1); @@ -402,7 +412,7 @@ do_field(const PQprintOpt *po, const PGresult *res, { if (plen > fieldMax[j]) fieldMax[j] = plen; - if (!(fields[i * nFields + j] = (char *) malloc(plen + 1))) + if (!(fields[i * nFields + j] = (char *) malloc((size_t) plen + 1))) { fprintf(stderr, libpq_gettext("out of memory\n")); return false; @@ -452,6 +462,27 @@ do_field(const PQprintOpt *po, const PGresult *res, } +/* + * Frontend version of the backend's add_size(), intended to be API-compatible + * with the pg_add_*_overflow() helpers. Stores the result into *dst on success. + * Returns true instead if the addition overflows. + * + * TODO: move to common/int.h + */ +static bool +add_size_overflow(size_t s1, size_t s2, size_t *dst) +{ + size_t result; + + result = s1 + s2; + if (result < s1 || result < s2) + return true; + + *dst = result; + return false; +} + + static char * do_header(FILE *fout, const PQprintOpt *po, const int nFields, int *fieldMax, const char **fieldNames, unsigned char *fieldNotNum, @@ -464,15 +495,31 @@ do_header(FILE *fout, const PQprintOpt *po, const int nFields, int *fieldMax, fputs("", fout); else { - int tot = 0; + size_t tot = 0; int n = 0; char *p = NULL; + /* Calculate the border size, checking for overflow. */ for (; n < nFields; n++) - tot += fieldMax[n] + fs_len + (po->standard ? 2 : 0); + { + /* Field plus separator, plus 2 extra '-' in standard format. */ + if (add_size_overflow(tot, fieldMax[n], &tot) || + add_size_overflow(tot, fs_len, &tot) || + (po->standard && add_size_overflow(tot, 2, &tot))) + goto overflow; + } if (po->standard) - tot += fs_len * 2 + 2; - border = malloc(tot + 1); + { + /* An extra separator at the front and back. */ + if (add_size_overflow(tot, fs_len, &tot) || + add_size_overflow(tot, fs_len, &tot) || + add_size_overflow(tot, 2, &tot)) + goto overflow; + } + if (add_size_overflow(tot, 1, &tot)) /* terminator */ + goto overflow; + + border = malloc(tot); if (!border) { fprintf(stderr, libpq_gettext("out of memory\n")); @@ -535,6 +582,10 @@ do_header(FILE *fout, const PQprintOpt *po, const int nFields, int *fieldMax, else fprintf(fout, "\n%s\n", border); return border; + +overflow: + fprintf(stderr, libpq_gettext("header size exceeds the maximum allowed\n")); + return NULL; } diff --git a/src/interfaces/libpq/fe-protocol3.c b/src/interfaces/libpq/fe-protocol3.c index dbb64161a33..82d0516e138 100644 --- a/src/interfaces/libpq/fe-protocol3.c +++ b/src/interfaces/libpq/fe-protocol3.c @@ -25,6 +25,7 @@ #include #include +#include #ifdef WIN32 #include "win32.h" @@ -64,8 +65,8 @@ static int getReadyForQuery(PGconn *conn); static void saveCdbStatMsg(PGresult *result, char *data, int len); static void reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding); -static int build_startup_packet(const PGconn *conn, char *packet, - const PQEnvironmentOption *options); +static size_t build_startup_packet(const PGconn *conn, char *packet, + const PQEnvironmentOption *options); /* @@ -1385,8 +1386,21 @@ reportErrorPosition(PQExpBuffer msg, const char *query, int loc, int encoding) * scridx[] respectively. */ - /* we need a safe allocation size... */ + /* + * We need a safe allocation size. + * + * The only caller of reportErrorPosition() is pqBuildErrorMessage3(); it + * gets its query from either a PQresultErrorField() or a PGcmdQueueEntry, + * both of which must have fit into conn->inBuffer/outBuffer. So slen fits + * inside an int, but we can't assume that (slen * sizeof(int)) fits + * inside a size_t. + */ slen = strlen(wquery) + 1; + if (slen > SIZE_MAX / sizeof(int)) + { + free(wquery); + return; + } qidx = (int *) malloc(slen * sizeof(int)); if (qidx == NULL) @@ -2443,15 +2457,43 @@ pqBuildStartupPacket3(PGconn *conn, int *packetlen, const PQEnvironmentOption *options) { char *startpacket; + size_t len; + + len = build_startup_packet(conn, NULL, options); + if (len == 0 || len > INT_MAX) + return NULL; - *packetlen = build_startup_packet(conn, NULL, options); + *packetlen = len; startpacket = (char *) malloc(*packetlen); if (!startpacket) return NULL; - *packetlen = build_startup_packet(conn, startpacket, options); + + len = build_startup_packet(conn, startpacket, options); + Assert(*packetlen == len); + return startpacket; } +/* + * Frontend version of the backend's add_size(), intended to be API-compatible + * with the pg_add_*_overflow() helpers. Stores the result into *dst on success. + * Returns true instead if the addition overflows. + * + * TODO: move to common/int.h + */ +static bool +add_size_overflow(size_t s1, size_t s2, size_t *dst) +{ + size_t result; + + result = s1 + s2; + if (result < s1 || result < s2) + return true; + + *dst = result; + return false; +} + /* * Build a startup packet given a filled-in PGconn structure. * @@ -2459,13 +2501,13 @@ pqBuildStartupPacket3(PGconn *conn, int *packetlen, * To avoid duplicate logic, this routine is called twice: the first time * (with packet == NULL) just counts the space needed, the second time * (with packet == allocated space) fills it in. Return value is the number - * of bytes used. + * of bytes used, or zero in the unlikely event of size_t overflow. */ -static int +static size_t build_startup_packet(const PGconn *conn, char *packet, const PQEnvironmentOption *options) { - int packet_len = 0; + size_t packet_len = 0; const PQEnvironmentOption *next_eo; const char *val; @@ -2484,10 +2526,12 @@ build_startup_packet(const PGconn *conn, char *packet, do { \ if (packet) \ strcpy(packet + packet_len, optname); \ - packet_len += strlen(optname) + 1; \ + if (add_size_overflow(packet_len, strlen(optname) + 1, &packet_len)) \ + return 0; \ if (packet) \ strcpy(packet + packet_len, optval); \ - packet_len += strlen(optval) + 1; \ + if (add_size_overflow(packet_len, strlen(optval) + 1, &packet_len)) \ + return 0; \ } while(0) if (conn->pguser && conn->pguser[0]) @@ -2535,7 +2579,8 @@ build_startup_packet(const PGconn *conn, char *packet, /* Add trailing terminator */ if (packet) packet[packet_len] = '\0'; - packet_len++; + if (add_size_overflow(packet_len, 1, &packet_len)) + return 0; return packet_len; } diff --git a/src/interfaces/libpq/libpq-int.h b/src/interfaces/libpq/libpq-int.h index fabf6bab94d..908cc5da0b9 100644 --- a/src/interfaces/libpq/libpq-int.h +++ b/src/interfaces/libpq/libpq-int.h @@ -558,7 +558,16 @@ struct pg_conn pg_prng_state prng_state; /* prng state for load balancing connections */ - /* Buffer for data received from backend and not yet processed */ + /* + * Buffer for data received from backend and not yet processed. + * + * NB: We rely on a maximum inBufSize/outBufSize of INT_MAX (and therefore + * an INT_MAX upper bound on the size of any and all packet contents) to + * avoid overflow; for example in reportErrorPosition(). Changing the type + * would require not only an adjustment to the overflow protection in + * pqCheck{In,Out}BufferSpace(), but also a careful audit of all libpq + * code that uses ints during size calculations. + */ char *inBuffer; /* currently allocated buffer */ int inBufSize; /* allocated size of buffer */ int inStart; /* offset to first unconsumed data in buffer */ diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c index d1dd48a993b..3a470dbae44 100644 --- a/src/test/modules/test_regex/test_regex.c +++ b/src/test/modules/test_regex/test_regex.c @@ -414,7 +414,8 @@ parse_test_flags(test_re_flags *flags, text *opts) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid regular expression test option: \"%.*s\"", - pg_mblen(opt_p + i), opt_p + i))); + pg_mblen_range(opt_p + i, opt_p + opt_len), + opt_p + i))); break; } } diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 20b401a9ef9..bf69c337ec2 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -1601,6 +1601,11 @@ select '[0:1]={1.1,2.2}'::float8[]; (1 row) -- all of the above should be accepted +-- some day we might allow these cases, but for now they're errors: +select array[]::oidvector; +ERROR: array is not a valid oidvector +select array[]::int2vector; +ERROR: array is not a valid int2vector -- tests for array aggregates CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]) DISTRIBUTED RANDOMLY; INSERT INTO arraggtest (f1, f2, f3) VALUES diff --git a/src/test/regress/expected/encoding.out b/src/test/regress/expected/encoding.out new file mode 100644 index 00000000000..ea1f38cff41 --- /dev/null +++ b/src/test/regress/expected/encoding.out @@ -0,0 +1,401 @@ +/* skip test if not UTF8 server encoding */ +SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset +\if :skip_test +\quit +\endif +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX +\set regresslib :libdir '/regress' :dlsuffix +CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[] + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean + AS :'regresslib' LANGUAGE C STRICT; +CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text); +INSERT INTO regress_encoding +VALUES ('café', + 'caf' || test_bytea_to_text('\xc3'), + 'café' || test_bytea_to_text('\x00') || 'dcba', + 'caf' || test_bytea_to_text('\xc300') || 'dcba'); +SELECT good, truncated, with_nul FROM regress_encoding; + good | truncated | with_nul +------+-----------+---------- + café | caf | café +(1 row) + +SELECT length(good) FROM regress_encoding; + length +-------- + 4 +(1 row) + +SELECT substring(good, 3, 1) FROM regress_encoding; + substring +----------- + f +(1 row) + +SELECT substring(good, 4, 1) FROM regress_encoding; + substring +----------- + é +(1 row) + +SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding; + regexp_replace +---------------- + é +(1 row) + +SELECT reverse(good) FROM regress_encoding; + reverse +--------- + éfac +(1 row) + +-- invalid short mb character = error +SELECT length(truncated) FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 +SELECT substring(truncated, 1, 1) FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 +SELECT reverse(truncated) FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 +-- invalid short mb character = silently dropped +SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding; + regexp_replace +---------------- + caf +(1 row) + +-- PostgreSQL doesn't allow strings to contain NUL. If a corrupted string +-- contains NUL at a character boundary position, some functions treat it as a +-- character while others treat it as a terminator, as implementation details. +-- NUL = terminator +SELECT length(with_nul) FROM regress_encoding; + length +-------- + 4 +(1 row) + +SELECT substring(with_nul, 3, 1) FROM regress_encoding; + substring +----------- + f +(1 row) + +SELECT substring(with_nul, 4, 1) FROM regress_encoding; + substring +----------- + é +(1 row) + +SELECT substring(with_nul, 5, 1) FROM regress_encoding; + substring +----------- + +(1 row) + +SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding; + convert_to +------------ + \x +(1 row) + +SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding; + regexp_replace +---------------- + é +(1 row) + +-- NUL = character +SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding; + with_nul | reverse | reverse +----------+---------+--------- + café | abcd | café +(1 row) + +-- If a corrupted string contains NUL in the tail bytes of a multibyte +-- character (invalid in all encodings), it is considered part of the +-- character for length purposes. An error will only be raised in code paths +-- that convert or verify encodings. +SELECT length(truncated_with_nul) FROM regress_encoding; + length +-------- + 8 +(1 row) + +SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding; + substring +----------- + f +(1 row) + +SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding; + substring +----------- + +(1 row) + +SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 0x00 +SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding; + substring +----------- + d +(1 row) + +SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding; + ?column? +---------- + t +(1 row) + +SELECT reverse(truncated_with_nul) FROM regress_encoding; + reverse +--------- + abcd +(1 row) + +-- unbounded: sequence would overrun the string! +SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3) +FROM regress_encoding; + test_mblen_func +----------------- + 2 +(1 row) + +-- condition detected when using the length/range variants +SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3) +FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 +SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3) +FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 +-- unbounded: sequence would overrun the string, if the terminator were really +-- the end of it +SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3) +FROM regress_encoding; + test_mblen_func +----------------- + 2 +(1 row) + +SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3) +FROM regress_encoding; + test_mblen_func +----------------- + 2 +(1 row) + +-- condition detected when using the cstr variants +SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3) +FROM regress_encoding; +ERROR: invalid byte sequence for encoding "UTF8": 0xc3 +DROP TABLE regress_encoding; +-- mb<->wchar conversions +CREATE FUNCTION test_encoding(encoding text, description text, input bytea) +RETURNS VOID LANGUAGE plpgsql AS +$$ +DECLARE + prefix text; + len int; + wchars int[]; + round_trip bytea; + result text; +BEGIN + prefix := rpad(encoding || ' ' || description || ':', 28); + + -- XXX could also test validation, length functions and include client + -- only encodings with these test cases + + IF test_valid_server_encoding(encoding) THEN + wchars := test_text_to_wchars(encoding, test_bytea_to_text(input)); + round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars)); + if input = round_trip then + result := 'OK'; + elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then + result := 'truncated'; + else + result := 'failed'; + end if; + RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result; + END IF; +END; +$$; +-- No validation is done on the encoding itself, just the length to avoid +-- overruns, so some of the byte sequences below are bogus. They cover +-- all code branches, server encodings only for now. +CREATE TABLE encoding_tests (encoding text, description text, input bytea); +INSERT INTO encoding_tests VALUES + -- LATIN1, other single-byte encodings + ('LATIN1', 'ASCII', 'a'), + ('LATIN1', 'extended', '\xe9'), + -- EUC_JP, EUC_JIS_2004, EUR_KR (for the purposes of wchar conversion): + -- 2 8e (CS2, not used by EUR_KR but arbitrarily considered to have EUC_JP length) + -- 3 8f (CS3, not used by EUR_KR but arbitrarily considered to have EUC_JP length) + -- 2 80..ff (CS1) + ('EUC_JP', 'ASCII', 'a'), + ('EUC_JP', 'CS1, short', '\x80'), + ('EUC_JP', 'CS1', '\x8002'), + ('EUC_JP', 'CS2, short', '\x8e'), + ('EUC_JP', 'CS2', '\x8e02'), + ('EUC_JP', 'CS3, short', '\x8f'), + ('EUC_JP', 'CS3, short', '\x8f02'), + ('EUC_JP', 'CS3', '\x8f0203'), + -- EUC_CN + -- 3 8e (CS2, not used but arbitrarily considered to have length 3) + -- 3 8f (CS3, not used but arbitrarily considered to have length 3) + -- 2 80..ff (CS1) + ('EUC_CN', 'ASCII', 'a'), + ('EUC_CN', 'CS1, short', '\x80'), + ('EUC_CN', 'CS1', '\x8002'), + ('EUC_CN', 'CS2, short', '\x8e'), + ('EUC_CN', 'CS2, short', '\x8e02'), + ('EUC_CN', 'CS2', '\x8e0203'), + ('EUC_CN', 'CS3, short', '\x8f'), + ('EUC_CN', 'CS3, short', '\x8f02'), + ('EUC_CN', 'CS3', '\x8f0203'), + -- EUC_TW: + -- 4 8e (CS2) + -- 3 8f (CS3, not used but arbitrarily considered to have length 3) + -- 2 80..ff (CS1) + ('EUC_TW', 'ASCII', 'a'), + ('EUC_TW', 'CS1, short', '\x80'), + ('EUC_TW', 'CS1', '\x8002'), + ('EUC_TW', 'CS2, short', '\x8e'), + ('EUC_TW', 'CS2, short', '\x8e02'), + ('EUC_TW', 'CS2, short', '\x8e0203'), + ('EUC_TW', 'CS2', '\x8e020304'), + ('EUC_TW', 'CS3, short', '\x8f'), + ('EUC_TW', 'CS3, short', '\x8f02'), + ('EUC_TW', 'CS3', '\x8f0203'), + -- UTF8 + -- 2 c0..df + -- 3 e0..ef + -- 4 f0..f7 (but maximum real codepoint U+10ffff has f4) + -- 5 f8..fb (not supported) + -- 6 fc..fd (not supported) + ('UTF8', 'ASCII', 'a'), + ('UTF8', '2 byte, short', '\xdf'), + ('UTF8', '2 byte', '\xdf82'), + ('UTF8', '3 byte, short', '\xef'), + ('UTF8', '3 byte, short', '\xef82'), + ('UTF8', '3 byte', '\xef8283'), + ('UTF8', '4 byte, short', '\xf7'), + ('UTF8', '4 byte, short', '\xf782'), + ('UTF8', '4 byte, short', '\xf78283'), + ('UTF8', '4 byte', '\xf7828384'), + ('UTF8', '5 byte, unsupported', '\xfb'), + ('UTF8', '5 byte, unsupported', '\xfb82'), + ('UTF8', '5 byte, unsupported', '\xfb8283'), + ('UTF8', '5 byte, unsupported', '\xfb828384'), + ('UTF8', '5 byte, unsupported', '\xfb82838485'), + ('UTF8', '6 byte, unsupported', '\xfd'), + ('UTF8', '6 byte, unsupported', '\xfd82'), + ('UTF8', '6 byte, unsupported', '\xfd8283'), + ('UTF8', '6 byte, unsupported', '\xfd828384'), + ('UTF8', '6 byte, unsupported', '\xfd82838485'), + ('UTF8', '6 byte, unsupported', '\xfd8283848586'), + -- MULE_INTERNAL + -- 2 81..8d LC1 + -- 3 90..99 LC2 + ('MULE_INTERNAL', 'ASCII', 'a'), + ('MULE_INTERNAL', 'LC1, short', '\x81'), + ('MULE_INTERNAL', 'LC1', '\x8182'), + ('MULE_INTERNAL', 'LC2, short', '\x90'), + ('MULE_INTERNAL', 'LC2, short', '\x9082'), + ('MULE_INTERNAL', 'LC2', '\x908283'); +SELECT COUNT(test_encoding(encoding, description, input)) > 0 +FROM encoding_tests; +NOTICE: LATIN1 ASCII: \x61 -> {97} -> \x61 = OK +NOTICE: LATIN1 extended: \xe9 -> {233} -> \xe9 = OK +NOTICE: EUC_JP ASCII: \x61 -> {97} -> \x61 = OK +NOTICE: EUC_JP CS1, short: \x80 -> {} -> \x = truncated +NOTICE: EUC_JP CS1: \x8002 -> {32770} -> \x8002 = OK +NOTICE: EUC_JP CS2, short: \x8e -> {} -> \x = truncated +NOTICE: EUC_JP CS2: \x8e02 -> {36354} -> \x8e02 = OK +NOTICE: EUC_JP CS3, short: \x8f -> {} -> \x = truncated +NOTICE: EUC_JP CS3, short: \x8f02 -> {} -> \x = truncated +NOTICE: EUC_JP CS3: \x8f0203 -> {9372163} -> \x8f0203 = OK +NOTICE: EUC_CN ASCII: \x61 -> {97} -> \x61 = OK +NOTICE: EUC_CN CS1, short: \x80 -> {} -> \x = truncated +NOTICE: EUC_CN CS1: \x8002 -> {32770} -> \x8002 = OK +NOTICE: EUC_CN CS2, short: \x8e -> {} -> \x = truncated +NOTICE: EUC_CN CS2, short: \x8e02 -> {} -> \x = truncated +NOTICE: EUC_CN CS2: \x8e0203 -> {9306627} -> \x8e0203 = OK +NOTICE: EUC_CN CS3, short: \x8f -> {} -> \x = truncated +NOTICE: EUC_CN CS3, short: \x8f02 -> {} -> \x = truncated +NOTICE: EUC_CN CS3: \x8f0203 -> {9372163} -> \x8f0203 = OK +NOTICE: EUC_TW ASCII: \x61 -> {97} -> \x61 = OK +NOTICE: EUC_TW CS1, short: \x80 -> {} -> \x = truncated +NOTICE: EUC_TW CS1: \x8002 -> {32770} -> \x8002 = OK +NOTICE: EUC_TW CS2, short: \x8e -> {} -> \x = truncated +NOTICE: EUC_TW CS2, short: \x8e02 -> {} -> \x = truncated +NOTICE: EUC_TW CS2, short: \x8e0203 -> {} -> \x = truncated +NOTICE: EUC_TW CS2: \x8e020304 -> {-1912470780} -> \x8e020304 = OK +NOTICE: EUC_TW CS3, short: \x8f -> {} -> \x = truncated +NOTICE: EUC_TW CS3, short: \x8f02 -> {} -> \x = truncated +NOTICE: EUC_TW CS3: \x8f0203 -> {9372163} -> \x8f0203 = OK +NOTICE: UTF8 ASCII: \x61 -> {97} -> \x61 = OK +NOTICE: UTF8 2 byte, short: \xdf -> {} -> \x = truncated +NOTICE: UTF8 2 byte: \xdf82 -> {1986} -> \xdf82 = OK +NOTICE: UTF8 3 byte, short: \xef -> {} -> \x = truncated +NOTICE: UTF8 3 byte, short: \xef82 -> {} -> \x = truncated +NOTICE: UTF8 3 byte: \xef8283 -> {61571} -> \xef8283 = OK +NOTICE: UTF8 4 byte, short: \xf7 -> {} -> \x = truncated +NOTICE: UTF8 4 byte, short: \xf782 -> {} -> \x = truncated +NOTICE: UTF8 4 byte, short: \xf78283 -> {} -> \x = truncated +NOTICE: UTF8 4 byte: \xf7828384 -> {1843396} -> \xf7828384 = OK +NOTICE: UTF8 5 byte, unsupported: \xfb -> {251} -> \xc3bb = failed +NOTICE: UTF8 5 byte, unsupported: \xfb82 -> {251,130} -> \xc3bbc282 = failed +NOTICE: UTF8 5 byte, unsupported: \xfb8283 -> {251,130,131} -> \xc3bbc282c283 = failed +NOTICE: UTF8 5 byte, unsupported: \xfb828384 -> {251,130,131,132} -> \xc3bbc282c283c284 = failed +NOTICE: UTF8 5 byte, unsupported: \xfb82838485 -> {251,130,131,132,133} -> \xc3bbc282c283c284c285 = failed +NOTICE: UTF8 6 byte, unsupported: \xfd -> {253} -> \xc3bd = failed +NOTICE: UTF8 6 byte, unsupported: \xfd82 -> {253,130} -> \xc3bdc282 = failed +NOTICE: UTF8 6 byte, unsupported: \xfd8283 -> {253,130,131} -> \xc3bdc282c283 = failed +NOTICE: UTF8 6 byte, unsupported: \xfd828384 -> {253,130,131,132} -> \xc3bdc282c283c284 = failed +NOTICE: UTF8 6 byte, unsupported: \xfd82838485 -> {253,130,131,132,133} -> \xc3bdc282c283c284c285 = failed +NOTICE: UTF8 6 byte, unsupported: \xfd8283848586 -> {253,130,131,132,133,134} -> \xc3bdc282c283c284c285c286 = failed +NOTICE: MULE_INTERNAL ASCII: \x61 -> {97} -> \x61 = OK +NOTICE: MULE_INTERNAL LC1, short: \x81 -> {} -> \x = truncated +NOTICE: MULE_INTERNAL LC1: \x8182 -> {8454274} -> \x8182 = OK +NOTICE: MULE_INTERNAL LC2, short: \x90 -> {} -> \x = truncated +NOTICE: MULE_INTERNAL LC2, short: \x9082 -> {} -> \x = truncated +NOTICE: MULE_INTERNAL LC2: \x908283 -> {9470595} -> \x908283 = OK + ?column? +---------- + t +(1 row) + +DROP TABLE encoding_tests; +DROP FUNCTION test_encoding; +DROP FUNCTION test_text_to_wchars; +DROP FUNCTION test_mblen_func; +DROP FUNCTION test_bytea_to_text; +DROP FUNCTION test_text_to_bytea; +-- substring slow path: multi-byte escape char vs. multi-byte pattern char. +SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7'); + substring +----------- + +(1 row) + +-- Levenshtein distance metric: exercise character length cache. +SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name); +ERROR: column "real§_name" does not exist +LINE 1: SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name); + ^ +HINT: Perhaps you meant to reference the column "x.real_name". +-- JSON errcontext: truncate long data. +SELECT repeat(U&'\00A7', 30)::json; +ERROR: invalid input syntax for type json +DETAIL: Token "§§§§§§§§§§§§§§§§§§§§§§§§§§§§§§" is invalid. +CONTEXT: JSON data, line 1: ...§§§§§§§§§§§§§§§§§§§§§§§§ diff --git a/src/test/regress/expected/encoding_1.out b/src/test/regress/expected/encoding_1.out new file mode 100644 index 00000000000..a5b02090901 --- /dev/null +++ b/src/test/regress/expected/encoding_1.out @@ -0,0 +1,4 @@ +/* skip test if not UTF8 server encoding */ +SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset +\if :skip_test +\quit diff --git a/src/test/regress/expected/euc_kr.out b/src/test/regress/expected/euc_kr.out new file mode 100644 index 00000000000..7a61c89a43a --- /dev/null +++ b/src/test/regress/expected/euc_kr.out @@ -0,0 +1,16 @@ +-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent +-- non-UTF8, multibyte encoding as of 2026-01. Since UTF8 can represent all +-- of EUC_KR, also run the test in UTF8. +SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset +\if :skip_test +\quit +\endif +-- Exercise is_multibyte_char_in_char (non-UTF8) slow path. +SELECT POSITION( + convert_from('\xbcf6c7d0', 'EUC_KR') IN + convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR')); + position +---------- + 5 +(1 row) + diff --git a/src/test/regress/expected/euc_kr_1.out b/src/test/regress/expected/euc_kr_1.out new file mode 100644 index 00000000000..faaac5d6355 --- /dev/null +++ b/src/test/regress/expected/euc_kr_1.out @@ -0,0 +1,6 @@ +-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent +-- non-UTF8, multibyte encoding as of 2026-01. Since UTF8 can represent all +-- of EUC_KR, also run the test in UTF8. +SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset +\if :skip_test +\quit diff --git a/src/test/regress/expected/stats_ext.out b/src/test/regress/expected/stats_ext.out index cd6a5e61e67..80bf1733e89 100644 --- a/src/test/regress/expected/stats_ext.out +++ b/src/test/regress/expected/stats_ext.out @@ -3336,6 +3336,40 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x s_expr | {1} (2 rows) +-- CREATE STATISTICS checks for CREATE on the schema +RESET SESSION AUTHORIZATION; +CREATE SCHEMA sts_sch1 CREATE TABLE sts_sch1.tbl (a INT, b INT); +CREATE SCHEMA sts_sch2; +GRANT USAGE ON SCHEMA sts_sch1, sts_sch2 TO regress_stats_user1; +ALTER TABLE sts_sch1.tbl OWNER TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch1 +CREATE STATISTICS sts_sch2.fail ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch2 +RESET SESSION AUTHORIZATION; +GRANT CREATE ON SCHEMA sts_sch1 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.fail ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch2 +RESET SESSION AUTHORIZATION; +REVOKE CREATE ON SCHEMA sts_sch1 FROM regress_stats_user1; +GRANT CREATE ON SCHEMA sts_sch2 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch1 +CREATE STATISTICS sts_sch2.pass1 ON a, b FROM sts_sch1.tbl; +RESET SESSION AUTHORIZATION; +GRANT CREATE ON SCHEMA sts_sch1, sts_sch2 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.pass2 ON a, b FROM sts_sch1.tbl; +-- re-creating statistics via ALTER TABLE bypasses checks for CREATE on schema +RESET SESSION AUTHORIZATION; +REVOKE CREATE ON SCHEMA sts_sch1, sts_sch2 FROM regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +ALTER TABLE sts_sch1.tbl ALTER COLUMN a TYPE SMALLINT; -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); @@ -3345,6 +3379,8 @@ DROP SCHEMA tststats CASCADE; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table tststats.priv_test_tbl drop cascades to view tststats.priv_test_view +DROP SCHEMA sts_sch1, sts_sch2 CASCADE; +NOTICE: drop cascades to table sts_sch1.tbl DROP USER regress_stats_user1; -- test analyze with extended statistics CREATE TABLE tbl_issue1293 (col1 int, col2 int); diff --git a/src/test/regress/expected/stats_ext_optimizer.out b/src/test/regress/expected/stats_ext_optimizer.out index 97513665ae1..cf187147fb9 100644 --- a/src/test/regress/expected/stats_ext_optimizer.out +++ b/src/test/regress/expected/stats_ext_optimizer.out @@ -3371,6 +3371,40 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x s_expr | {1} (2 rows) +-- CREATE STATISTICS checks for CREATE on the schema +RESET SESSION AUTHORIZATION; +CREATE SCHEMA sts_sch1 CREATE TABLE sts_sch1.tbl (a INT, b INT); +CREATE SCHEMA sts_sch2; +GRANT USAGE ON SCHEMA sts_sch1, sts_sch2 TO regress_stats_user1; +ALTER TABLE sts_sch1.tbl OWNER TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch1 +CREATE STATISTICS sts_sch2.fail ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch2 +RESET SESSION AUTHORIZATION; +GRANT CREATE ON SCHEMA sts_sch1 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.fail ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch2 +RESET SESSION AUTHORIZATION; +REVOKE CREATE ON SCHEMA sts_sch1 FROM regress_stats_user1; +GRANT CREATE ON SCHEMA sts_sch2 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +ERROR: permission denied for schema sts_sch1 +CREATE STATISTICS sts_sch2.pass1 ON a, b FROM sts_sch1.tbl; +RESET SESSION AUTHORIZATION; +GRANT CREATE ON SCHEMA sts_sch1, sts_sch2 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.pass2 ON a, b FROM sts_sch1.tbl; +-- re-creating statistics via ALTER TABLE bypasses checks for CREATE on schema +RESET SESSION AUTHORIZATION; +REVOKE CREATE ON SCHEMA sts_sch1, sts_sch2 FROM regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +ALTER TABLE sts_sch1.tbl ALTER COLUMN a TYPE SMALLINT; -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); @@ -3380,6 +3414,8 @@ DROP SCHEMA tststats CASCADE; NOTICE: drop cascades to 2 other objects DETAIL: drop cascades to table tststats.priv_test_tbl drop cascades to view tststats.priv_test_view +DROP SCHEMA sts_sch1, sts_sch2 CASCADE; +NOTICE: drop cascades to table sts_sch1.tbl DROP USER regress_stats_user1; -- test analyze with extended statistics CREATE TABLE tbl_issue1293 (col1 int, col2 int); diff --git a/src/test/regress/parallel_schedule b/src/test/regress/parallel_schedule index bc5918a22b8..c4b5a58713c 100644 --- a/src/test/regress/parallel_schedule +++ b/src/test/regress/parallel_schedule @@ -32,7 +32,7 @@ test: strings md5 numerology point lseg line box path polygon circle date time t # geometry depends on point, lseg, line, box, path, polygon, circle # horology depends on date, time, timetz, timestamp, timestamptz, interval # ---------- -test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database +test: geometry horology tstypes regex type_sanity opr_sanity misc_sanity comments expressions unicode xid mvcc database encoding euc_kr # ---------- # Load huge amounts of data diff --git a/src/test/regress/regress.c b/src/test/regress/regress.c index 0fc787c1aaf..cd4d1df4ef0 100644 --- a/src/test/regress/regress.c +++ b/src/test/regress/regress.c @@ -1285,6 +1285,145 @@ test_enc_conversion(PG_FUNCTION_ARGS) PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); } +/* Convert bytea to text without validation for corruption tests from SQL. */ +PG_FUNCTION_INFO_V1(test_bytea_to_text); +Datum +test_bytea_to_text(PG_FUNCTION_ARGS) +{ + PG_RETURN_TEXT_P(PG_GETARG_BYTEA_PP(0)); +} + +/* And the reverse. */ +PG_FUNCTION_INFO_V1(test_text_to_bytea); +Datum +test_text_to_bytea(PG_FUNCTION_ARGS) +{ + PG_RETURN_BYTEA_P(PG_GETARG_TEXT_PP(0)); +} + +/* Corruption tests in C. */ +PG_FUNCTION_INFO_V1(test_mblen_func); +Datum +test_mblen_func(PG_FUNCTION_ARGS) +{ + const char *func = text_to_cstring(PG_GETARG_BYTEA_PP(0)); + const char *encoding = text_to_cstring(PG_GETARG_BYTEA_PP(1)); + text *string = PG_GETARG_BYTEA_PP(2); + int offset = PG_GETARG_INT32(3); + const char *data = VARDATA_ANY(string); + size_t size = VARSIZE_ANY_EXHDR(string); + int result = 0; + + if (strcmp(func, "pg_mblen_unbounded") == 0) + result = pg_mblen_unbounded(data + offset); + else if (strcmp(func, "pg_mblen_cstr") == 0) + result = pg_mblen_cstr(data + offset); + else if (strcmp(func, "pg_mblen_with_len") == 0) + result = pg_mblen_with_len(data + offset, size - offset); + else if (strcmp(func, "pg_mblen_range") == 0) + result = pg_mblen_range(data + offset, data + size); + else if (strcmp(func, "pg_encoding_mblen") == 0) + result = pg_encoding_mblen(pg_char_to_encoding(encoding), data + offset); + else + elog(ERROR, "unknown function"); + + PG_RETURN_INT32(result); +} + +PG_FUNCTION_INFO_V1(test_text_to_wchars); +Datum +test_text_to_wchars(PG_FUNCTION_ARGS) +{ + const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0)); + text *string = PG_GETARG_TEXT_PP(1); + const char *data = VARDATA_ANY(string); + size_t size = VARSIZE_ANY_EXHDR(string); + pg_wchar *wchars = palloc(sizeof(pg_wchar) * (size + 1)); + Datum *datums; + int wlen; + int encoding; + + encoding = pg_char_to_encoding(encoding_name); + if (encoding < 0) + elog(ERROR, "unknown encoding name: %s", encoding_name); + + if (size > 0) + { + datums = palloc(sizeof(Datum) * size); + wlen = pg_encoding_mb2wchar_with_len(encoding, + data, + wchars, + size); + Assert(wlen >= 0); + Assert(wlen <= size); + Assert(wchars[wlen] == 0); + + for (int i = 0; i < wlen; ++i) + datums[i] = UInt32GetDatum(wchars[i]); + } + else + { + datums = NULL; + wlen = 0; + } + + PG_RETURN_ARRAYTYPE_P(construct_array_builtin(datums, wlen, INT4OID)); +} + +PG_FUNCTION_INFO_V1(test_wchars_to_text); +Datum +test_wchars_to_text(PG_FUNCTION_ARGS) +{ + const char *encoding_name = text_to_cstring(PG_GETARG_BYTEA_PP(0)); + ArrayType *array = PG_GETARG_ARRAYTYPE_P(1); + Datum *datums; + bool *nulls; + char *mb; + text *result; + int wlen; + int bytes; + int encoding; + + encoding = pg_char_to_encoding(encoding_name); + if (encoding < 0) + elog(ERROR, "unknown encoding name: %s", encoding_name); + + deconstruct_array_builtin(array, INT4OID, &datums, &nulls, &wlen); + + if (wlen > 0) + { + pg_wchar *wchars = palloc(sizeof(pg_wchar) * wlen); + + for (int i = 0; i < wlen; ++i) + { + if (nulls[i]) + elog(ERROR, "unexpected NULL in array"); + wchars[i] = DatumGetInt32(datums[i]); + } + + mb = palloc(pg_encoding_max_length(encoding) * wlen + 1); + bytes = pg_encoding_wchar2mb_with_len(encoding, wchars, mb, wlen); + } + else + { + mb = ""; + bytes = 0; + } + + result = palloc(bytes + VARHDRSZ); + SET_VARSIZE(result, bytes + VARHDRSZ); + memcpy(VARDATA(result), mb, bytes); + + PG_RETURN_TEXT_P(result); +} + +PG_FUNCTION_INFO_V1(test_valid_server_encoding); +Datum +test_valid_server_encoding(PG_FUNCTION_ARGS) +{ + return pg_valid_server_encoding(text_to_cstring(PG_GETARG_TEXT_PP(0))); +} + /* Provide SQL access to IsBinaryCoercible() */ PG_FUNCTION_INFO_V1(binary_coercible); Datum diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index aabbd6f92de..075e6949846 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -502,6 +502,10 @@ select array[]::text[]; select '[0:1]={1.1,2.2}'::float8[]; -- all of the above should be accepted +-- some day we might allow these cases, but for now they're errors: +select array[]::oidvector; +select array[]::int2vector; + -- tests for array aggregates CREATE TEMP TABLE arraggtest ( f1 INT[], f2 TEXT[][], f3 FLOAT[]) DISTRIBUTED RANDOMLY; diff --git a/src/test/regress/sql/encoding.sql b/src/test/regress/sql/encoding.sql new file mode 100644 index 00000000000..b9543c0cb32 --- /dev/null +++ b/src/test/regress/sql/encoding.sql @@ -0,0 +1,228 @@ +/* skip test if not UTF8 server encoding */ +SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset +\if :skip_test +\quit +\endif + +\getenv libdir PG_LIBDIR +\getenv dlsuffix PG_DLSUFFIX + +\set regresslib :libdir '/regress' :dlsuffix + +CREATE FUNCTION test_bytea_to_text(bytea) RETURNS text + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_text_to_bytea(text) RETURNS bytea + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_mblen_func(text, text, text, int) RETURNS int + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_text_to_wchars(text, text) RETURNS int[] + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_wchars_to_text(text, int[]) RETURNS text + AS :'regresslib' LANGUAGE C STRICT; +CREATE FUNCTION test_valid_server_encoding(text) RETURNS boolean + AS :'regresslib' LANGUAGE C STRICT; + + +CREATE TABLE regress_encoding(good text, truncated text, with_nul text, truncated_with_nul text); +INSERT INTO regress_encoding +VALUES ('café', + 'caf' || test_bytea_to_text('\xc3'), + 'café' || test_bytea_to_text('\x00') || 'dcba', + 'caf' || test_bytea_to_text('\xc300') || 'dcba'); + +SELECT good, truncated, with_nul FROM regress_encoding; + +SELECT length(good) FROM regress_encoding; +SELECT substring(good, 3, 1) FROM regress_encoding; +SELECT substring(good, 4, 1) FROM regress_encoding; +SELECT regexp_replace(good, '^caf(.)$', '\1') FROM regress_encoding; +SELECT reverse(good) FROM regress_encoding; + +-- invalid short mb character = error +SELECT length(truncated) FROM regress_encoding; +SELECT substring(truncated, 1, 1) FROM regress_encoding; +SELECT reverse(truncated) FROM regress_encoding; +-- invalid short mb character = silently dropped +SELECT regexp_replace(truncated, '^caf(.)$', '\1') FROM regress_encoding; + +-- PostgreSQL doesn't allow strings to contain NUL. If a corrupted string +-- contains NUL at a character boundary position, some functions treat it as a +-- character while others treat it as a terminator, as implementation details. + +-- NUL = terminator +SELECT length(with_nul) FROM regress_encoding; +SELECT substring(with_nul, 3, 1) FROM regress_encoding; +SELECT substring(with_nul, 4, 1) FROM regress_encoding; +SELECT substring(with_nul, 5, 1) FROM regress_encoding; +SELECT convert_to(substring(with_nul, 5, 1), 'UTF8') FROM regress_encoding; +SELECT regexp_replace(with_nul, '^caf(.)$', '\1') FROM regress_encoding; +-- NUL = character +SELECT with_nul, reverse(with_nul), reverse(reverse(with_nul)) FROM regress_encoding; + +-- If a corrupted string contains NUL in the tail bytes of a multibyte +-- character (invalid in all encodings), it is considered part of the +-- character for length purposes. An error will only be raised in code paths +-- that convert or verify encodings. + +SELECT length(truncated_with_nul) FROM regress_encoding; +SELECT substring(truncated_with_nul, 3, 1) FROM regress_encoding; +SELECT substring(truncated_with_nul, 4, 1) FROM regress_encoding; +SELECT convert_to(substring(truncated_with_nul, 4, 1), 'UTF8') FROM regress_encoding; +SELECT substring(truncated_with_nul, 5, 1) FROM regress_encoding; +SELECT regexp_replace(truncated_with_nul, '^caf(.)dcba$', '\1') = test_bytea_to_text('\xc300') FROM regress_encoding; +SELECT reverse(truncated_with_nul) FROM regress_encoding; + +-- unbounded: sequence would overrun the string! +SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated, 3) +FROM regress_encoding; + +-- condition detected when using the length/range variants +SELECT test_mblen_func('pg_mblen_with_len', 'UTF8', truncated, 3) +FROM regress_encoding; +SELECT test_mblen_func('pg_mblen_range', 'UTF8', truncated, 3) +FROM regress_encoding; + +-- unbounded: sequence would overrun the string, if the terminator were really +-- the end of it +SELECT test_mblen_func('pg_mblen_unbounded', 'UTF8', truncated_with_nul, 3) +FROM regress_encoding; +SELECT test_mblen_func('pg_encoding_mblen', 'GB18030', truncated_with_nul, 3) +FROM regress_encoding; + +-- condition detected when using the cstr variants +SELECT test_mblen_func('pg_mblen_cstr', 'UTF8', truncated_with_nul, 3) +FROM regress_encoding; + +DROP TABLE regress_encoding; + +-- mb<->wchar conversions +CREATE FUNCTION test_encoding(encoding text, description text, input bytea) +RETURNS VOID LANGUAGE plpgsql AS +$$ +DECLARE + prefix text; + len int; + wchars int[]; + round_trip bytea; + result text; +BEGIN + prefix := rpad(encoding || ' ' || description || ':', 28); + + -- XXX could also test validation, length functions and include client + -- only encodings with these test cases + + IF test_valid_server_encoding(encoding) THEN + wchars := test_text_to_wchars(encoding, test_bytea_to_text(input)); + round_trip = test_text_to_bytea(test_wchars_to_text(encoding, wchars)); + if input = round_trip then + result := 'OK'; + elsif length(input) > length(round_trip) and round_trip = substr(input, 1, length(round_trip)) then + result := 'truncated'; + else + result := 'failed'; + end if; + RAISE NOTICE '% % -> % -> % = %', prefix, input, wchars, round_trip, result; + END IF; +END; +$$; +-- No validation is done on the encoding itself, just the length to avoid +-- overruns, so some of the byte sequences below are bogus. They cover +-- all code branches, server encodings only for now. +CREATE TABLE encoding_tests (encoding text, description text, input bytea); +INSERT INTO encoding_tests VALUES + -- LATIN1, other single-byte encodings + ('LATIN1', 'ASCII', 'a'), + ('LATIN1', 'extended', '\xe9'), + -- EUC_JP, EUC_JIS_2004, EUR_KR (for the purposes of wchar conversion): + -- 2 8e (CS2, not used by EUR_KR but arbitrarily considered to have EUC_JP length) + -- 3 8f (CS3, not used by EUR_KR but arbitrarily considered to have EUC_JP length) + -- 2 80..ff (CS1) + ('EUC_JP', 'ASCII', 'a'), + ('EUC_JP', 'CS1, short', '\x80'), + ('EUC_JP', 'CS1', '\x8002'), + ('EUC_JP', 'CS2, short', '\x8e'), + ('EUC_JP', 'CS2', '\x8e02'), + ('EUC_JP', 'CS3, short', '\x8f'), + ('EUC_JP', 'CS3, short', '\x8f02'), + ('EUC_JP', 'CS3', '\x8f0203'), + -- EUC_CN + -- 3 8e (CS2, not used but arbitrarily considered to have length 3) + -- 3 8f (CS3, not used but arbitrarily considered to have length 3) + -- 2 80..ff (CS1) + ('EUC_CN', 'ASCII', 'a'), + ('EUC_CN', 'CS1, short', '\x80'), + ('EUC_CN', 'CS1', '\x8002'), + ('EUC_CN', 'CS2, short', '\x8e'), + ('EUC_CN', 'CS2, short', '\x8e02'), + ('EUC_CN', 'CS2', '\x8e0203'), + ('EUC_CN', 'CS3, short', '\x8f'), + ('EUC_CN', 'CS3, short', '\x8f02'), + ('EUC_CN', 'CS3', '\x8f0203'), + -- EUC_TW: + -- 4 8e (CS2) + -- 3 8f (CS3, not used but arbitrarily considered to have length 3) + -- 2 80..ff (CS1) + ('EUC_TW', 'ASCII', 'a'), + ('EUC_TW', 'CS1, short', '\x80'), + ('EUC_TW', 'CS1', '\x8002'), + ('EUC_TW', 'CS2, short', '\x8e'), + ('EUC_TW', 'CS2, short', '\x8e02'), + ('EUC_TW', 'CS2, short', '\x8e0203'), + ('EUC_TW', 'CS2', '\x8e020304'), + ('EUC_TW', 'CS3, short', '\x8f'), + ('EUC_TW', 'CS3, short', '\x8f02'), + ('EUC_TW', 'CS3', '\x8f0203'), + -- UTF8 + -- 2 c0..df + -- 3 e0..ef + -- 4 f0..f7 (but maximum real codepoint U+10ffff has f4) + -- 5 f8..fb (not supported) + -- 6 fc..fd (not supported) + ('UTF8', 'ASCII', 'a'), + ('UTF8', '2 byte, short', '\xdf'), + ('UTF8', '2 byte', '\xdf82'), + ('UTF8', '3 byte, short', '\xef'), + ('UTF8', '3 byte, short', '\xef82'), + ('UTF8', '3 byte', '\xef8283'), + ('UTF8', '4 byte, short', '\xf7'), + ('UTF8', '4 byte, short', '\xf782'), + ('UTF8', '4 byte, short', '\xf78283'), + ('UTF8', '4 byte', '\xf7828384'), + ('UTF8', '5 byte, unsupported', '\xfb'), + ('UTF8', '5 byte, unsupported', '\xfb82'), + ('UTF8', '5 byte, unsupported', '\xfb8283'), + ('UTF8', '5 byte, unsupported', '\xfb828384'), + ('UTF8', '5 byte, unsupported', '\xfb82838485'), + ('UTF8', '6 byte, unsupported', '\xfd'), + ('UTF8', '6 byte, unsupported', '\xfd82'), + ('UTF8', '6 byte, unsupported', '\xfd8283'), + ('UTF8', '6 byte, unsupported', '\xfd828384'), + ('UTF8', '6 byte, unsupported', '\xfd82838485'), + ('UTF8', '6 byte, unsupported', '\xfd8283848586'), + -- MULE_INTERNAL + -- 2 81..8d LC1 + -- 3 90..99 LC2 + ('MULE_INTERNAL', 'ASCII', 'a'), + ('MULE_INTERNAL', 'LC1, short', '\x81'), + ('MULE_INTERNAL', 'LC1', '\x8182'), + ('MULE_INTERNAL', 'LC2, short', '\x90'), + ('MULE_INTERNAL', 'LC2, short', '\x9082'), + ('MULE_INTERNAL', 'LC2', '\x908283'); + +SELECT COUNT(test_encoding(encoding, description, input)) > 0 +FROM encoding_tests; + +DROP TABLE encoding_tests; +DROP FUNCTION test_encoding; +DROP FUNCTION test_text_to_wchars; +DROP FUNCTION test_mblen_func; +DROP FUNCTION test_bytea_to_text; +DROP FUNCTION test_text_to_bytea; + + +-- substring slow path: multi-byte escape char vs. multi-byte pattern char. +SELECT SUBSTRING('a' SIMILAR U&'\00AC' ESCAPE U&'\00A7'); +-- Levenshtein distance metric: exercise character length cache. +SELECT U&"real\00A7_name" FROM (select 1) AS x(real_name); +-- JSON errcontext: truncate long data. +SELECT repeat(U&'\00A7', 30)::json; diff --git a/src/test/regress/sql/euc_kr.sql b/src/test/regress/sql/euc_kr.sql new file mode 100644 index 00000000000..1851b2a8c14 --- /dev/null +++ b/src/test/regress/sql/euc_kr.sql @@ -0,0 +1,12 @@ +-- This test is about EUC_KR encoding, chosen as perhaps the most prevalent +-- non-UTF8, multibyte encoding as of 2026-01. Since UTF8 can represent all +-- of EUC_KR, also run the test in UTF8. +SELECT getdatabaseencoding() NOT IN ('EUC_KR', 'UTF8') AS skip_test \gset +\if :skip_test +\quit +\endif + +-- Exercise is_multibyte_char_in_char (non-UTF8) slow path. +SELECT POSITION( + convert_from('\xbcf6c7d0', 'EUC_KR') IN + convert_from('\xb0fac7d02c20bcf6c7d02c20b1e2bcfa2c20bbee', 'EUC_KR')); diff --git a/src/test/regress/sql/stats_ext.sql b/src/test/regress/sql/stats_ext.sql index db9196b3918..5ac9587c8fa 100644 --- a/src/test/regress/sql/stats_ext.sql +++ b/src/test/regress/sql/stats_ext.sql @@ -1698,12 +1698,45 @@ SELECT statistics_name, most_common_vals FROM pg_stats_ext x SELECT statistics_name, most_common_vals FROM pg_stats_ext_exprs x WHERE tablename = 'stats_ext_tbl' ORDER BY ROW(x.*); +-- CREATE STATISTICS checks for CREATE on the schema +RESET SESSION AUTHORIZATION; +CREATE SCHEMA sts_sch1 CREATE TABLE sts_sch1.tbl (a INT, b INT); +CREATE SCHEMA sts_sch2; +GRANT USAGE ON SCHEMA sts_sch1, sts_sch2 TO regress_stats_user1; +ALTER TABLE sts_sch1.tbl OWNER TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.fail ON a, b FROM sts_sch1.tbl; +RESET SESSION AUTHORIZATION; +GRANT CREATE ON SCHEMA sts_sch1 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.fail ON a, b FROM sts_sch1.tbl; +RESET SESSION AUTHORIZATION; +REVOKE CREATE ON SCHEMA sts_sch1 FROM regress_stats_user1; +GRANT CREATE ON SCHEMA sts_sch2 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.pass1 ON a, b FROM sts_sch1.tbl; +RESET SESSION AUTHORIZATION; +GRANT CREATE ON SCHEMA sts_sch1, sts_sch2 TO regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +CREATE STATISTICS ON a, b FROM sts_sch1.tbl; +CREATE STATISTICS sts_sch2.pass2 ON a, b FROM sts_sch1.tbl; + +-- re-creating statistics via ALTER TABLE bypasses checks for CREATE on schema +RESET SESSION AUTHORIZATION; +REVOKE CREATE ON SCHEMA sts_sch1, sts_sch2 FROM regress_stats_user1; +SET SESSION AUTHORIZATION regress_stats_user1; +ALTER TABLE sts_sch1.tbl ALTER COLUMN a TYPE SMALLINT; + -- Tidy up DROP OPERATOR <<< (int, int); DROP FUNCTION op_leak(int, int); RESET SESSION AUTHORIZATION; DROP TABLE stats_ext_tbl; DROP SCHEMA tststats CASCADE; +DROP SCHEMA sts_sch1, sts_sch2 CASCADE; DROP USER regress_stats_user1; -- test analyze with extended statistics diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 97ae28337d3..0b1b1df3b4a 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -737,6 +737,7 @@ ExtensibleNodeEntry ExtensibleNodeMethods ExtensionControlFile ExtensionInfo +ExtensionSiblingCache ExtensionVersionInfo FDWCollateState FD_SET