From 8b66f0f74cc0b7cb9b0668c7deaee633b1b42c25 Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Wed, 1 Apr 2026 05:50:54 +0000 Subject: [PATCH] Update vendored DuckDB sources to 699249af49 --- src/duckdb/src/catalog/dependency_manager.cpp | 3 +- src/duckdb/src/common/hive_partitioning.cpp | 6 +- .../common/multi_file/multi_file_reader.cpp | 3 + .../operator/join/physical_iejoin.cpp | 6 +- .../function/table/version/pragma_version.cpp | 6 +- .../duckdb/common/hive_partitioning.hpp | 2 + .../optimizer/topn_window_elimination.hpp | 2 + .../src/optimizer/topn_window_elimination.cpp | 55 ++++++++++--------- .../planner/binder/statement/bind_insert.cpp | 8 ++- .../table/variant/variant_shredding.cpp | 20 ++++--- .../src/storage/table/variant_column_data.cpp | 19 +------ 11 files changed, 66 insertions(+), 64 deletions(-) diff --git a/src/duckdb/src/catalog/dependency_manager.cpp b/src/duckdb/src/catalog/dependency_manager.cpp index 92d58991f..5d064f497 100644 --- a/src/duckdb/src/catalog/dependency_manager.cpp +++ b/src/duckdb/src/catalog/dependency_manager.cpp @@ -647,7 +647,8 @@ void DependencyManager::AlterObject(CatalogTransaction transaction, CatalogEntry disallow_alter = false; break; } - case AlterTableType::ADD_COLUMN: { + case AlterTableType::ADD_COLUMN: + case AlterTableType::SET_DEFAULT: { disallow_alter = false; break; } diff --git a/src/duckdb/src/common/hive_partitioning.cpp b/src/duckdb/src/common/hive_partitioning.cpp index a4ae9486a..71e5bf941 100644 --- a/src/duckdb/src/common/hive_partitioning.cpp +++ b/src/duckdb/src/common/hive_partitioning.cpp @@ -88,6 +88,10 @@ string HivePartitioning::Unescape(const string &input) { return StringUtil::URLDecode(input); } +bool HivePartitioning::IsNull(const string &input) { + return StringUtil::CIEquals(input, "NULL") || input == "__HIVE_DEFAULT_PARTITION__"; +} + // matches hive partitions in file name. 
For example: // - s3://bucket/var1=value1/bla/bla/var2=value2 // - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value @@ -126,7 +130,7 @@ std::map HivePartitioning::Parse(const string &filename) { Value HivePartitioning::GetValue(ClientContext &context, const string &key, const string &str_val, const LogicalType &type) { // Handle nulls - if (StringUtil::CIEquals(str_val, "NULL") || str_val == "__HIVE_DEFAULT_PARTITION__") { + if (IsNull(str_val)) { return Value(type); } if (type.id() == LogicalTypeId::VARCHAR) { diff --git a/src/duckdb/src/common/multi_file/multi_file_reader.cpp b/src/duckdb/src/common/multi_file/multi_file_reader.cpp index b5a160694..bbeeec309 100644 --- a/src/duckdb/src/common/multi_file/multi_file_reader.cpp +++ b/src/duckdb/src/common/multi_file/multi_file_reader.cpp @@ -717,6 +717,9 @@ void MultiFileOptions::AutoDetectHiveTypesInternal(MultiFileList &files, ClientC // type was explicitly provided by the user continue; } + if (HivePartitioning::IsNull(part.second)) { + continue; // don't update detected_types for this partition/file + } LogicalType detected_type = LogicalType::VARCHAR; Value value(part.second); for (auto &candidate : candidates) { diff --git a/src/duckdb/src/execution/operator/join/physical_iejoin.cpp b/src/duckdb/src/execution/operator/join/physical_iejoin.cpp index fd392f3b9..40b2c3b94 100644 --- a/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +++ b/src/duckdb/src/execution/operator/join/physical_iejoin.cpp @@ -161,7 +161,7 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, DataChunk &chunk, gstate.Sink(context, chunk, lstate); - if (filter_pushdown && !gstate.skip_filter_pushdown) { + if (filter_pushdown && !gstate.skip_filter_pushdown && gstate.child == 1) { filter_pushdown->Sink(lstate.table.keys, *lstate.local_filter_state); } @@ -177,7 +177,7 @@ SinkCombineResultType PhysicalIEJoin::Combine(ExecutionContext &context, Operato context.thread.profiler.Flush(*this); 
client_profiler.Flush(context.thread.profiler); - if (filter_pushdown && !gstate.skip_filter_pushdown) { + if (filter_pushdown && !gstate.skip_filter_pushdown && gstate.child == 1) { filter_pushdown->Combine(*gstate.global_filter_state, *lstate.local_filter_state); } @@ -190,7 +190,7 @@ SinkCombineResultType PhysicalIEJoin::Combine(ExecutionContext &context, Operato SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &client, OperatorSinkFinalizeInput &input) const { auto &gstate = input.global_state.Cast(); - if (filter_pushdown && !gstate.skip_filter_pushdown) { + if (filter_pushdown && !gstate.skip_filter_pushdown && gstate.child == 1) { (void)filter_pushdown->Finalize(client, nullptr, *gstate.global_filter_state, *this); } auto &table = *gstate.tables[gstate.child]; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index c0fb8585e..0480bc99e 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "2-dev302" +#define DUCKDB_PATCH_VERSION "2-dev329" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.2-dev302" +#define DUCKDB_VERSION "v1.5.2-dev329" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "ddba343762" +#define DUCKDB_SOURCE_ID "699249af49" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp b/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp index f99bfe677..0f04e47d9 100644 --- a/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp +++ b/src/duckdb/src/include/duckdb/common/hive_partitioning.hpp @@ -46,6 +46,8 @@ class 
HivePartitioning { DUCKDB_API static string Escape(const string &input); //! Unescape a hive partition key or value encoded using URL encoding DUCKDB_API static string Unescape(const string &input); + //! Whether the partition value denotes NULL ("NULL", case-insensitive, or "__HIVE_DEFAULT_PARTITION__") + DUCKDB_API static bool IsNull(const string &input); }; struct HivePartitionKey { diff --git a/src/duckdb/src/include/duckdb/optimizer/topn_window_elimination.hpp b/src/duckdb/src/include/duckdb/optimizer/topn_window_elimination.hpp index 18b5dfea4..a94b006d1 100644 --- a/src/duckdb/src/include/duckdb/optimizer/topn_window_elimination.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/topn_window_elimination.hpp @@ -75,6 +75,8 @@ class TopNWindowElimination : public BaseColumnPruner { const TopNWindowEliminationParameters &params); bool CanUseLateMaterialization(const LogicalWindow &window, vector> &args, vector &projections, vector> &stack); + bool ExtractSingleBinding(unique_ptr *expr, ColumnBinding &binding, + bool require_direct_column_ref = false); private: ClientContext &context; diff --git a/src/duckdb/src/optimizer/topn_window_elimination.cpp b/src/duckdb/src/optimizer/topn_window_elimination.cpp index 0fbbeab98..75c73a656 100644 --- a/src/duckdb/src/optimizer/topn_window_elimination.cpp +++ b/src/duckdb/src/optimizer/topn_window_elimination.cpp @@ -122,7 +122,7 @@ string GetLHSRowIdColumnName(const unique_ptr &op, idx_t column if (op.get()->type != LogicalOperatorType::LOGICAL_GET) { D_ASSERT(op.get()->type == LogicalOperatorType::LOGICAL_PROJECTION); - D_ASSERT(op.get()->expressions.size() >= column_id && + D_ASSERT(op.get()->expressions.size() > column_id && op.get()->expressions[column_id]->type == ExpressionType::BOUND_COLUMN_REF); const auto &colref = op.get()->expressions[column_id]->Cast(); column_id = colref.binding.column_index; @@ -139,6 +139,7 @@ string GetLHSRowIdColumnName(const unique_ptr &op, idx_t column TopNWindowElimination::TopNWindowElimination(ClientContext &context_p, Optimizer
&optimizer, optional_ptr>> stats_p) : context(context_p), optimizer(optimizer), stats(stats_p) { + D_ASSERT(stats); } unique_ptr TopNWindowElimination::Optimize(unique_ptr op) { @@ -302,13 +303,11 @@ TopNWindowElimination::CreateAggregateOperator(LogicalWindow &window, vectorgroups[i]; if (group->type == ExpressionType::BOUND_COLUMN_REF) { auto &column_ref = group->Cast(); - if (stats) { - auto group_stats = stats->find(column_ref.binding); - if (group_stats == stats->end()) { - continue; - } - aggregate->group_stats[i] = group_stats->second->ToUnique(); + auto group_stats = stats->find(column_ref.binding); + if (group_stats == stats->end()) { + continue; } + aggregate->group_stats[i] = group_stats->second->ToUnique(); } } @@ -775,6 +774,7 @@ TopNWindowElimination::ExtractOptimizerParameters(const LogicalWindow &window, c const auto &column_stats = stats->find(column_ref.first); if (column_stats == stats->end() || column_stats->second->CanHaveNull()) { params.can_be_null = true; + break; } } column_references.clear(); @@ -782,32 +782,37 @@ TopNWindowElimination::ExtractOptimizerParameters(const LogicalWindow &window, c return params; } +bool TopNWindowElimination::ExtractSingleBinding(unique_ptr *expr, ColumnBinding &binding, + const bool require_direct_column_ref) { + if (require_direct_column_ref && expr->get()->type != ExpressionType::BOUND_COLUMN_REF) { + return false; + } + VisitExpression(expr); + if (column_references.size() != 1) { + column_references.clear(); + return false; + } + binding = column_references.begin()->first; + column_references.clear(); + return true; +} + bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &window, vector> &args, vector &lhs_projections, vector> &stack) { auto &window_expr = window.expressions[0]->Cast(); vector projections(window_expr.partitions.size() + args.size()); - auto extract_single_binding = [&](unique_ptr *expr, ColumnBinding &binding) { - VisitExpression(expr); - if 
(column_references.size() != 1) { - column_references.clear(); - return false; - } - binding = column_references.begin()->first; - column_references.clear(); - return true; - }; // Build a projection list for an LHS table scan to recreate the column order of an aggregate with struct packing for (idx_t i = 0; i < window_expr.partitions.size(); i++) { auto &partition = window_expr.partitions[i]; - if (!extract_single_binding(&partition, projections[i])) { + if (!ExtractSingleBinding(&partition, projections[i])) { return false; } } for (idx_t i = 0; i < args.size(); i++) { auto &arg = args[i]; - if (!extract_single_binding(&arg, projections[window_expr.partitions.size() + i])) { + if (!ExtractSingleBinding(&arg, projections[window_expr.partitions.size() + i])) { return false; } } @@ -828,7 +833,7 @@ bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &windo if (projection_idx >= projection.expressions.size()) { return false; } - if (!extract_single_binding(&projection.expressions[projection_idx], projections[i])) { + if (!ExtractSingleBinding(&projection.expressions[projection_idx], projections[i])) { return false; } } @@ -854,11 +859,11 @@ bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &windo return false; } ColumnBinding left_binding; - if (!extract_single_binding(&condition.left, left_binding)) { + if (!ExtractSingleBinding(&condition.left, left_binding, true)) { return false; } ColumnBinding right_binding; - if (!extract_single_binding(&condition.right, right_binding)) { + if (!ExtractSingleBinding(&condition.right, right_binding, true)) { return false; } @@ -1029,10 +1034,8 @@ unique_ptr TopNWindowElimination::TryPrepareLateMaterialization auto &join = op.Cast(); auto &op_child = std::prev(stack_it)->get(); - auto &projection_map = join.left_projection_map; - if (&op_child != &*join.children[0]) { - projection_map = join.right_projection_map; - } + auto &projection_map = RefersToSameObject(op_child, 
*join.children[0]) ? join.left_projection_map + : join.right_projection_map; for (const auto rowid_idx : rhs_rowid_idxs) { projection_map.push_back(rowid_idx); } diff --git a/src/duckdb/src/planner/binder/statement/bind_insert.cpp b/src/duckdb/src/planner/binder/statement/bind_insert.cpp index 5579d73d7..707e87c87 100644 --- a/src/duckdb/src/planner/binder/statement/bind_insert.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_insert.cpp @@ -521,11 +521,13 @@ BoundStatement Binder::Bind(InsertStatement &stmt) { auto merge_into = GenerateMergeInto(stmt, table); return Bind(*merge_into); } - if (!table.temporary) { + if (table.temporary) { + // Temporary inserts still need a catalog dependency so prepared statements are rebound if the table is dropped. + GetStatementProperties().RegisterDBRead(table.catalog, context); + } else { // inserting into a non-temporary table: alters underlying database - auto &properties = GetStatementProperties(); DatabaseModificationType modification_type = DatabaseModificationType::INSERT_DATA; - properties.RegisterDBModify(table.catalog, context, modification_type); + GetStatementProperties().RegisterDBModify(table.catalog, context, modification_type); } auto insert = make_uniq(table, GenerateTableIndex()); diff --git a/src/duckdb/src/storage/table/variant/variant_shredding.cpp b/src/duckdb/src/storage/table/variant/variant_shredding.cpp index 4d975e61e..f4739ce8b 100644 --- a/src/duckdb/src/storage/table/variant/variant_shredding.cpp +++ b/src/duckdb/src/storage/table/variant/variant_shredding.cpp @@ -391,21 +391,27 @@ static LogicalType SetShreddedType(const LogicalType &typed_value, bool fully_co bool VariantShreddingStats::GetShreddedTypeInternal(const VariantColumnStatsData &column, LogicalType &out_type, optional_idx parent_count) const { - idx_t max_count = 0; - uint8_t type_index = 0; - if (column.type_counts[0] == column.total_count) { + if (parent_count.IsValid() && column.total_count > parent_count.GetIndex()) { + throw 
InternalException("Column count is larger than parent count - this should not be possible"); + } + auto total_value_count = parent_count.IsValid() ? parent_count.GetIndex() : column.total_count; + const auto null_count = column.type_counts[0]; + if (null_count == column.total_count) { //! All NULL, emit INT32 - out_type = SetShreddedType(LogicalTypeId::INTEGER, true); + auto fully_consistent = null_count == total_value_count; + out_type = SetShreddedType(LogicalTypeId::INTEGER, fully_consistent); return true; } + idx_t max_count = 0; + uint8_t type_index = 0; //! Skip the 'VARIANT_NULL' type, we can't shred on NULL for (uint8_t i = 1; i < static_cast(VariantLogicalType::ENUM_SIZE); i++) { if (i == static_cast(VariantLogicalType::DECIMAL) && !column.decimal_consistent) { //! Can't shred on DECIMAL, not consistent continue; } - idx_t count = column.type_counts[i] + column.type_counts[0]; + idx_t count = column.type_counts[i] + null_count; if (!max_count || count > max_count) { max_count = count; type_index = i; @@ -415,11 +421,7 @@ bool VariantShreddingStats::GetShreddedTypeInternal(const VariantColumnStatsData if (!max_count) { return false; } - if (parent_count.IsValid() && column.total_count > parent_count.GetIndex()) { - throw InternalException("Column count is larger than parent count - this should not be possible"); - } - auto total_value_count = parent_count.IsValid() ? 
parent_count.GetIndex() : column.total_count; bool fully_consistent = max_count == total_value_count; if (type_index == static_cast(VariantLogicalType::OBJECT)) { child_list_t child_types; diff --git a/src/duckdb/src/storage/table/variant_column_data.cpp b/src/duckdb/src/storage/table/variant_column_data.cpp index d349239c6..a150747a0 100644 --- a/src/duckdb/src/storage/table/variant_column_data.cpp +++ b/src/duckdb/src/storage/table/variant_column_data.cpp @@ -396,24 +396,7 @@ void VariantColumnData::Append(BaseStatistics &stats, ColumnAppendState &state, validity->Append(stats, state.child_appends[0], vector, count); if (IsShredded()) { - auto &unshredded_type = sub_columns[0]->type; - auto &shredded_type = sub_columns[1]->type; - - auto variant_shredded_type = LogicalType::STRUCT({ - {"unshredded", unshredded_type}, - {"shredded", shredded_type}, - }); - Vector append_vector(variant_shredded_type, count); - - VariantShreddedAppendInput append_data { - *sub_columns[0], - *sub_columns[1], - state.child_appends[1], - state.child_appends[2], - VariantStats::GetUnshreddedStats(stats), - VariantStats::GetShreddedStats(stats), - }; - AppendShredded(vector, append_vector, count, append_data); + throw InternalException("Can't append to a shredded VariantColumnData"); } else { for (idx_t i = 0; i < sub_columns.size(); i++) { sub_columns[i]->Append(VariantStats::GetUnshreddedStats(stats), state.child_appends[i + 1], vector, count);