Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/duckdb/src/catalog/dependency_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,8 @@ void DependencyManager::AlterObject(CatalogTransaction transaction, CatalogEntry
disallow_alter = false;
break;
}
case AlterTableType::ADD_COLUMN: {
case AlterTableType::ADD_COLUMN:
case AlterTableType::SET_DEFAULT: {
disallow_alter = false;
break;
}
Expand Down
6 changes: 5 additions & 1 deletion src/duckdb/src/common/hive_partitioning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ string HivePartitioning::Unescape(const string &input) {
return StringUtil::URLDecode(input);
}

bool HivePartitioning::IsNull(const string &input) {
return StringUtil::CIEquals(input, "NULL") || input == "__HIVE_DEFAULT_PARTITION__";
}

// matches hive partitions in file name. For example:
// - s3://bucket/var1=value1/bla/bla/var2=value2
// - http(s)://domain(:port)/lala/kasdl/var1=value1/?not-a-var=not-a-value
Expand Down Expand Up @@ -126,7 +130,7 @@ std::map<string, string> HivePartitioning::Parse(const string &filename) {
Value HivePartitioning::GetValue(ClientContext &context, const string &key, const string &str_val,
const LogicalType &type) {
// Handle nulls
if (StringUtil::CIEquals(str_val, "NULL") || str_val == "__HIVE_DEFAULT_PARTITION__") {
if (IsNull(str_val)) {
return Value(type);
}
if (type.id() == LogicalTypeId::VARCHAR) {
Expand Down
3 changes: 3 additions & 0 deletions src/duckdb/src/common/multi_file/multi_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,9 @@ void MultiFileOptions::AutoDetectHiveTypesInternal(MultiFileList &files, ClientC
// type was explicitly provided by the user
continue;
}
if (HivePartitioning::IsNull(part.second)) {
continue; // don't update detected_types for this partition/file
}
LogicalType detected_type = LogicalType::VARCHAR;
Value value(part.second);
for (auto &candidate : candidates) {
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/execution/operator/join/physical_iejoin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ SinkResultType PhysicalIEJoin::Sink(ExecutionContext &context, DataChunk &chunk,

gstate.Sink(context, chunk, lstate);

if (filter_pushdown && !gstate.skip_filter_pushdown) {
if (filter_pushdown && !gstate.skip_filter_pushdown && gstate.child == 1) {
filter_pushdown->Sink(lstate.table.keys, *lstate.local_filter_state);
}

Expand All @@ -177,7 +177,7 @@ SinkCombineResultType PhysicalIEJoin::Combine(ExecutionContext &context, Operato
context.thread.profiler.Flush(*this);
client_profiler.Flush(context.thread.profiler);

if (filter_pushdown && !gstate.skip_filter_pushdown) {
if (filter_pushdown && !gstate.skip_filter_pushdown && gstate.child == 1) {
filter_pushdown->Combine(*gstate.global_filter_state, *lstate.local_filter_state);
}

Expand All @@ -190,7 +190,7 @@ SinkCombineResultType PhysicalIEJoin::Combine(ExecutionContext &context, Operato
SinkFinalizeType PhysicalIEJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &client,
OperatorSinkFinalizeInput &input) const {
auto &gstate = input.global_state.Cast<IEJoinGlobalState>();
if (filter_pushdown && !gstate.skip_filter_pushdown) {
if (filter_pushdown && !gstate.skip_filter_pushdown && gstate.child == 1) {
(void)filter_pushdown->Finalize(client, nullptr, *gstate.global_filter_state, *this);
}
auto &table = *gstate.tables[gstate.child];
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "2-dev302"
#define DUCKDB_PATCH_VERSION "2-dev329"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 5
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.5.2-dev302"
#define DUCKDB_VERSION "v1.5.2-dev329"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "ddba343762"
#define DUCKDB_SOURCE_ID "699249af49"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
2 changes: 2 additions & 0 deletions src/duckdb/src/include/duckdb/common/hive_partitioning.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class HivePartitioning {
DUCKDB_API static string Escape(const string &input);
//! Unescape a hive partition key or value encoded using URL encoding
DUCKDB_API static string Unescape(const string &input);
//! Whether the value encodes NULL: "NULL" (case-insensitive) or "__HIVE_DEFAULT_PARTITION__"
DUCKDB_API static bool IsNull(const string &input);
};

struct HivePartitionKey {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class TopNWindowElimination : public BaseColumnPruner {
const TopNWindowEliminationParameters &params);
bool CanUseLateMaterialization(const LogicalWindow &window, vector<unique_ptr<Expression>> &args,
vector<idx_t> &projections, vector<reference<LogicalOperator>> &stack);
bool ExtractSingleBinding(unique_ptr<Expression> *expr, ColumnBinding &binding,
bool require_direct_column_ref = false);

private:
ClientContext &context;
Expand Down
55 changes: 29 additions & 26 deletions src/duckdb/src/optimizer/topn_window_elimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ string GetLHSRowIdColumnName(const unique_ptr<LogicalOperator> &op, idx_t column

if (op.get()->type != LogicalOperatorType::LOGICAL_GET) {
D_ASSERT(op.get()->type == LogicalOperatorType::LOGICAL_PROJECTION);
D_ASSERT(op.get()->expressions.size() >= column_id &&
D_ASSERT(op.get()->expressions.size() > column_id &&
op.get()->expressions[column_id]->type == ExpressionType::BOUND_COLUMN_REF);
const auto &colref = op.get()->expressions[column_id]->Cast<BoundColumnRefExpression>();
column_id = colref.binding.column_index;
Expand All @@ -139,6 +139,7 @@ string GetLHSRowIdColumnName(const unique_ptr<LogicalOperator> &op, idx_t column
//! Constructs the Top-N window elimination optimizer.
//! `stats_p` supplies per-column statistics consulted during optimization; callers
//! must pass a valid pointer — the optimizer assumes statistics are available.
TopNWindowElimination::TopNWindowElimination(ClientContext &context_p, Optimizer &optimizer,
optional_ptr<column_binding_map_t<unique_ptr<BaseStatistics>>> stats_p)
: context(context_p), optimizer(optimizer), stats(stats_p) {
// Statistics are required (dereferenced without null checks later); fail fast in debug builds.
D_ASSERT(stats);
}

unique_ptr<LogicalOperator> TopNWindowElimination::Optimize(unique_ptr<LogicalOperator> op) {
Expand Down Expand Up @@ -302,13 +303,11 @@ TopNWindowElimination::CreateAggregateOperator(LogicalWindow &window, vector<uni
auto &group = aggregate->groups[i];
if (group->type == ExpressionType::BOUND_COLUMN_REF) {
auto &column_ref = group->Cast<BoundColumnRefExpression>();
if (stats) {
auto group_stats = stats->find(column_ref.binding);
if (group_stats == stats->end()) {
continue;
}
aggregate->group_stats[i] = group_stats->second->ToUnique();
auto group_stats = stats->find(column_ref.binding);
if (group_stats == stats->end()) {
continue;
}
aggregate->group_stats[i] = group_stats->second->ToUnique();
}
}

Expand Down Expand Up @@ -775,39 +774,45 @@ TopNWindowElimination::ExtractOptimizerParameters(const LogicalWindow &window, c
const auto &column_stats = stats->find(column_ref.first);
if (column_stats == stats->end() || column_stats->second->CanHaveNull()) {
params.can_be_null = true;
break;
}
}
column_references.clear();

return params;
}

//! Extracts the single column binding referenced by `expr` into `binding`.
//! Returns false when the expression references zero or multiple columns, or when
//! `require_direct_column_ref` is set and the expression is not itself a bound column ref.
//! Always leaves the shared `column_references` accumulator empty on return.
bool TopNWindowElimination::ExtractSingleBinding(unique_ptr<Expression> *expr, ColumnBinding &binding,
                                                 const bool require_direct_column_ref) {
	const bool is_direct_ref = expr->get()->type == ExpressionType::BOUND_COLUMN_REF;
	if (require_direct_column_ref && !is_direct_ref) {
		return false;
	}
	// Populates the column_references member with every binding seen in the expression.
	VisitExpression(expr);
	const bool exactly_one = column_references.size() == 1;
	if (exactly_one) {
		binding = column_references.begin()->first;
	}
	// Reset shared state regardless of outcome so the next caller starts clean.
	column_references.clear();
	return exactly_one;
}

bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &window, vector<unique_ptr<Expression>> &args,
vector<idx_t> &lhs_projections,
vector<reference<LogicalOperator>> &stack) {
auto &window_expr = window.expressions[0]->Cast<BoundWindowExpression>();
vector<ColumnBinding> projections(window_expr.partitions.size() + args.size());
auto extract_single_binding = [&](unique_ptr<Expression> *expr, ColumnBinding &binding) {
VisitExpression(expr);
if (column_references.size() != 1) {
column_references.clear();
return false;
}
binding = column_references.begin()->first;
column_references.clear();
return true;
};

// Build a projection list for an LHS table scan to recreate the column order of an aggregate with struct packing
for (idx_t i = 0; i < window_expr.partitions.size(); i++) {
auto &partition = window_expr.partitions[i];
if (!extract_single_binding(&partition, projections[i])) {
if (!ExtractSingleBinding(&partition, projections[i])) {
return false;
}
}
for (idx_t i = 0; i < args.size(); i++) {
auto &arg = args[i];
if (!extract_single_binding(&arg, projections[window_expr.partitions.size() + i])) {
if (!ExtractSingleBinding(&arg, projections[window_expr.partitions.size() + i])) {
return false;
}
}
Expand All @@ -828,7 +833,7 @@ bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &windo
if (projection_idx >= projection.expressions.size()) {
return false;
}
if (!extract_single_binding(&projection.expressions[projection_idx], projections[i])) {
if (!ExtractSingleBinding(&projection.expressions[projection_idx], projections[i])) {
return false;
}
}
Expand All @@ -854,11 +859,11 @@ bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &windo
return false;
}
ColumnBinding left_binding;
if (!extract_single_binding(&condition.left, left_binding)) {
if (!ExtractSingleBinding(&condition.left, left_binding, true)) {
return false;
}
ColumnBinding right_binding;
if (!extract_single_binding(&condition.right, right_binding)) {
if (!ExtractSingleBinding(&condition.right, right_binding, true)) {
return false;
}

Expand Down Expand Up @@ -1029,10 +1034,8 @@ unique_ptr<LogicalOperator> TopNWindowElimination::TryPrepareLateMaterialization
auto &join = op.Cast<LogicalComparisonJoin>();
auto &op_child = std::prev(stack_it)->get();

auto &projection_map = join.left_projection_map;
if (&op_child != &*join.children[0]) {
projection_map = join.right_projection_map;
}
auto &projection_map = RefersToSameObject(op_child, *join.children[0]) ? join.left_projection_map
: join.right_projection_map;
for (const auto rowid_idx : rhs_rowid_idxs) {
projection_map.push_back(rowid_idx);
}
Expand Down
8 changes: 5 additions & 3 deletions src/duckdb/src/planner/binder/statement/bind_insert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -521,11 +521,13 @@ BoundStatement Binder::Bind(InsertStatement &stmt) {
auto merge_into = GenerateMergeInto(stmt, table);
return Bind(*merge_into);
}
if (!table.temporary) {
if (table.temporary) {
// Temporary inserts still need a catalog dependency so prepared statements are rebound if the table is dropped.
GetStatementProperties().RegisterDBRead(table.catalog, context);
} else {
// inserting into a non-temporary table: alters underlying database
auto &properties = GetStatementProperties();
DatabaseModificationType modification_type = DatabaseModificationType::INSERT_DATA;
properties.RegisterDBModify(table.catalog, context, modification_type);
GetStatementProperties().RegisterDBModify(table.catalog, context, modification_type);
}

auto insert = make_uniq<LogicalInsert>(table, GenerateTableIndex());
Expand Down
20 changes: 11 additions & 9 deletions src/duckdb/src/storage/table/variant/variant_shredding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,21 +391,27 @@ static LogicalType SetShreddedType(const LogicalType &typed_value, bool fully_co

bool VariantShreddingStats::GetShreddedTypeInternal(const VariantColumnStatsData &column, LogicalType &out_type,
optional_idx parent_count) const {
idx_t max_count = 0;
uint8_t type_index = 0;
if (column.type_counts[0] == column.total_count) {
if (parent_count.IsValid() && column.total_count > parent_count.GetIndex()) {
throw InternalException("Column count is larger than parent count - this should not be possible");
}
auto total_value_count = parent_count.IsValid() ? parent_count.GetIndex() : column.total_count;
const auto null_count = column.type_counts[0];
if (null_count == column.total_count) {
//! All NULL, emit INT32
out_type = SetShreddedType(LogicalTypeId::INTEGER, true);
auto fully_consistent = null_count == total_value_count;
out_type = SetShreddedType(LogicalTypeId::INTEGER, fully_consistent);
return true;
}

idx_t max_count = 0;
uint8_t type_index = 0;
//! Skip the 'VARIANT_NULL' type, we can't shred on NULL
for (uint8_t i = 1; i < static_cast<uint8_t>(VariantLogicalType::ENUM_SIZE); i++) {
if (i == static_cast<uint8_t>(VariantLogicalType::DECIMAL) && !column.decimal_consistent) {
//! Can't shred on DECIMAL, not consistent
continue;
}
idx_t count = column.type_counts[i] + column.type_counts[0];
idx_t count = column.type_counts[i] + null_count;
if (!max_count || count > max_count) {
max_count = count;
type_index = i;
Expand All @@ -415,11 +421,7 @@ bool VariantShreddingStats::GetShreddedTypeInternal(const VariantColumnStatsData
if (!max_count) {
return false;
}
if (parent_count.IsValid() && column.total_count > parent_count.GetIndex()) {
throw InternalException("Column count is larger than parent count - this should not be possible");
}

auto total_value_count = parent_count.IsValid() ? parent_count.GetIndex() : column.total_count;
bool fully_consistent = max_count == total_value_count;
if (type_index == static_cast<uint8_t>(VariantLogicalType::OBJECT)) {
child_list_t<LogicalType> child_types;
Expand Down
19 changes: 1 addition & 18 deletions src/duckdb/src/storage/table/variant_column_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -396,24 +396,7 @@ void VariantColumnData::Append(BaseStatistics &stats, ColumnAppendState &state,
validity->Append(stats, state.child_appends[0], vector, count);

if (IsShredded()) {
auto &unshredded_type = sub_columns[0]->type;
auto &shredded_type = sub_columns[1]->type;

auto variant_shredded_type = LogicalType::STRUCT({
{"unshredded", unshredded_type},
{"shredded", shredded_type},
});
Vector append_vector(variant_shredded_type, count);

VariantShreddedAppendInput append_data {
*sub_columns[0],
*sub_columns[1],
state.child_appends[1],
state.child_appends[2],
VariantStats::GetUnshreddedStats(stats),
VariantStats::GetShreddedStats(stats),
};
AppendShredded(vector, append_vector, count, append_data);
throw InternalException("Can't append to a shredded VariantColumnData");
} else {
for (idx_t i = 0; i < sub_columns.size(); i++) {
sub_columns[i]->Append(VariantStats::GetUnshreddedStats(stats), state.child_appends[i + 1], vector, count);
Expand Down
Loading