diff --git a/src/duckdb/extension/parquet/include/parquet_reader.hpp b/src/duckdb/extension/parquet/include/parquet_reader.hpp index 1726d32a8..3fd7dbb8d 100644 --- a/src/duckdb/extension/parquet/include/parquet_reader.hpp +++ b/src/duckdb/extension/parquet/include/parquet_reader.hpp @@ -107,6 +107,7 @@ struct ParquetOptions { explicit ParquetOptions(ClientContext &context); bool binary_as_string = false; + bool variant_legacy_encoding = false; bool file_row_number = false; shared_ptr encryption_config; diff --git a/src/duckdb/extension/parquet/parquet_reader.cpp b/src/duckdb/extension/parquet/parquet_reader.cpp index 583b8e62a..24da46088 100644 --- a/src/duckdb/extension/parquet/parquet_reader.cpp +++ b/src/duckdb/extension/parquet/parquet_reader.cpp @@ -527,6 +527,38 @@ static bool IsGeometryType(const SchemaElement &s_ele, const ParquetFileMetadata return false; } +static bool IsVariantType(const SchemaElement &root, const vector &children) { + if (children.size() < 2) { + return false; + } + //! Names have to be 'metadata' and 'value' respectively + //! But apparently some writers can mix the order, so we are more lenient + if (children[0].name != "metadata" && children[1].name != "metadata") { + return false; + } + if (children[0].name != "value" && children[1].name != "value") { + return false; + } + + //! Verify types + if (children[0].parquet_type != duckdb_parquet::Type::BYTE_ARRAY) { + return false; + } + if (children[1].parquet_type != duckdb_parquet::Type::BYTE_ARRAY) { + return false; + } + if (children.size() == 3) { + auto &typed_value = children[2]; + if (typed_value.name != "typed_value") { + return false; + } + throw NotImplementedException("Shredded Variants are not supported yet"); + } else if (children.size() != 2) { + return false; + } + return true; +} + ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_define, idx_t max_repeat, idx_t &next_schema_idx, idx_t &next_file_idx, ClientContext &context) { @@ -628,6 +660,9 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d const bool is_map = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP; bool is_map_kv = s_ele.__isset.converted_type && s_ele.converted_type == ConvertedType::MAP_KEY_VALUE; bool is_variant = s_ele.__isset.logicalType && s_ele.logicalType.__isset.VARIANT == true; + if (!is_variant && parquet_options.variant_legacy_encoding && IsVariantType(s_ele, child_schemas)) { + is_variant = true; + } if (!is_map_kv && this_idx > 0) { // check if the parent node of this is a map @@ -797,6 +832,9 @@ ParquetOptions::ParquetOptions(ClientContext &context) { if (context.TryGetCurrentSetting("binary_as_string", lookup_value)) { binary_as_string = lookup_value.GetValue(); } + if (context.TryGetCurrentSetting("__delta_only_variant_encoding_enabled", lookup_value)) { + variant_legacy_encoding = lookup_value.GetValue(); + } } ParquetColumnDefinition ParquetColumnDefinition::FromSchemaValue(ClientContext &context, const Value &column_value) { diff --git a/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp index 8f94f0932..7eb3e4996 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp @@ -140,7 +140,9 @@ optional_ptr DuckSchemaEntry::AddEntryInternal(CatalogTransaction if (!set.CreateEntry(transaction, entry_name, std::move(entry), dependencies)) { // entry already exists! if (on_conflict == OnCreateConflict::ERROR_ON_CONFLICT) { - throw CatalogException::EntryAlreadyExists(entry_type, entry_name); + auto existing_entry = set.GetEntry(transaction, entry_name); + auto existing_type = existing_entry ? existing_entry->type : entry_type; + throw CatalogException::EntryAlreadyExists(existing_type, entry_name); } else { return nullptr; } diff --git a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp index 3b7174b7f..a513d5f2d 100644 --- a/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +++ b/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp @@ -745,6 +745,7 @@ unique_ptr DuckTableEntry::RemoveColumn(ClientContext &context, Re dropped_column_is_generated); auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema); + info.new_dependencies = make_uniq(std::move(bound_create_info->dependencies)); if (columns.GetColumn(LogicalIndex(removed_index)).Generated()) { return make_uniq(catalog, schema, *bound_create_info, storage); } @@ -968,6 +969,7 @@ unique_ptr DuckTableEntry::SetDefault(ClientContext &context, SetD auto binder = Binder::CreateBinder(context); auto bound_create_info = binder->BindCreateTableInfo(std::move(create_info), schema); + info.new_dependencies = make_uniq(std::move(bound_create_info->dependencies)); return make_uniq(catalog, schema, *bound_create_info, storage); } diff --git a/src/duckdb/src/catalog/dependency_manager.cpp b/src/duckdb/src/catalog/dependency_manager.cpp index 5d064f497..b0760a621 100644 --- a/src/duckdb/src/catalog/dependency_manager.cpp +++ b/src/duckdb/src/catalog/dependency_manager.cpp @@ -677,8 +677,13 @@ void DependencyManager::AlterObject(CatalogTransaction transaction, CatalogEntry }); // Keep old dependencies - dependency_set_t dependents; + bool has_new_dependencies = alter_info.new_dependencies.get(); ScanSubjects(transaction, old_info, [&](DependencyEntry &dep) { + if (has_new_dependencies && !dep.Subject().flags.IsOwnership()) { + // The alter provided updated dependencies - skip old non-ownership subject dependencies + // as they will be replaced by the new dependencies + return; + } auto entry = LookupEntry(transaction, dep); if (!entry) { return; @@ -689,15 +694,18 @@ void DependencyManager::AlterObject(CatalogTransaction transaction, CatalogEntry dependencies.emplace_back(dep_info); }); - // FIXME: we should update dependencies in the future - // some alters could cause dependencies to change (imagine types of table columns) - // or DEFAULT depending on a sequence - if (!StringUtil::CIEquals(old_obj.name, new_obj.name)) { - // The name has been changed, we need to recreate the dependency links + if (has_new_dependencies || !StringUtil::CIEquals(old_obj.name, new_obj.name)) { + // The dependencies have changed (e.g. SET DEFAULT) or the name has changed + // We need to recreate the dependency links CleanupDependencies(transaction, old_obj); } - // Reinstate the old dependencies + if (has_new_dependencies) { + // Add the new dependencies + CreateDependencies(transaction, new_obj, *alter_info.new_dependencies); + } + + // Reinstate any old dependencies for (auto &dep : dependencies) { CreateDependency(transaction, dep); } diff --git a/src/duckdb/src/execution/operator/join/physical_hash_join.cpp b/src/duckdb/src/execution/operator/join/physical_hash_join.cpp index fc3c46a13..b8f234ac8 100644 --- a/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +++ b/src/duckdb/src/execution/operator/join/physical_hash_join.cpp @@ -726,7 +726,12 @@ void JoinFilterPushdownInfo::PushInFilter(const JoinFilterPushdownFilter &info, // generate the OR-clause - note that we only need to consider unique values here (so we use a seT) value_set_t unique_ht_values; for (idx_t k = 0; k < key_count; k++) { - unique_ht_values.insert(build_vector.GetValue(k)); + // Cast to storage type, only insert if it succeeds + auto value = build_vector.GetValue(k); + if (!value.DefaultTryCastAs(info.columns[filter_idx].storage_type)) { + return; // it's all or nothing sadly + } + unique_ht_values.insert(value); } vector in_list(unique_ht_values.begin(), unique_ht_values.end()); @@ -811,12 +816,23 @@ unique_ptr JoinFilterPushdownInfo::FinalizeFilters(ClientContext &con for (idx_t filter_idx = 0; filter_idx < join_condition.size(); filter_idx++) { const auto cmp = op.conditions[join_condition[filter_idx]].comparison; for (auto &info : probe_info) { - auto filter_col_idx = info.columns[filter_idx].probe_column_index.column_index; + const auto &pushdown_column = info.columns[filter_idx]; + auto &filter_col_idx = pushdown_column.probe_column_index.column_index; auto min_idx = filter_idx * 2; auto max_idx = min_idx + 1; auto min_val = final_min_max->data[min_idx].GetValue(0); auto max_val = final_min_max->data[max_idx].GetValue(0); + + // Cast to storage type, skip if fails + D_ASSERT(pushdown_column.storage_type.IsValid()); + if (!min_val.DefaultTryCastAs(pushdown_column.storage_type)) { + continue; + } + if (!max_val.DefaultTryCastAs(pushdown_column.storage_type)) { + continue; + } + if (min_val.IsNull() || max_val.IsNull()) { // min/max is NULL // this can happen in case all values in the RHS column are NULL, but they are still pushed into the diff --git a/src/duckdb/src/function/aggregate/distributive/minmax.cpp b/src/duckdb/src/function/aggregate/distributive/minmax.cpp index 40c96d412..fd3f4acfc 100644 --- a/src/duckdb/src/function/aggregate/distributive/minmax.cpp +++ b/src/duckdb/src/function/aggregate/distributive/minmax.cpp @@ -333,43 +333,42 @@ static AggregateFunction GetMinMaxOperator(const LogicalType &type) { template unique_ptr BindMinMax(ClientContext &context, AggregateFunction &function, vector> &arguments) { - if (arguments[0]->return_type.id() == LogicalTypeId::VARCHAR) { - auto str_collation = StringType::GetCollation(arguments[0]->return_type); - if (!str_collation.empty() || !Settings::Get(context).empty()) { - // If aggr function is min/max and uses collations, replace bound_function with arg_min/arg_max - // to make sure the result's correctness. - string function_name = function.name == "min" ? "arg_min" : "arg_max"; - QueryErrorContext error_context; - auto func = Catalog::GetEntry(context, "", "", function_name, - OnEntryNotFound::RETURN_NULL, error_context); - if (!func) { - throw NotImplementedException( - "Failure while binding function \"%s\" using collations - arg_min/arg_max do not exist in the " - "catalog - load the core_functions module to fix this issue", - function.name); - } - - auto &func_entry = *func; - - FunctionBinder function_binder(context); - vector types {arguments[0]->return_type, arguments[0]->return_type}; - ErrorData error; - auto best_function = function_binder.BindFunction(func_entry.name, func_entry.functions, types, error); - if (!best_function.IsValid()) { - throw BinderException(string("Fail to find corresponding function for collation min/max: ") + - error.Message()); - } - function = func_entry.functions.GetFunctionByOffset(best_function.GetIndex()); + // We should also push collations for non-VARCHAR here, but we aren't ready for it yet (see internal #8704) + const auto collation = arguments[0]->return_type.id() == LogicalTypeId::VARCHAR && + (!StringType::GetCollation(arguments[0]->return_type).empty() || + !Settings::Get(context).empty()); + auto collated_arg = collation ? arguments[0]->Copy() : nullptr; + if (collation && ExpressionBinder::PushCollation(context, collated_arg, collated_arg->return_type)) { + // If aggr function is min/max and uses collations, replace bound_function with arg_min/arg_max + // to make sure the result's correctness. + string function_name = function.name == "min" ? "arg_min" : "arg_max"; + QueryErrorContext error_context; + auto func = Catalog::GetEntry(context, "", "", function_name, + OnEntryNotFound::RETURN_NULL, error_context); + if (!func) { + throw NotImplementedException( + "Failure while binding function \"%s\" using collations - arg_min/arg_max do not exist in the " + "catalog - load the core_functions module to fix this issue", + function.name); + } - // Create a copied child and PushCollation for it. - arguments.push_back(arguments[0]->Copy()); - ExpressionBinder::PushCollation(context, arguments[1], arguments[0]->return_type); + auto &func_entry = *func; - // Bind function like arg_min/arg_max. - function.arguments[0] = arguments[0]->return_type; - function.SetReturnType(arguments[0]->return_type); - return make_uniq(); + FunctionBinder function_binder(context); + vector types {arguments[0]->return_type, collated_arg->return_type}; + ErrorData error; + auto best_function = function_binder.BindFunction(func_entry.name, func_entry.functions, types, error); + if (!best_function.IsValid()) { + throw BinderException(string("Fail to find corresponding function for collation min/max: ") + + error.Message()); } + function = func_entry.functions.GetFunctionByOffset(best_function.GetIndex()); + + // Bind function like arg_min/arg_max. + arguments.push_back(std::move(collated_arg)); + function.arguments[0] = arguments[0]->return_type; + function.SetReturnType(arguments[0]->return_type); + return make_uniq(); } auto input_type = arguments[0]->return_type; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 0480bc99e..a3623e18a 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "2-dev329" +#define DUCKDB_PATCH_VERSION "2-dev356" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.2-dev329" +#define DUCKDB_VERSION "v1.5.2-dev356" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "699249af49" +#define DUCKDB_SOURCE_ID "10c4e2493e" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp b/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp index bbaa04262..92da8998b 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp @@ -24,6 +24,8 @@ struct LocalUngroupedAggregateState; struct JoinFilterPushdownColumn { //! The probe column index to which this filter should be applied ColumnBinding probe_column_index; + //! The type of the value in storage (LogicalGet) + LogicalType storage_type; }; struct JoinFilterGlobalState { diff --git a/src/duckdb/src/include/duckdb/main/config.hpp b/src/duckdb/src/include/duckdb/main/config.hpp index 74fcbe6c7..9468779bc 100644 --- a/src/duckdb/src/include/duckdb/main/config.hpp +++ b/src/duckdb/src/include/duckdb/main/config.hpp @@ -149,6 +149,8 @@ struct DBConfigOptions { idx_t allocator_flush_threshold = 134217728ULL; //! If bulk deallocation larger than this occurs, flush outstanding allocations (1 << 30, ~1GB) idx_t allocator_bulk_deallocation_flush_threshold = 536870912ULL; + //! Delta Only! - Fall back to recognizing Variant columns structurally + bool variant_legacy_encoding = false; //! Metadata from DuckDB callers string custom_user_agent; //! The default block header size for new duckdb database files. diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index 30a2f3021..d1277a9e1 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -87,6 +87,16 @@ struct Settings { // Start of the auto-generated list of settings structures //===----------------------------------------------------------------------===// +struct DeltaOnlyVariantEncodingEnabledSetting { + using RETURN_TYPE = bool; + static constexpr const char *Name = "__delta_only_variant_encoding_enabled"; + static constexpr const char *Description = "Enables the Parquet reader to identify a Variant structurally."; + static constexpr const char *InputType = "BOOLEAN"; + static void SetGlobal(DatabaseInstance *db, DBConfig &config, const Value ¶meter); + static void ResetGlobal(DatabaseInstance *db, DBConfig &config); + static Value GetSetting(const ClientContext &context); +}; + struct AccessModeSetting { using RETURN_TYPE = AccessMode; static constexpr const char *Name = "access_mode"; diff --git a/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp b/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp index ddf31b265..c496b102c 100644 --- a/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +++ b/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp @@ -11,6 +11,7 @@ #include "duckdb/common/enums/catalog_type.hpp" #include "duckdb/parser/parsed_data/parse_info.hpp" #include "duckdb/common/enums/on_entry_not_found.hpp" +#include "duckdb/catalog/dependency_list.hpp" namespace duckdb { @@ -60,6 +61,8 @@ struct AlterInfo : public ParseInfo { string name; //! Allow altering internal entries bool allow_internal; + //! New dependencies for the altered entry (set during binding) + unique_ptr new_dependencies; public: virtual CatalogType GetCatalogType() const = 0; diff --git a/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp b/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp index b9324be04..eabbd3e82 100644 --- a/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp +++ b/src/duckdb/src/include/duckdb/planner/operator/logical_update.hpp @@ -45,6 +45,8 @@ class LogicalUpdate : public LogicalOperator { DUCKDB_API static void BindExtraColumns(TableCatalogEntry &table, LogicalGet &get, LogicalProjection &proj, LogicalUpdate &update, physical_index_set_t &bound_columns); + static void RewriteInPlaceUpdates(LogicalOperator &update_op); + protected: vector GetColumnBindings() override; void ResolveTypes() override; diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 3f27c726a..479be82c4 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -65,6 +65,7 @@ bool DBConfigOptions::debug_print_bindings = false; static const ConfigurationOption internal_options[] = { + DUCKDB_GLOBAL(DeltaOnlyVariantEncodingEnabledSetting), DUCKDB_GLOBAL(AccessModeSetting), DUCKDB_SETTING_CALLBACK(AllocatorBackgroundThreadsSetting), DUCKDB_GLOBAL(AllocatorBulkDeallocationFlushThresholdSetting), @@ -206,12 +207,12 @@ static const ConfigurationOption internal_options[] = { DUCKDB_SETTING(ZstdMinStringLengthSetting), FINAL_SETTING}; -static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("memory_limit", 98), - DUCKDB_SETTING_ALIAS("null_order", 42), - DUCKDB_SETTING_ALIAS("profiling_output", 117), - DUCKDB_SETTING_ALIAS("user", 132), - DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 24), - DUCKDB_SETTING_ALIAS("worker_threads", 131), +static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("memory_limit", 99), + DUCKDB_SETTING_ALIAS("null_order", 43), + DUCKDB_SETTING_ALIAS("profiling_output", 118), + DUCKDB_SETTING_ALIAS("user", 133), + DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 25), + DUCKDB_SETTING_ALIAS("worker_threads", 132), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/duckdb/src/main/settings/custom_settings.cpp b/src/duckdb/src/main/settings/custom_settings.cpp index 91ef2d454..e762e54e2 100644 --- a/src/duckdb/src/main/settings/custom_settings.cpp +++ b/src/duckdb/src/main/settings/custom_settings.cpp @@ -108,6 +108,22 @@ Value AllocatorBulkDeallocationFlushThresholdSetting::GetSetting(const ClientCon return Value(StringUtil::BytesToHumanReadableString(config.options.allocator_bulk_deallocation_flush_threshold)); } +//===----------------------------------------------------------------------===// +// Delta Only Variant Legacy Encoding +//===----------------------------------------------------------------------===// +void DeltaOnlyVariantEncodingEnabledSetting::SetGlobal(DatabaseInstance *db, DBConfig &config, const Value &input) { + throw InvalidInputException("This setting is not adjustable by a user"); +} + +void DeltaOnlyVariantEncodingEnabledSetting::ResetGlobal(DatabaseInstance *db, DBConfig &config) { + throw InvalidInputException("This setting is not adjustable by a user"); +} + +Value DeltaOnlyVariantEncodingEnabledSetting::GetSetting(const ClientContext &context) { + auto &config = DBConfig::GetConfig(context); + return Value::BOOLEAN(config.options.variant_legacy_encoding); +} + //===----------------------------------------------------------------------===// // Allocator Flush Threshold //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp b/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp index 74227ddda..08015778d 100644 --- a/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +++ b/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp @@ -7,6 +7,7 @@ #include "duckdb/optimizer/optimizer.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" #include "duckdb/planner/expression/bound_columnref_expression.hpp" +#include "duckdb/planner/expression/bound_cast_expression.hpp" #include "duckdb/planner/operator/logical_aggregate.hpp" #include "duckdb/planner/operator/logical_comparison_join.hpp" #include "duckdb/planner/operator/logical_get.hpp" @@ -20,14 +21,38 @@ JoinFilterPushdownOptimizer::JoinFilterPushdownOptimizer(Optimizer &optimizer) : } bool PushdownJoinFilterExpression(Expression &expr, JoinFilterPushdownColumn &filter) { - if (expr.GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) { - // not a simple column ref - bail-out + if (expr.return_type.IsNested()) { + // nested columns are not supported for pushdown + return false; + } + if (expr.return_type.id() == LogicalTypeId::INTERVAL) { + // interval is not supported for pushdown + return false; + } + switch (expr.GetExpressionClass()) { + case ExpressionClass::BOUND_COLUMN_REF: { + // column-ref - pass through the new column binding + auto &colref = expr.Cast(); + filter.probe_column_index = colref.binding; + return true; + } + case ExpressionClass::BOUND_CAST: { + // We allow pushing through integral down/upcasts, as long as source/target are (u)bigint or smaller + const auto &bound_cast = expr.Cast(); + const auto &src = bound_cast.child->return_type; + const auto &tgt = bound_cast.return_type; + if (!src.IsIntegral() || !tgt.IsIntegral()) { + return false; + } + if (GetTypeIdSize(src.InternalType()) > GetTypeIdSize(PhysicalType::INT64) || + GetTypeIdSize(tgt.InternalType()) > GetTypeIdSize(PhysicalType::INT64)) { + return false; // Only do this for (u)bigint and smaller + } + return PushdownJoinFilterExpression(*bound_cast.child, filter); + } + default: return false; } - // column-ref - pass through the new column binding - auto &colref = expr.Cast(); - filter.probe_column_index = colref.binding; - return true; } void JoinFilterPushdownOptimizer::GetPushdownFilterTargets(LogicalOperator &op, @@ -97,11 +122,21 @@ void JoinFilterPushdownOptimizer::GetPushdownFilterTargets(LogicalOperator &op, // filter pushdown is not supported - no need to consider this node return; } + get.ResolveOperatorTypes(); + const auto bindings = get.GetColumnBindings(); for (auto &filter : columns) { if (filter.probe_column_index.table_index != get.table_index) { // the filter does not apply to the probe side here - bail-out return; } + // add storage type to columns + for (idx_t i = 0; i < bindings.size(); i++) { + if (filter.probe_column_index.column_index == bindings[i].column_index) { + filter.storage_type = get.types[i]; + break; + } + } + D_ASSERT(filter.storage_type != LogicalType::INVALID); } targets.emplace_back(get, std::move(columns)); break; @@ -205,23 +240,13 @@ void JoinFilterPushdownOptimizer::GenerateJoinFilters(LogicalComparisonJoin &joi default: continue; } - if (cond.left->GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) { - // only bound column ref supported for now - continue; - } - if (cond.left->return_type.IsNested()) { - // nested columns are not supported for pushdown - continue; - } - if (cond.left->return_type.id() == LogicalTypeId::INTERVAL) { - // interval is not supported for pushdown + + JoinFilterPushdownColumn pushdown_col; + if (!PushdownJoinFilterExpression(*cond.left, pushdown_col)) { continue; } - JoinFilterPushdownColumn pushdown_col; - auto &colref = cond.left->Cast(); - pushdown_col.probe_column_index = colref.binding; - pushdown_columns.push_back(pushdown_col); + pushdown_columns.push_back(pushdown_col); pushdown_info->join_condition.push_back(cond_idx); } if (pushdown_columns.empty()) { diff --git a/src/duckdb/src/planner/operator/logical_update.cpp b/src/duckdb/src/planner/operator/logical_update.cpp index b2de4d12d..36ceaad5f 100644 --- a/src/duckdb/src/planner/operator/logical_update.cpp +++ b/src/duckdb/src/planner/operator/logical_update.cpp @@ -1,4 +1,6 @@ #include "duckdb/planner/operator/logical_update.hpp" +#include "duckdb/planner/operator/logical_projection.hpp" +#include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" #include "duckdb/main/config.hpp" @@ -47,4 +49,153 @@ string LogicalUpdate::GetName() const { return LogicalOperator::GetName(); } +void LogicalUpdate::RewriteInPlaceUpdates(LogicalOperator &update_op) { + auto &update = update_op.Cast(); + + if (update.update_is_del_and_insert) { + return; + } + auto needs_reinsert = false; + for (auto &col : update.table.GetColumns().GetColumnTypes()) { + if (!col.SupportsRegularUpdate()) { + needs_reinsert = true; + break; + } + } + if (!needs_reinsert) { + return; + } + + // Okay, we are reading an old plan version that has in-place updates for a type that no longer supports it. + // We need to convert the update into a delete + insert. + + // The UPDATE operator always expects the rowid column to be the last. + auto rowid_binding = update_op.children.back()->GetColumnBindings().back(); + + // We're looking for the GET operator that produces this rowid, and therefore produce this binding. + // There might be projections in betweeen though, so we need to traverse through them. + auto target_binding = rowid_binding; + vector>> stack; + + // Collect all projections along the way, as well as the GET operator, so that we can update them later. + vector>> projections; + unique_ptr *get_op = nullptr; + + stack.emplace_back(update_op.children.back()); + + auto is_done = false; + + while (!stack.empty() && !is_done) { + auto ¤t = stack.back().get(); + stack.pop_back(); + + switch (current->type) { + case LogicalOperatorType::LOGICAL_PROJECTION: { + // Does this projection produce the target binding? + auto &proj = current->Cast(); + if (target_binding.table_index == proj.table_index) { + // This is the projection we're looking for! + projections.emplace_back(current); + + // Update the target binding. + target_binding = + proj.expressions[target_binding.column_index]->Cast().binding; + + // Traverse the child. + stack.push_back(proj.children.back()); + } + } break; + case LogicalOperatorType::LOGICAL_GET: { + // Is this the GET at the root of our projection chain? + + auto &get = current->Cast(); + if (target_binding.table_index == get.table_index) { + // We found the GET operator we're looking for, we can stop traversing. + is_done = true; + get_op = ¤t; + break; + } + } break; + default: { + // Otherwise, this is some random operator. Traverse all children! + for (auto &child : current->children) { + stack.emplace_back(child); + } + } + } + } + + // We should have found the GET operator by now, if not, this is an error in the plan. + if (!is_done || !get_op) { + throw InternalException("Could not find the expected GET operator in the LogicalUpdate operator children"); + } + + auto &get = get_op->get()->Cast(); + + // Now, we need to update the GET operator to include ALL the other columns. + physical_index_set_t all_columns; + for (auto &column : update.table.GetColumns().Physical()) { + all_columns.insert(column.Physical()); + } + + idx_t found_column_count = 0; + physical_index_set_t found_columns; + for (idx_t i = 0; i < update.columns.size(); i++) { + if (all_columns.find(update.columns[i]) != all_columns.end()) { + // this column is referenced already + found_column_count++; + found_columns.insert(update.columns[i]); + } + } + + if (found_column_count != all_columns.size()) { + for (auto &physical_id : all_columns) { + if (found_columns.find(physical_id) != found_columns.end()) { + // column is already projected + continue; + } + + auto &column = update.table.GetColumns().GetColumn(physical_id); + + // Add it to the GET operator + get.AddColumnId(column.Logical().index); + + // Now, for each projection on the path from the GET to the UPDATE, we need to add this column as well. + // We do this in backwards order, always adding a column reference to the previously added column, + // so that we end up with a chain of column references that all point to the newly added column in the GET. + + auto prev_col_idx = get.GetColumnIds().size() - 1; + auto prev_tbl_idx = get.GetTableIndex().back(); + + for (int64_t i = UnsafeNumericCast(projections.size()) - 1; i >= 0; i--) { + auto &proj = projections[UnsafeNumericCast(i)].get()->Cast(); + + if (i == 0) { + // This is the last projection, push the new columns next-to-last so that rowid remains last + proj.expressions.insert( + proj.expressions.end() - 1, + make_uniq(column.Type(), ColumnBinding(prev_tbl_idx, prev_col_idx))); + + prev_col_idx = proj.expressions.size() - 2; + prev_tbl_idx = proj.table_index; + } else { + proj.expressions.push_back( + make_uniq(column.Type(), ColumnBinding(prev_tbl_idx, prev_col_idx))); + + prev_col_idx = proj.expressions.size() - 1; + prev_tbl_idx = proj.table_index; + } + } + + // Finally, add the column to the UPDATE operator too + update.columns.push_back(physical_id); + update.expressions.push_back( + make_uniq(column.Type(), ColumnBinding(prev_tbl_idx, prev_col_idx))); + } + } + + update.update_is_del_and_insert = true; + update.ResolveOperatorTypes(); +} + } // namespace duckdb diff --git a/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp b/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp index 5e5dfcf0e..114a4143e 100644 --- a/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +++ b/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp @@ -189,6 +189,9 @@ unique_ptr LogicalOperator::Deserialize(Deserializer &deseriali } deserializer.Unset(); result->children = std::move(children); + if (type == LogicalOperatorType::LOGICAL_UPDATE) { + LogicalUpdate::RewriteInPlaceUpdates(*result); + } return result; }