diff --git a/src/duckdb/extension/json/include/json_executors.hpp b/src/duckdb/extension/json/include/json_executors.hpp index 121cbd0f5..8d511ef95 100644 --- a/src/duckdb/extension/json/include/json_executors.hpp +++ b/src/duckdb/extension/json/include/json_executors.hpp @@ -80,7 +80,7 @@ struct JSONExecutors { for (idx_t i = 0; i < vals.size(); i++) { auto &val = vals[i]; D_ASSERT(val != nullptr); // Wildcard extract shouldn't give back nullptrs - child_vals[current_size + i] = fun(val, alc, result, child_validity, current_size + i); + child_vals[current_size + i] = fun(val, alc, child_entry, child_validity, current_size + i); } ListVector::SetListSize(result, new_size); diff --git a/src/duckdb/extension/json/include/json_reader_options.hpp b/src/duckdb/extension/json/include/json_reader_options.hpp index 85c8d0d00..67f5f5059 100644 --- a/src/duckdb/extension/json/include/json_reader_options.hpp +++ b/src/duckdb/extension/json/include/json_reader_options.hpp @@ -43,7 +43,11 @@ struct DateFormatMap { auto &formats = candidate_formats[type]; formats.emplace_back(); formats.back().format_specifier = format_string; - StrpTimeFormat::ParseFormatSpecifier(formats.back().format_specifier, formats.back()); + const auto error = StrpTimeFormat::ParseFormatSpecifier(formats.back().format_specifier, formats.back()); + if (!error.empty()) { + formats.pop_back(); + throw InvalidInputException(error); + } } static bool HasFormats(const type_id_map_t> &candidate_formats, LogicalTypeId type) { diff --git a/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp b/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp index 543e0a00b..fd54dd0c5 100644 --- a/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +++ b/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp @@ -6,6 +6,7 @@ #include "json_deserializer.hpp" #include "json_functions.hpp" #include "json_serializer.hpp" +#include "duckdb/parser/parsed_expression_iterator.hpp" namespace duckdb { @@ -214,6 +215,11 @@ static vector> DeserializeSelectStatement(string_t i if (!stmt->node) { throw ParserException("Error parsing json: no select node found in json"); } + ParsedExpressionIterator::EnumerateQueryNodeChildren(*stmt->node, [](unique_ptr &child) { + if (!child) { + throw ParserException("Error parsing json: null expression found in json"); + } + }); result.push_back(std::move(stmt)); } diff --git a/src/duckdb/extension/json/json_functions/json_structure.cpp b/src/duckdb/extension/json/json_functions/json_structure.cpp index 7b17f25f7..fd419f27e 100644 --- a/src/duckdb/extension/json/json_functions/json_structure.cpp +++ b/src/duckdb/extension/json/json_functions/json_structure.cpp @@ -689,6 +689,9 @@ static double CalculateTypeSimilarity(const LogicalType &merged, const LogicalTy } case LogicalTypeId::LIST: { // Only lists can be merged into a list + if (type.id() != LogicalTypeId::LIST) { + return -1; + } D_ASSERT(type.id() == LogicalTypeId::LIST); const auto &merged_child_type = ListType::GetChildType(merged); const auto &type_child_type = ListType::GetChildType(type); diff --git a/src/duckdb/src/common/arrow/arrow_type_extension.cpp b/src/duckdb/src/common/arrow/arrow_type_extension.cpp index 97ece72f7..63fb46737 100644 --- a/src/duckdb/src/common/arrow/arrow_type_extension.cpp +++ b/src/duckdb/src/common/arrow/arrow_type_extension.cpp @@ -474,7 +474,6 @@ struct ArrowGeometry { duckdb_yyjson::yyjson_doc_free(projjson_doc); } else { - duckdb_yyjson::yyjson_mut_doc_free(doc); throw SerializationException("Could not parse PROJJSON CRS for GeoArrow metadata"); } } break; @@ -523,6 +522,7 @@ struct ArrowGeometry { duckdb_yyjson::yyjson_mut_doc_free(doc); free(json_text); } else { + duckdb_yyjson::yyjson_mut_doc_free(doc); schema_metadata.AddOption(ArrowSchemaMetadata::ARROW_METADATA_KEY, "{}"); } diff --git a/src/duckdb/src/common/printer.cpp b/src/duckdb/src/common/printer.cpp index 46187ac24..115c1cae0 100644 --- a/src/duckdb/src/common/printer.cpp +++ b/src/duckdb/src/common/printer.cpp @@ -73,8 +73,11 @@ idx_t Printer::TerminalWidth() { rows = csbi.srWindow.Right - csbi.srWindow.Left + 1; return rows; #else - struct winsize w; + struct winsize w = {}; ioctl(0, TIOCGWINSZ, &w); + if (w.ws_col == 0) { + return 120; + } return w.ws_col; #endif #else diff --git a/src/duckdb/src/common/types/value.cpp b/src/duckdb/src/common/types/value.cpp index 02f280a90..e68eb2431 100644 --- a/src/duckdb/src/common/types/value.cpp +++ b/src/duckdb/src/common/types/value.cpp @@ -1743,6 +1743,24 @@ string Value::ToSQLString() const { ret += "]"; return ret; } + case LogicalTypeId::MAP: { + // A bare `MAP {...}` literal infers its element types from the entries + // (and `MAP {}` infers MAP(INTEGER, INTEGER)), so it does not faithfully + // round-trip on its own. Append an explicit cast to the real type + auto &entries = MapValue::GetChildren(*this); + string ret = "MAP {"; + for (idx_t i = 0; i < entries.size(); i++) { + auto &kv = StructValue::GetChildren(entries[i]); + if (i > 0) { + ret += ", "; + } + ret += kv[0].ToSQLString(); + ret += ": "; + ret += kv[1].ToSQLString(); + } + ret += "}::" + type_.ToString(); + return ret; + } case LogicalTypeId::UNION: { string ret = "union_value("; auto union_tag = UnionValue::GetTag(*this); diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 673c6a642..f868efcd0 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "3" +#define DUCKDB_PATCH_VERSION "4-dev41" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.3" +#define DUCKDB_VERSION "v1.5.4-dev41" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "14eca11bd9" +#define DUCKDB_SOURCE_ID "0bf2a03cb1" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp b/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp index f58edf615..df2e255d3 100644 --- a/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp +++ b/src/duckdb/src/include/duckdb/common/serializer/deserializer.hpp @@ -17,6 +17,7 @@ #include "duckdb/common/unordered_set.hpp" #include "duckdb/common/exception/parser_exception.hpp" #include "duckdb/execution/operator/csv_scanner/csv_reader_options.hpp" +#include "duckdb/storage/table/per_column_metadata_blocks.hpp" namespace duckdb { @@ -528,6 +529,12 @@ class Deserializer { return idx == DConstants::INVALID_INDEX ? optional_idx() : optional_idx(idx); } + // Deserialize a ProjectionIndex + template + inline typename std::enable_if::value, T>::type Read() { + return PerColumnMetadataBlock::Unpack(ReadUnsignedInt64()); + } + protected: // Hooks for subclasses to override to implement custom behavior virtual void OnPropertyBegin(const field_id_t field_id, const char *tag) = 0; diff --git a/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp b/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp index 97aeef51a..990680aef 100644 --- a/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +++ b/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp @@ -23,6 +23,7 @@ #include "duckdb/execution/operator/csv_scanner/csv_option.hpp" #include "duckdb/main/config.hpp" #include "duckdb/common/insertion_order_preserving_map.hpp" +#include "duckdb/storage/table/per_column_metadata_blocks.hpp" namespace duckdb { @@ -374,6 +375,9 @@ class Serializer { void WriteValue(optional_idx value) { WriteValue(value.IsValid() ? value.GetIndex() : DConstants::INVALID_INDEX); } + void WriteValue(PerColumnMetadataBlock value) { + WriteValue(value.GetPacked()); + } }; // We need to special case vector because elements of vector cannot be referenced diff --git a/src/duckdb/src/include/duckdb/main/settings.hpp b/src/duckdb/src/include/duckdb/main/settings.hpp index 1c282384d..15a5570aa 100644 --- a/src/duckdb/src/include/duckdb/main/settings.hpp +++ b/src/duckdb/src/include/duckdb/main/settings.hpp @@ -929,6 +929,18 @@ struct ForceBitpackingModeSetting { static void OnSet(SettingCallbackInfo &info, Value &input); }; +struct ForceColumnMetadataReuseSetting { + using RETURN_TYPE = bool; + static constexpr const char *Name = "force_column_metadata_reuse"; + static constexpr const char *Description = + "Force re-use of row group metadata on a column-level when checkpointing on older storage versions 6 and 7. " + "This breaks storage backward-compatibility with older DuckDB versions."; + static constexpr const char *InputType = "BOOLEAN"; + static constexpr const char *DefaultValue = "false"; + static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; + static constexpr idx_t SettingIndex = 51; +}; + struct ForceCompressionSetting { using RETURN_TYPE = CompressionType; static constexpr const char *Name = "force_compression"; @@ -936,7 +948,7 @@ struct ForceCompressionSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = "auto"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 51; + static constexpr idx_t SettingIndex = 52; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -969,7 +981,7 @@ struct GeometryMinimumShreddingSize { static constexpr const char *InputType = "BIGINT"; static constexpr const char *DefaultValue = "30000"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 52; + static constexpr idx_t SettingIndex = 53; }; struct HomeDirectorySetting { @@ -979,7 +991,7 @@ struct HomeDirectorySetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = ""; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 53; + static constexpr idx_t SettingIndex = 54; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1012,7 +1024,7 @@ struct HTTPProxyPasswordSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = ""; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 54; + static constexpr idx_t SettingIndex = 55; }; struct HTTPProxyUsernameSetting { @@ -1022,7 +1034,7 @@ struct HTTPProxyUsernameSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = ""; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 55; + static constexpr idx_t SettingIndex = 56; }; struct IeeeFloatingPointOpsSetting { @@ -1033,7 +1045,7 @@ struct IeeeFloatingPointOpsSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "true"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 56; + static constexpr idx_t SettingIndex = 57; }; struct IgnoreUnknownCrsSetting { @@ -1044,7 +1056,7 @@ struct IgnoreUnknownCrsSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 57; + static constexpr idx_t SettingIndex = 58; }; struct ImmediateTransactionModeSetting { @@ -1055,7 +1067,7 @@ struct ImmediateTransactionModeSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 58; + static constexpr idx_t SettingIndex = 59; }; struct IndexScanMaxCountSetting { @@ -1067,7 +1079,7 @@ struct IndexScanMaxCountSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "2048"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 59; + static constexpr idx_t SettingIndex = 60; }; struct IndexScanPercentageSetting { @@ -1079,7 +1091,7 @@ struct IndexScanPercentageSetting { static constexpr const char *InputType = "DOUBLE"; static constexpr const char *DefaultValue = "0.001"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 60; + static constexpr idx_t SettingIndex = 61; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1091,7 +1103,7 @@ struct IntegerDivisionSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 61; + static constexpr idx_t SettingIndex = 62; }; struct LambdaSyntaxSetting { @@ -1102,7 +1114,7 @@ struct LambdaSyntaxSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = "DEFAULT"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 62; + static constexpr idx_t SettingIndex = 63; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1114,7 +1126,7 @@ struct LateMaterializationMaxRowsSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "50"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 63; + static constexpr idx_t SettingIndex = 64; }; struct LockConfigurationSetting { @@ -1124,7 +1136,7 @@ struct LockConfigurationSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 64; + static constexpr idx_t SettingIndex = 65; }; struct LogQueryPathSetting { @@ -1135,7 +1147,7 @@ struct LogQueryPathSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = ""; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 65; + static constexpr idx_t SettingIndex = 66; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1178,7 +1190,7 @@ struct MaxExpressionDepthSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "1000"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 66; + static constexpr idx_t SettingIndex = 67; }; struct MaxMemorySetting { @@ -1209,7 +1221,7 @@ struct MaxVacuumTasksSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "100"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 67; + static constexpr idx_t SettingIndex = 68; }; struct MergeJoinThresholdSetting { @@ -1219,7 +1231,7 @@ struct MergeJoinThresholdSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "1000"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 68; + static constexpr idx_t SettingIndex = 69; }; struct NestedLoopJoinThresholdSetting { @@ -1230,7 +1242,7 @@ struct NestedLoopJoinThresholdSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "5"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 69; + static constexpr idx_t SettingIndex = 70; }; struct OldImplicitCastingSetting { @@ -1240,7 +1252,7 @@ struct OldImplicitCastingSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 70; + static constexpr idx_t SettingIndex = 71; }; struct OrderByNonIntegerLiteralSetting { @@ -1251,7 +1263,7 @@ struct OrderByNonIntegerLiteralSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 71; + static constexpr idx_t SettingIndex = 72; }; struct OrderedAggregateThresholdSetting { @@ -1261,7 +1273,7 @@ struct OrderedAggregateThresholdSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "262144"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 72; + static constexpr idx_t SettingIndex = 73; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1273,7 +1285,7 @@ struct PartitionedWriteFlushThresholdSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "524288"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 73; + static constexpr idx_t SettingIndex = 74; }; struct PartitionedWriteMaxOpenFilesSetting { @@ -1284,7 +1296,7 @@ struct PartitionedWriteMaxOpenFilesSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "100"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 74; + static constexpr idx_t SettingIndex = 75; }; struct PasswordSetting { @@ -1294,7 +1306,7 @@ struct PasswordSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = ""; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 75; + static constexpr idx_t SettingIndex = 76; }; struct PerfectHtThresholdSetting { @@ -1304,7 +1316,7 @@ struct PerfectHtThresholdSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "12"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 76; + static constexpr idx_t SettingIndex = 77; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1316,7 +1328,7 @@ struct PinThreadsSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = "auto"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 77; + static constexpr idx_t SettingIndex = 78; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1328,7 +1340,7 @@ struct PivotFilterThresholdSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "20"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 78; + static constexpr idx_t SettingIndex = 79; }; struct PivotLimitSetting { @@ -1338,7 +1350,7 @@ struct PivotLimitSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "100000"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 79; + static constexpr idx_t SettingIndex = 80; }; struct PreferRangeJoinsSetting { @@ -1348,7 +1360,7 @@ struct PreferRangeJoinsSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 80; + static constexpr idx_t SettingIndex = 81; }; struct PreserveIdentifierCaseSetting { @@ -1359,7 +1371,7 @@ struct PreserveIdentifierCaseSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "true"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 81; + static constexpr idx_t SettingIndex = 82; }; struct PreserveInsertionOrderSetting { @@ -1371,7 +1383,7 @@ struct PreserveInsertionOrderSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "true"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 82; + static constexpr idx_t SettingIndex = 83; }; struct ProduceArrowStringViewSetting { @@ -1382,7 +1394,7 @@ struct ProduceArrowStringViewSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 83; + static constexpr idx_t SettingIndex = 84; }; struct ProfileOutputSetting { @@ -1435,7 +1447,7 @@ struct ScalarSubqueryErrorOnMultipleRowsSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "true"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::LOCAL_DEFAULT; - static constexpr idx_t SettingIndex = 84; + static constexpr idx_t SettingIndex = 85; }; struct SchedulerProcessPartialSetting { @@ -1450,7 +1462,7 @@ struct SchedulerProcessPartialSetting { static constexpr const char *DefaultValue = "false"; #endif static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 85; + static constexpr idx_t SettingIndex = 86; }; struct SchemaSetting { @@ -1492,7 +1504,7 @@ struct StorageBlockPrefetchSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = "REMOTE_ONLY"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 86; + static constexpr idx_t SettingIndex = 87; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1534,7 +1546,7 @@ struct TempFileEncryptionSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 87; + static constexpr idx_t SettingIndex = 88; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1555,7 +1567,7 @@ struct UsernameSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = ""; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 88; + static constexpr idx_t SettingIndex = 89; }; struct VacuumRebuildIndexesSetting { @@ -1567,7 +1579,7 @@ struct VacuumRebuildIndexesSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "0"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 89; + static constexpr idx_t SettingIndex = 90; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1580,7 +1592,7 @@ struct ValidateExternalFileCacheSetting { static constexpr const char *InputType = "VARCHAR"; static constexpr const char *DefaultValue = "VALIDATE_ALL"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 90; + static constexpr idx_t SettingIndex = 91; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1592,7 +1604,7 @@ struct VariantMinimumShreddingSizeSetting { static constexpr const char *InputType = "BIGINT"; static constexpr const char *DefaultValue = "30000"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 91; + static constexpr idx_t SettingIndex = 92; }; struct WalAutocheckpointEntriesSetting { @@ -1603,7 +1615,7 @@ struct WalAutocheckpointEntriesSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "0"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 92; + static constexpr idx_t SettingIndex = 93; }; struct WarningsAsErrorsSetting { @@ -1613,7 +1625,7 @@ struct WarningsAsErrorsSetting { static constexpr const char *InputType = "BOOLEAN"; static constexpr const char *DefaultValue = "false"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 93; + static constexpr idx_t SettingIndex = 94; static void OnSet(SettingCallbackInfo &info, Value &input); }; @@ -1625,7 +1637,7 @@ struct WriteBufferRowGroupCountSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "5"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_DEFAULT; - static constexpr idx_t SettingIndex = 94; + static constexpr idx_t SettingIndex = 95; }; struct WriteBufferRowGroupMemoryLimitSetting { @@ -1649,11 +1661,11 @@ struct ZstdMinStringLengthSetting { static constexpr const char *InputType = "UBIGINT"; static constexpr const char *DefaultValue = "4096"; static constexpr SettingScopeTarget Scope = SettingScopeTarget::GLOBAL_ONLY; - static constexpr idx_t SettingIndex = 95; + static constexpr idx_t SettingIndex = 96; }; struct GeneratedSettingInfo { - static constexpr idx_t MaxSettingIndex = 96; + static constexpr idx_t MaxSettingIndex = 97; }; //===----------------------------------------------------------------------===// diff --git a/src/duckdb/src/include/duckdb/optimizer/window_self_join.hpp b/src/duckdb/src/include/duckdb/optimizer/window_self_join.hpp index bac7e9976..3ed80d451 100644 --- a/src/duckdb/src/include/duckdb/optimizer/window_self_join.hpp +++ b/src/duckdb/src/include/duckdb/optimizer/window_self_join.hpp @@ -16,6 +16,8 @@ namespace duckdb { class WindowSelfJoinOptimizer { public: + static bool CanOptimize(const LogicalOperator &op); + explicit WindowSelfJoinOptimizer(Optimizer &optimizer); unique_ptr Optimize(unique_ptr op); diff --git a/src/duckdb/src/include/duckdb/storage/data_pointer.hpp b/src/duckdb/src/include/duckdb/storage/data_pointer.hpp index 109874abc..3d7e5afdf 100644 --- a/src/duckdb/src/include/duckdb/storage/data_pointer.hpp +++ b/src/duckdb/src/include/duckdb/storage/data_pointer.hpp @@ -11,6 +11,7 @@ #include "duckdb/storage/statistics/base_statistics.hpp" #include "duckdb/storage/storage_info.hpp" #include "duckdb/storage/block.hpp" +#include "duckdb/storage/table/per_column_metadata_blocks.hpp" #include "duckdb/storage/table/row_group.hpp" #include "duckdb/common/enums/compression_type.hpp" @@ -75,7 +76,13 @@ struct RowGroupPointer { bool has_metadata_blocks = false; //! Metadata blocks of the columns that are not mentioned in "data_pointers" //! This is often empty - but can be set for wide columns with a lot of metadata + //! When targeting 2.0 storage format, per_column_metadata_blocks is used instead vector extra_metadata_blocks; + //! Whether or not we have per-column metadata blocks + bool has_per_column_metadata_blocks = false; + //! Per-column metadata blocks beyond the start block + //! Each column entry contains the additional block IDs that the column's metadata spans (excluding the start block) + PerColumnMetadataBlocks per_column_metadata_blocks; }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp b/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp index 29d160260..65f5a2414 100644 --- a/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/metadata/metadata_manager.hpp @@ -80,6 +80,7 @@ class MetadataManager { //! Flush all blocks to disk void Flush(); + bool BlockIsModified(const MetaBlockPointer &ptr); bool BlockHasBeenCleared(const MetaBlockPointer &ptr); void MarkBlocksAsModified(); diff --git a/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp b/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp index ce8d01b41..cb94abfb8 100644 --- a/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp +++ b/src/duckdb/src/include/duckdb/storage/metadata/metadata_reader.hpp @@ -34,9 +34,6 @@ class MetadataReader : public ReadStream { MetadataManager &GetMetadataManager() { return manager; } - //! Gets a list of all remaining blocks to be read by this metadata reader - consumes all blocks - //! If "last_block" is specified, we stop when reaching that block - vector GetRemainingBlocks(MetaBlockPointer last_block = MetaBlockPointer()); private: data_ptr_t BasePtr(); diff --git a/src/duckdb/src/include/duckdb/storage/table/geo_column_data.hpp b/src/duckdb/src/include/duckdb/storage/table/geo_column_data.hpp index d842ec602..c5ec537ab 100644 --- a/src/duckdb/src/include/duckdb/storage/table/geo_column_data.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/geo_column_data.hpp @@ -74,7 +74,7 @@ class GeoColumnData final : public ColumnData { private: static void Specialize(Vector &source, Vector &target, idx_t count, GeometryStorageType storage_type); static void Reassemble(Vector &source, Vector &target, idx_t count, GeometryStorageType storage_type, idx_t offset); - static void InterpretStats(BaseStatistics &source, BaseStatistics &target, GeometryType geom_type, + static void InterpretStats(const BaseStatistics &source, BaseStatistics &target, GeometryType geom_type, VertexType vert_type); }; diff --git a/src/duckdb/src/include/duckdb/storage/table/per_column_metadata_blocks.hpp b/src/duckdb/src/include/duckdb/storage/table/per_column_metadata_blocks.hpp new file mode 100644 index 000000000..0754878eb --- /dev/null +++ b/src/duckdb/src/include/duckdb/storage/table/per_column_metadata_blocks.hpp @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/storage/table/per_column_metadata_blocks.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/common/typedefs.hpp" +#include "duckdb/common/vector.hpp" + +namespace duckdb { + +class Serializer; +class Deserializer; + +struct PerColumnMetadataBlock { + bool is_column_index : 1; + idx_t index : 63; + + idx_t GetPacked(); + + static PerColumnMetadataBlock Unpack(idx_t packed); +}; + +class PerColumnMetadataBlocks { +public: + //! Get block IDs for specific columns (linear scan), returns one vector per requested column + vector> GetBlocksForColumns(const vector &columns) const; + + //! Add a column entry with its block IDs + void AddColumn(idx_t col_idx, const vector &blocks); + //! Remove a column entry and all its block IDs (linear scan) + void RemoveColumn(idx_t col_idx); + + //! Iterate over all block IDs, passing (column_index, block_id) to the callback + template + void ForEachBlock(Func func) const { + idx_t current_col = 0; + for (auto &entry : data) { + if (entry.is_column_index) { + current_col = entry.index; + } else { + func(current_col, entry.index); + } + } + } + + vector data; +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp index 70fcf1ed9..aa15953dc 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group.hpp @@ -12,6 +12,7 @@ #include "duckdb/storage/statistics/segment_statistics.hpp" #include "duckdb/common/types/data_chunk.hpp" #include "duckdb/common/enums/scan_options.hpp" +#include "duckdb/storage/table/per_column_metadata_blocks.hpp" #include "duckdb/common/mutex.hpp" #include "duckdb/parser/column_list.hpp" #include "duckdb/storage/table/segment_base.hpp" @@ -73,12 +74,17 @@ struct RowGroupWriteInfo { optional_ptr>> column_partial_block_managers; }; +enum class RowGroupWriteAction { + REUSE_EXISTING_ROW_GROUP_METADATA, + PARTIALLY_REUSE_COLUMN_METADATA, + FULLY_CHECKPOINT_ROW_GROUP +}; + struct RowGroupWriteData { shared_ptr result_row_group; vector> states; vector statistics; - bool reuse_existing_metadata_blocks = false; - vector existing_extra_metadata_blocks; + RowGroupWriteAction write_action = RowGroupWriteAction::FULLY_CHECKPOINT_ROW_GROUP; optional_idx write_count; }; @@ -107,11 +113,13 @@ class RowGroup : public SegmentBase { RowGroupCollection &GetCollection() const { return collection.get(); } - //! Returns the list of meta block pointers used by the columns - vector GetOrComputeExtraMetadataBlocks(bool force_compute = false); + //! Compute per-column metadata blocks by reading column metadata from disk + PerColumnMetadataBlocks ComputePerColumnMetadataBlocks() const; const vector &GetColumnStartPointers() const; + vector GetExtraMetadataBlockPointers() const; + BlockManager &GetBlockManager() const; DataTableInfo &GetTableInfo() const; @@ -205,7 +213,7 @@ class RowGroup : public SegmentBase { RowVersionManager &GetOrCreateVersionInfo(); // Serialization - static void Serialize(RowGroupPointer &pointer, Serializer &serializer); + static void Serialize(RowGroupPointer &pointer, Serializer &serializer, bool supports_per_column_writes); static RowGroupPointer Deserialize(Deserializer &deserializer); idx_t GetRowGroupSize() const; @@ -232,8 +240,14 @@ class RowGroup : public SegmentBase { vector> &GetColumns(); void LoadRowIdColumnData() const; void SetCount(idx_t count); + bool ColumnIsLoaded(storage_t c) const; + void UnloadColumn(storage_t c); + bool HasUnchangedColumns() const; + static shared_ptr CheckpointColumn(const RowGroup &row_group, idx_t column_idx, RowGroupWriteInfo &info, + RowGroupWriteData &write_data); bool HasUnloadedDeletes() const; + unique_ptr CreateNewRowGroupCopy(RowGroupCollection &new_collection, idx_t new_column_count); private: mutable mutex row_group_lock; @@ -243,6 +257,8 @@ class RowGroup : public SegmentBase { vector deletes_pointers; bool has_metadata_blocks = false; vector extra_metadata_blocks; + bool has_per_column_metadata_blocks = false; + PerColumnMetadataBlocks per_column_metadata_blocks; atomic deletes_is_loaded; atomic allocation_size; //! The row id column data (mutable because `const` can lazy load) diff --git a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp index aebd74042..3a7c7a2cc 100644 --- a/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +++ b/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp @@ -127,7 +127,8 @@ class RowGroupCollection { void CommitDropTable(); vector GetPartitionStats() const; - vector GetColumnSegmentInfo(const QueryContext &context); + vector GetColumnSegmentInfo(const QueryContext &context) const; + bool SupportsPerColumnWrites(); const vector &GetTypes() const; shared_ptr AddColumn(ClientContext &context, ColumnDefinition &new_column, diff --git a/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp b/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp index e292ca28a..2651cfb32 100644 --- a/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp @@ -97,6 +97,8 @@ struct BlockIndexManager { idx_t GetMaxIndex() const; //! Whether there are free blocks available within the file bool HasFreeBlocks() const; + //! Get the count of blocks currently holding data + idx_t GetUsedBlockCount() const; private: //! Get/set max block index diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 5615820aa..bbd201ec7 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -142,6 +142,7 @@ static const ConfigurationOption internal_options[] = { DUCKDB_SETTING_CALLBACK(ExternalThreadsSetting), DUCKDB_SETTING(FileSearchPathSetting), DUCKDB_SETTING_CALLBACK(ForceBitpackingModeSetting), + DUCKDB_SETTING(ForceColumnMetadataReuseSetting), DUCKDB_SETTING_CALLBACK(ForceCompressionSetting), DUCKDB_GLOBAL(ForceMbedtlsUnsafeSetting), DUCKDB_GLOBAL(ForceVariantShredding), @@ -210,12 +211,12 @@ static const ConfigurationOption internal_options[] = { DUCKDB_SETTING(ZstdMinStringLengthSetting), FINAL_SETTING}; -static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("memory_limit", 100), +static const ConfigurationAlias setting_aliases[] = {DUCKDB_SETTING_ALIAS("memory_limit", 101), DUCKDB_SETTING_ALIAS("null_order", 43), - DUCKDB_SETTING_ALIAS("profiling_output", 119), - DUCKDB_SETTING_ALIAS("user", 134), + DUCKDB_SETTING_ALIAS("profiling_output", 120), + DUCKDB_SETTING_ALIAS("user", 135), DUCKDB_SETTING_ALIAS("wal_autocheckpoint", 25), - DUCKDB_SETTING_ALIAS("worker_threads", 133), + DUCKDB_SETTING_ALIAS("worker_threads", 134), FINAL_ALIAS}; vector DBConfig::GetOptions() { diff --git a/src/duckdb/src/optimizer/topn_window_elimination.cpp b/src/duckdb/src/optimizer/topn_window_elimination.cpp index f18657150..d8a69028f 100644 --- a/src/duckdb/src/optimizer/topn_window_elimination.cpp +++ b/src/duckdb/src/optimizer/topn_window_elimination.cpp @@ -917,7 +917,7 @@ bool TopNWindowElimination::CanUseLateMaterialization(const LogicalWindow &windo if (projection_idx >= projection.expressions.size()) { return false; } - if (!ExtractSingleBinding(&projection.expressions[projection_idx], projections[i])) { + if (!ExtractSingleBinding(&projection.expressions[projection_idx], projections[i], true)) { return false; } } diff --git a/src/duckdb/src/optimizer/window_self_join.cpp b/src/duckdb/src/optimizer/window_self_join.cpp index bbc2d88eb..0b81c4535 100644 --- a/src/duckdb/src/optimizer/window_self_join.cpp +++ b/src/duckdb/src/optimizer/window_self_join.cpp @@ -8,8 +8,6 @@ #include "duckdb/planner/expression/bound_columnref_expression.hpp" #include "duckdb/planner/expression/bound_aggregate_expression.hpp" #include "duckdb/function/aggregate_state.hpp" -#include "duckdb/planner/logical_operator_visitor.hpp" -#include "duckdb/planner/operator/logical_get.hpp" #include "duckdb/planner/logical_operator_deep_copy.hpp" namespace duckdb { @@ -103,6 +101,24 @@ bool WindowSelfJoinOptimizer::CanOptimize(const BoundWindowExpression &w_expr, return true; } +bool WindowSelfJoinOptimizer::CanOptimize(const LogicalOperator &op) { + switch (op.type) { + case LogicalOperatorType::LOGICAL_GET: + case LogicalOperatorType::LOGICAL_EXPRESSION_GET: + case LogicalOperatorType::LOGICAL_PROJECTION: + case LogicalOperatorType::LOGICAL_AGGREGATE_AND_GROUP_BY: + case LogicalOperatorType::LOGICAL_DUMMY_SCAN: + case LogicalOperatorType::LOGICAL_FILTER: + if (!op.children.empty()) { + return CanOptimize(*op.children[0]); + } + return true; + default: + break; + } + return false; +} + unique_ptr WindowSelfJoinOptimizer::OptimizeInternal(unique_ptr op, ColumnBindingReplacer &replacer) { if (op->type == LogicalOperatorType::LOGICAL_WINDOW) { @@ -111,6 +127,10 @@ unique_ptr WindowSelfJoinOptimizer::OptimizeInternal(unique_ptr // Check recursively window.children[0] = OptimizeInternal(std::move(window.children[0]), replacer); + if (!CanOptimize(*window.children[0])) { + return op; + } + auto &w_expr0 = window.expressions[0]->Cast(); for (auto &expr : window.expressions) { auto &w_expr = expr->Cast(); @@ -128,7 +148,7 @@ unique_ptr WindowSelfJoinOptimizer::OptimizeInternal(unique_ptr LogicalOperatorDeepCopy deep_copy(optimizer.binder, nullptr); try { copy_child = deep_copy.DeepCopy(window.children[0]); - } catch (NotImplementedException &ex) { + } catch (std::exception &ex) { // failed to copy the LHS - cannot run this optimizer return op; } diff --git a/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp b/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp index 4572a3a36..8cbec4cd2 100644 --- a/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +++ b/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp @@ -123,10 +123,13 @@ unique_ptr Transformer::CreatePivotStatement(unique_ptrcolumn->ToString()); } - result->statements.push_back(GenerateCreateEnumStmt(std::move(pivot))); + auto enum_stmt = GenerateCreateEnumStmt(std::move(pivot)); + enum_stmt->query = enum_stmt->ToString(); + result->statements.push_back(std::move(enum_stmt)); } result->stmt_location = statement->stmt_location; result->stmt_length = statement->stmt_length; + statement->query = statement->ToString(); result->statements.push_back(std::move(statement)); // FIXME: drop the types again!? // for(auto &pivot : pivot_entries) { diff --git a/src/duckdb/src/storage/checkpoint/table_data_writer.cpp b/src/duckdb/src/storage/checkpoint/table_data_writer.cpp index ffd781c0a..7c4f9bf15 100644 --- a/src/duckdb/src/storage/checkpoint/table_data_writer.cpp +++ b/src/duckdb/src/storage/checkpoint/table_data_writer.cpp @@ -96,6 +96,7 @@ void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stat idx_t total_rows; auto debug_verify_blocks = Settings::Get(GetDatabase()); if (!existing_pointer.IsValid()) { + auto supports_per_column_writes = collection.SupportsPerColumnWrites(); // write the metadata // store the current position in the metadata writer // this is where the row groups for this table start @@ -121,7 +122,7 @@ void SingleFileTableDataWriter::FinalizeTable(const TableStatistics &global_stat // Each RowGroup is its own unit BinarySerializer row_group_serializer(table_data_writer, serializer.GetOptions()); row_group_serializer.Begin(); - RowGroup::Serialize(row_group_pointer, row_group_serializer); + RowGroup::Serialize(row_group_pointer, row_group_serializer, supports_per_column_writes); row_group_serializer.End(); } table_data_writer.SetWrittenPointers(nullptr); diff --git a/src/duckdb/src/storage/metadata/metadata_manager.cpp b/src/duckdb/src/storage/metadata/metadata_manager.cpp index 91db5f75e..5056132b6 100644 --- a/src/duckdb/src/storage/metadata/metadata_manager.cpp +++ b/src/duckdb/src/storage/metadata/metadata_manager.cpp @@ -425,6 +425,17 @@ void MetadataManager::ClearModifiedBlocks(const vector &pointe } } +bool MetadataManager::BlockIsModified(const MetaBlockPointer &pointer) { + unique_lock guard(block_lock); + auto block_id = pointer.GetBlockId(); + auto entry = blocks.find(block_id); + if (entry == blocks.end()) { + throw InternalException("BlockIsNotModified - Block id %llu not found in blocks", block_id); + } + auto entry2 = modified_blocks.find(block_id); + return entry2 != modified_blocks.end(); +} + bool MetadataManager::BlockHasBeenCleared(const MetaBlockPointer &pointer) { unique_lock guard(block_lock); auto block_id = pointer.GetBlockId(); diff --git a/src/duckdb/src/storage/metadata/metadata_reader.cpp b/src/duckdb/src/storage/metadata/metadata_reader.cpp index 342833448..2c2839a85 100644 --- a/src/duckdb/src/storage/metadata/metadata_reader.cpp +++ b/src/duckdb/src/storage/metadata/metadata_reader.cpp @@ -53,18 +53,6 @@ MetaBlockPointer MetadataReader::GetMetaBlockPointer() { return manager.GetDiskPointer(block.pointer, UnsafeNumericCast(offset)); } -vector MetadataReader::GetRemainingBlocks(MetaBlockPointer last_block) { - vector result; - while (has_next_block) { - if (last_block.IsValid() && next_pointer.block_pointer == last_block.block_pointer) { - break; - } - result.push_back(next_pointer); - ReadNextBlock(); - } - return result; -} - void MetadataReader::ReadNextBlock() { ReadNextBlock(QueryContext()); } diff --git a/src/duckdb/src/storage/table/geo_column_data.cpp b/src/duckdb/src/storage/table/geo_column_data.cpp index 2a5e936d1..efe26a870 100644 --- a/src/duckdb/src/storage/table/geo_column_data.cpp +++ b/src/duckdb/src/storage/table/geo_column_data.cpp @@ -345,6 +345,20 @@ unique_ptr GeoColumnData::Checkpoint(const RowGroup &row_ checkpoint_state->inner_column = base_column; checkpoint_state->inner_column_state = checkpoint_state->inner_column->Checkpoint(row_group, info, old_column_stats); + + if (base_column->GetType().id() == LogicalTypeId::GEOMETRY) { + // Get the stats from the base column. + checkpoint_state->global_stats = checkpoint_state->inner_column_state->GetStatistics(); + } else { + // Otherwise interpret stats from shredded column + const auto types = Geometry::GetSpecializedType(checkpoint_state->storage_type); + const auto gtype = types.first; + const auto vtype = types.second; + + auto new_stats = checkpoint_state->inner_column_state->GetStatistics(); + InterpretStats(*new_stats, *checkpoint_state->global_stats, gtype, vtype); + } + return std::move(checkpoint_state); } @@ -592,7 +606,7 @@ void GeoColumnData::Reassemble(Vector &source, Vector &target, idx_t count, Geom Geometry::FromVectorizedFormat(source, target, count, type, result_offset); } -static const BaseStatistics *GetVertexStats(BaseStatistics &stats, GeometryType geom_type) { +static const BaseStatistics *GetVertexStats(const BaseStatistics &stats, GeometryType geom_type) { switch (geom_type) { case GeometryType::POINT: { return StructStats::GetChildStats(stats); @@ -627,7 +641,7 @@ static const BaseStatistics *GetVertexStats(BaseStatistics &stats, GeometryType } } -void GeoColumnData::InterpretStats(BaseStatistics &source, BaseStatistics &target, GeometryType geom_type, +void GeoColumnData::InterpretStats(const BaseStatistics &source, BaseStatistics &target, GeometryType geom_type, VertexType vert_type) { // Copy base stats target.CopyBase(source); diff --git a/src/duckdb/src/storage/table/per_column_metadata_blocks.cpp b/src/duckdb/src/storage/table/per_column_metadata_blocks.cpp new file mode 100644 index 000000000..5ec3f82f3 --- /dev/null +++ b/src/duckdb/src/storage/table/per_column_metadata_blocks.cpp @@ -0,0 +1,98 @@ +#include "duckdb/storage/table/per_column_metadata_blocks.hpp" +#include "duckdb/common/serializer/serializer.hpp" +#include "duckdb/common/serializer/deserializer.hpp" + +namespace duckdb { + +static constexpr idx_t COLUMN_INDEX_BIT = idx_t(1) << 63; + +idx_t PerColumnMetadataBlock::GetPacked() { + idx_t packed = index; + if (is_column_index) { + packed |= COLUMN_INDEX_BIT; + } + return packed; +} + +PerColumnMetadataBlock PerColumnMetadataBlock::Unpack(idx_t packed) { + PerColumnMetadataBlock result; + result.is_column_index = (packed & COLUMN_INDEX_BIT) != 0; + result.index = packed & ~COLUMN_INDEX_BIT; + return result; +} + +vector> PerColumnMetadataBlocks::GetBlocksForColumns(const vector &columns) const { + vector> result(columns.size()); + if (columns.empty()) { + return result; + } + idx_t col_pos = 0; + bool collecting = false; + for (auto &entry : data) { + if (entry.is_column_index) { + collecting = false; + // skip past requested columns that are before the current entry + while (col_pos < columns.size() && columns[col_pos] < entry.index) { + col_pos++; + } + if (col_pos >= columns.size()) { + break; + } + if (columns[col_pos] == entry.index) { + collecting = true; + } + } else if (collecting) { + result[col_pos].push_back(entry.index); + } + } + return result; +} + +void PerColumnMetadataBlocks::AddColumn(idx_t col_idx, const vector &blocks) { + if (blocks.empty()) { + return; + } +#ifdef D_ASSERT_IS_ENABLED + // assert sorted insertion: col_idx must be greater than the last column index + for (idx_t i = data.size(); i > 0; i--) { + if (data[i - 1].is_column_index) { + D_ASSERT(col_idx > data[i - 1].index); + break; + } + } +#endif + PerColumnMetadataBlock marker; + marker.is_column_index = true; + marker.index = col_idx; + data.push_back(marker); + for (auto &block_id : blocks) { + PerColumnMetadataBlock block; + block.is_column_index = false; + block.index = block_id; + data.push_back(block); + } +} + +void PerColumnMetadataBlocks::RemoveColumn(idx_t col_idx) { + idx_t start = data.size(); + idx_t end = data.size(); + for (idx_t i = 0; i < data.size(); i++) { + if (data[i].is_column_index && data[i].index == col_idx) { + start = i; + // find the end: next column marker or end of data + end = data.size(); + for (idx_t j = i + 1; j < data.size(); j++) { + if (data[j].is_column_index) { + end = j; + break; + } + } + break; + } + } + if (start < data.size()) { + data.erase(data.begin() + NumericCast(start), data.begin() + NumericCast(end)); + } +} + +} // namespace duckdb diff --git a/src/duckdb/src/storage/table/row_group.cpp b/src/duckdb/src/storage/table/row_group.cpp index 9af7d20ee..e333e3c31 100644 --- a/src/duckdb/src/storage/table/row_group.cpp +++ b/src/duckdb/src/storage/table/row_group.cpp @@ -51,6 +51,8 @@ RowGroup::RowGroup(RowGroupCollection &collection_p, RowGroupPointer pointer) this->deletes_pointers = std::move(pointer.deletes_pointers); this->has_metadata_blocks = pointer.has_metadata_blocks; this->extra_metadata_blocks = std::move(pointer.extra_metadata_blocks); + this->has_per_column_metadata_blocks = pointer.has_per_column_metadata_blocks; + this->per_column_metadata_blocks = std::move(pointer.per_column_metadata_blocks); Verify(); } @@ -94,6 +96,13 @@ void RowGroup::MoveToCollection(RowGroupCollection &collection_p) { RowGroup::~RowGroup() { } +bool RowGroup::ColumnIsLoaded(storage_t c) const { + if (!is_loaded) { + return true; + } + return is_loaded[c]; +} + vector> &RowGroup::GetColumns() { // ensure all columns are loaded for (idx_t c = 0; c < GetColumnCount(); c++) { @@ -375,6 +384,24 @@ bool RowGroup::InitializeScan(CollectionScanState &state, SegmentNode return true; } +unique_ptr RowGroup::CreateNewRowGroupCopy(RowGroupCollection &new_collection, idx_t new_column_count) { + auto row_group = make_uniq(new_collection, this->count); + row_group->deletes_pointers = deletes_pointers; + row_group->deletes_is_loaded = deletes_is_loaded.load(); + row_group->owned_version_info = owned_version_info; + row_group->version_info = version_info.load(); + row_group->columns.resize(new_column_count); + if (is_loaded) { + row_group->is_loaded = unique_ptr[]>(new atomic[new_column_count]); + } + if (!column_pointers.empty()) { + row_group->column_pointers.resize(new_column_count); + } + row_group->has_per_column_metadata_blocks = has_per_column_metadata_blocks; + row_group->has_changes = true; + return row_group; +} + unique_ptr RowGroup::AlterType(RowGroupCollection &new_collection, const LogicalType &target_type, idx_t changed_idx, ExpressionExecutor &executor, CollectionScanState &scan_state, SegmentNode &node, @@ -408,20 +435,37 @@ unique_ptr RowGroup::AlterType(RowGroupCollection &new_collection, con column_data->Append(append_state, append_vector, scan_chunk.size()); } - // set up the row_group based on this row_group - auto row_group = make_uniq(new_collection, this->count); - row_group->SetVersionInfo(GetOrCreateVersionInfoPtr()); - auto &cols = GetColumns(); - for (idx_t i = 0; i < cols.size(); i++) { + auto supports_per_column_writes = new_collection.SupportsPerColumnWrites(); + if (!supports_per_column_writes) { + // ensure all columns are loaded, as checkpointing will need to load them anyway, and it's better to fail-fast + // in case these don't fit in memory here + GetColumns(); + } + unique_lock lock(row_group_lock); + auto row_group = CreateNewRowGroupCopy(new_collection, columns.size()); + // copy existing columns, but swap out the one at changed_idx + for (idx_t i = 0; i < columns.size(); i++) { if (i == changed_idx) { - // this is the altered column: use the new column - row_group->columns.push_back(std::move(column_data)); + row_group->columns[i] = std::move(column_data); + if (row_group->is_loaded) { + row_group->is_loaded[i] = true; + } column_data.reset(); } else { - // this column was not altered: use the data directly - row_group->columns.push_back(cols[i]); + row_group->columns[i] = columns[i]; + if (row_group->is_loaded) { + row_group->is_loaded[i] = is_loaded[i].load(); + } + if (!row_group->column_pointers.empty()) { + row_group->column_pointers[i] = column_pointers[i]; + } } } + if (has_per_column_metadata_blocks) { + row_group->per_column_metadata_blocks = per_column_metadata_blocks; + row_group->per_column_metadata_blocks.RemoveColumn(changed_idx); + } + lock.unlock(); row_group->Verify(); return row_group; } @@ -448,13 +492,33 @@ unique_ptr RowGroup::AddColumn(RowGroupCollection &new_collection, Col } } - // set up the row_group based on this row_group - auto row_group = make_uniq(new_collection, this->count); - row_group->SetVersionInfo(GetOrCreateVersionInfoPtr()); - row_group->columns = GetColumns(); - // now add the new column - row_group->columns.push_back(std::move(added_column)); + if (!new_collection.SupportsPerColumnWrites()) { + // ensure all columns are loaded, as checkpointing will need to load them anyway, and it's better to fail-fast + // in case these don't fit in memory here + GetColumns(); + } + unique_lock lock(row_group_lock); + auto row_group = CreateNewRowGroupCopy(new_collection, columns.size() + 1); + // copy existing columns + for (idx_t i = 0; i < columns.size(); i++) { + row_group->columns[i] = columns[i]; + if (row_group->is_loaded) { + row_group->is_loaded[i] = is_loaded[i].load(); + } + if (!row_group->column_pointers.empty()) { + row_group->column_pointers[i] = column_pointers[i]; + } + } + if (has_per_column_metadata_blocks) { + row_group->per_column_metadata_blocks = per_column_metadata_blocks; + } + // add the new column + row_group->columns[columns.size()] = std::move(added_column); + if (row_group->is_loaded) { + row_group->is_loaded[columns.size()] = true; + } + lock.unlock(); row_group->Verify(); return row_group; } @@ -464,16 +528,33 @@ unique_ptr RowGroup::RemoveColumn(RowGroupCollection &new_collection, D_ASSERT(removed_column < columns.size()); - auto row_group = make_uniq(new_collection, this->count); - row_group->SetVersionInfo(GetOrCreateVersionInfoPtr()); + if (!new_collection.SupportsPerColumnWrites()) { + // ensure all columns are loaded, as checkpointing will need to load them anyway, and it's better to fail-fast + // in case these don't fit in memory here + GetColumns(); + } + unique_lock lock(row_group_lock); + auto row_group = CreateNewRowGroupCopy(new_collection, columns.size() - 1); // copy over all columns except for the removed one - auto &cols = GetColumns(); - for (idx_t i = 0; i < cols.size(); i++) { - if (i != removed_column) { - row_group->columns.push_back(cols[i]); + idx_t target_idx = 0; + for (idx_t i = 0; i < columns.size(); i++) { + if (i == removed_column) { + continue; } + row_group->columns[target_idx] = columns[i]; + if (row_group->is_loaded) { + row_group->is_loaded[target_idx] = is_loaded[i].load(); + } + if (!row_group->column_pointers.empty()) { + row_group->column_pointers[target_idx] = column_pointers[i]; + } + target_idx++; } - + if (has_per_column_metadata_blocks) { + row_group->per_column_metadata_blocks = per_column_metadata_blocks; + row_group->per_column_metadata_blocks.RemoveColumn(removed_column); + } + lock.unlock(); row_group->Verify(); return row_group; } @@ -782,7 +863,7 @@ optional_ptr RowGroup::GetVersionInfo() { if (!HasUnloadedDeletes()) { return version_info; } - // deletes are not loaded - reload + D_ASSERT(!deletes_pointers.empty()); auto root_delete = deletes_pointers[0]; auto loaded_info = RowVersionManager::Deserialize(root_delete, GetBlockManager().GetMetadataManager()); SetVersionInfo(std::move(loaded_info)); @@ -1045,6 +1126,23 @@ CompressionType ColumnCheckpointInfo::GetCompressionType() { return info.compression_types[column_idx]; } +shared_ptr RowGroup::CheckpointColumn(const RowGroup &row_group, idx_t column_idx, RowGroupWriteInfo &info, + RowGroupWriteData &write_data) { + auto &column = row_group.GetColumn(column_idx); + ColumnCheckpointInfo checkpoint_info(info, column_idx); + auto checkpoint_state = column.Checkpoint(row_group, checkpoint_info); + + auto result_col = checkpoint_state->GetFinalResult(); + // FIXME: we should get rid of the checkpoint state statistics - and instead use the stats in the ColumnData + // directly + auto stats = checkpoint_state->GetStatistics(); + result_col->MergeStatistics(*stats); + + write_data.statistics.push_back(stats->Copy()); + write_data.states.push_back(std::move(checkpoint_state)); + return result_col; +} + vector RowGroup::WriteToDisk(RowGroupWriteInfo &info, const vector> &row_groups) { vector result; @@ -1078,20 +1176,8 @@ vector RowGroup::WriteToDisk(RowGroupWriteInfo &info, for (idx_t column_idx = 0; column_idx < column_count; column_idx++) { for (idx_t row_group_idx = 0; row_group_idx < row_groups.size(); row_group_idx++) { auto &row_group = row_groups[row_group_idx].get(); - auto &row_group_write_data = result[row_group_idx]; - auto &column = row_group.GetColumn(column_idx); - ColumnCheckpointInfo checkpoint_info(info, column_idx); - auto checkpoint_state = column.Checkpoint(row_group, checkpoint_info); - - auto result_col = checkpoint_state->GetFinalResult(); - // FIXME: we should get rid of the checkpoint state statistics - and instead use the stats in the ColumnData - // directly - auto stats = checkpoint_state->GetStatistics(); - result_col->MergeStatistics(*stats); - - result_columns[row_group_idx].push_back(std::move(result_col)); - row_group_write_data.statistics.push_back(stats->Copy()); - row_group_write_data.states.push_back(std::move(checkpoint_state)); + result_columns[row_group_idx].emplace_back( + CheckpointColumn(row_group, column_idx, info, result[row_group_idx])); } } @@ -1134,46 +1220,50 @@ bool RowGroup::HasUnloadedDeletes() const { return !deletes_is_loaded; } -vector RowGroup::GetOrComputeExtraMetadataBlocks(bool force_compute) { - if (has_metadata_blocks && !force_compute) { - return extra_metadata_blocks; - } +PerColumnMetadataBlocks RowGroup::ComputePerColumnMetadataBlocks() const { + PerColumnMetadataBlocks result; if (column_pointers.empty()) { - // no pointers - return {}; - } - vector read_pointers; - // column_pointers stores the beginning of each column - // if columns are big - they may span multiple metadata blocks - // we need to figure out all blocks that this row group points to - // we need to follow the linked list in the metadata blocks to allow for this + return result; + } + auto &metadata_manager = GetCollection().GetMetadataManager(); - idx_t last_idx = column_pointers.size() - 1; - if (column_pointers.size() > 1) { - // for all but the last column pointer - we can just follow the linked list until we reach the last column - MetadataReader reader(metadata_manager, column_pointers[0]); - auto last_pointer = column_pointers[last_idx]; - read_pointers = reader.GetRemainingBlocks(last_pointer); - } - // for the last column we need to deserialize the column - because we don't know where it stops auto &types = GetCollection().GetTypes(); - MetadataReader reader(metadata_manager, column_pointers[last_idx], &read_pointers); - ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), last_idx, reader, types[last_idx]); - unordered_set result_as_set; - for (auto &ptr : read_pointers) { - result_as_set.emplace(ptr.block_pointer); - } - for (auto &ptr : column_pointers) { - result_as_set.erase(ptr.block_pointer); + for (idx_t i = 0; i < column_pointers.size(); i++) { + auto &start = column_pointers[i]; + vector col_read_pointers; + MetadataReader col_reader(metadata_manager, start, &col_read_pointers); + ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), i, col_reader, types[i]); + vector extra_blocks; + for (auto &ptr : col_read_pointers) { + if (ptr.block_pointer != start.block_pointer) { + extra_blocks.emplace_back(ptr.block_pointer); + } + } + result.AddColumn(i, extra_blocks); } - return {result_as_set.begin(), result_as_set.end()}; + return result; } const vector &RowGroup::GetColumnStartPointers() const { return column_pointers; } +vector RowGroup::GetExtraMetadataBlockPointers() const { + vector extra_metadata_block_pointers; + if (has_per_column_metadata_blocks) { + per_column_metadata_blocks.ForEachBlock( + [&](idx_t, idx_t block_id) { extra_metadata_block_pointers.emplace_back(block_id, 0); }); + } else { + D_ASSERT(has_metadata_blocks); + extra_metadata_block_pointers.reserve(extra_metadata_blocks.size()); + for (auto &block_pointer : extra_metadata_blocks) { + extra_metadata_block_pointers.emplace_back(block_pointer, 0); + } + } + return extra_metadata_block_pointers; +} + bool RowGroup::CanReuseMetadata(RowGroupWriter &writer) const { if (!Settings::Get(writer.GetDatabase())) { // disabled by configuration @@ -1183,10 +1273,6 @@ bool RowGroup::CanReuseMetadata(RowGroupWriter &writer) const { // no existing metadata on disk - cannot re-use return false; } - if (HasChanges()) { - // we have changes - need to rewrite - return false; - } auto &table_writer = writer.GetTableWriter(); if (table_writer.RequireLegacyStartRow() && table_writer.RowIdsChanged()) { // row-ids changed and we are targeting an old storage version that requires "start_row" - cannot re-use @@ -1195,29 +1281,120 @@ bool RowGroup::CanReuseMetadata(RowGroupWriter &writer) const { return true; } +bool RowGroup::HasUnchangedColumns() const { + for (idx_t c = 0; c < columns.size(); c++) { + if (!ColumnIsLoaded(c)) { + return true; + } + if (!columns[c]->HasAnyChanges()) { + return true; + } + } + return false; +} + RowGroupWriteData RowGroup::WriteToDisk(RowGroupWriter &writer) { - if (CanReuseMetadata(writer)) { - // we have existing metadata and the row group has not been changed - // re-use previous metadata + bool can_reuse_metadata = CanReuseMetadata(writer); + if (can_reuse_metadata && !HasChanges()) { RowGroupWriteData result; - result.reuse_existing_metadata_blocks = true; - result.existing_extra_metadata_blocks = GetOrComputeExtraMetadataBlocks(); - return result; + result.write_action = RowGroupWriteAction::REUSE_EXISTING_ROW_GROUP_METADATA; + if (GetCollection().SupportsPerColumnWrites()) { + if (has_per_column_metadata_blocks) { + return result; + } + per_column_metadata_blocks = ComputePerColumnMetadataBlocks(); + has_per_column_metadata_blocks = true; + return result; + } + + if (has_metadata_blocks) { + return result; + } + + if (!has_per_column_metadata_blocks) { + auto meta_blocks = ComputePerColumnMetadataBlocks(); + + vector computed_extra_metadata_blocks; + meta_blocks.ForEachBlock( + [&](idx_t, idx_t block_id) { computed_extra_metadata_blocks.emplace_back(block_id); }); + extra_metadata_blocks = computed_extra_metadata_blocks; + has_metadata_blocks = true; + return result; + } + + D_ASSERT(has_per_column_metadata_blocks && !GetCollection().SupportsPerColumnWrites()); + // we loaded column-level metadata from disk, but don't support writing it anymore, so we need to fall back to a + // full checkpoint as we need to write out the metadata in a single go } + + // determine which columns can be reused + bool partial_reuse = + can_reuse_metadata && has_per_column_metadata_blocks && GetCollection().SupportsPerColumnWrites(); auto &compression_types = writer.GetCompressionTypes(); - if (columns.size() != compression_types.size()) { - throw InternalException("RowGroup::WriteToDisk - mismatch in column count vs compression types"); + RowGroupWriteData result; + if (partial_reuse) { + result.write_action = RowGroupWriteAction::PARTIALLY_REUSE_COLUMN_METADATA; + } else { + result.write_action = RowGroupWriteAction::FULLY_CHECKPOINT_ROW_GROUP; + } + + auto result_row_group = make_shared_ptr(GetCollection(), this->count); + result_row_group->columns.resize(GetColumnCount()); + result_row_group->column_pointers.resize(GetColumnCount()); + result_row_group->deletes_pointers = deletes_pointers; + result_row_group->deletes_is_loaded = deletes_is_loaded.load(); + result_row_group->owned_version_info = owned_version_info; + result_row_group->version_info = version_info.load(); + if (is_loaded) { + result_row_group->is_loaded = unique_ptr[]>(new atomic[GetColumnCount()]); + for (idx_t c = 0; c < GetColumnCount(); c++) { + result_row_group->is_loaded[c] = true; + } } + + RowGroupWriteInfo info(writer.GetPartialBlockManager(), compression_types, writer.GetCheckpointOptions()); + for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) { - auto &column = GetColumn(column_idx); - if (column.count != this->count) { - throw InternalException("Corrupted in-memory column - column with index %llu has misaligned count (row " - "group has %llu rows, column has %llu)", - column_idx, this->count.load(), column.count.load()); + bool column_has_changes = true; + if (partial_reuse) { + if (!ColumnIsLoaded(column_idx)) { + column_has_changes = false; + } else if (!columns[column_idx]->HasAnyChanges()) { + column_has_changes = false; + } + } + + if (!column_has_changes) { + // reuse this column's metadata + result.states.push_back(nullptr); + result_row_group->column_pointers[column_idx] = column_pointers[column_idx]; + // carry forward existing column data and statistics + if (!ColumnIsLoaded(column_idx)) { + result_row_group->columns[column_idx] = nullptr; + result_row_group->is_loaded[column_idx] = false; + // column not loaded - stats will be merged from previous table stats during Checkpoint + result.statistics.push_back(BaseStatistics::CreateEmpty(GetCollection().GetTypes()[column_idx])); + } else { + result_row_group->columns[column_idx] = columns[column_idx]; + auto col_stats = columns[column_idx]->GetStatistics(); + result.statistics.push_back(col_stats + ? std::move(*col_stats) + : BaseStatistics::CreateEmpty(GetCollection().GetTypes()[column_idx])); + } + } else { + // checkpoint this column + auto &column = GetColumn(column_idx); + if (column.count != this->count) { + throw InternalException("Corrupted in-memory column - column with index %llu has misaligned count " + "(row group has %llu rows, column has %llu)", + column_idx, this->count.load(), column.count.load()); + } + result_row_group->columns[column_idx] = CheckpointColumn(*this, column_idx, info, result); } } - RowGroupWriteInfo info(writer.GetPartialBlockManager(), compression_types, writer.GetCheckpointOptions()); - return WriteToDisk(info); + + result.result_row_group = std::move(result_row_group); + return result; } void IncrementSegmentStart(PersistentColumnData &data, idx_t start_increment) { @@ -1237,26 +1414,28 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriteData write_data, RowGroupWrite // construct the row group pointer and write the column meta data to disk row_group_pointer.row_start = row_group_start; row_group_pointer.tuple_count = count; - if (write_data.reuse_existing_metadata_blocks) { + if (write_data.write_action == RowGroupWriteAction::REUSE_EXISTING_ROW_GROUP_METADATA) { // we are re-using the previous metadata row_group_pointer.data_pointers = column_pointers; - row_group_pointer.has_metadata_blocks = true; - row_group_pointer.extra_metadata_blocks = write_data.existing_extra_metadata_blocks; + row_group_pointer.has_metadata_blocks = has_metadata_blocks; + row_group_pointer.extra_metadata_blocks = extra_metadata_blocks; + row_group_pointer.has_per_column_metadata_blocks = has_per_column_metadata_blocks; + row_group_pointer.per_column_metadata_blocks = per_column_metadata_blocks; if (metadata_manager) { row_group_pointer.deletes_pointers = CheckpointDeletes(writer); - vector extra_metadata_block_pointers; - extra_metadata_block_pointers.reserve(write_data.existing_extra_metadata_blocks.size()); - for (auto &block_pointer : write_data.existing_extra_metadata_blocks) { - extra_metadata_block_pointers.emplace_back(block_pointer, 0); + vector metadata_block_pointers_to_be_cleared; + if (has_per_column_metadata_blocks) { + per_column_metadata_blocks.ForEachBlock( + [&](idx_t, idx_t block_id) { metadata_block_pointers_to_be_cleared.emplace_back(block_id, 0); }); + } else { + metadata_block_pointers_to_be_cleared.reserve(extra_metadata_blocks.size()); + for (auto &block_pointer : extra_metadata_blocks) { + metadata_block_pointers_to_be_cleared.emplace_back(block_pointer, 0); + } } metadata_manager->ClearModifiedBlocks(column_pointers); - metadata_manager->ClearModifiedBlocks(extra_metadata_block_pointers); - metadata_manager->ClearModifiedBlocks(deletes_pointers); - - // remember metadata_blocks to avoid loading them on future checkpoints - has_metadata_blocks = true; - extra_metadata_blocks = row_group_pointer.extra_metadata_blocks; + metadata_manager->ClearModifiedBlocks(metadata_block_pointers_to_be_cleared); } // merge row group stats into the global stats auto lock = global_stats.GetLock(); @@ -1271,27 +1450,70 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriteData write_data, RowGroupWrite } return row_group_pointer; } - D_ASSERT(write_data.states.size() == columns.size()); + // write path: write column metadata to disk (with optional per-column reuse) + D_ASSERT(write_data.states.size() == GetColumnCount()); + vector reused_columns; + + // merge stats { auto lock = global_stats.GetLock(); for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) { - global_stats.GetStats(*lock, column_idx).Statistics().Merge(write_data.statistics[column_idx]); + bool is_reused = !write_data.states[column_idx]; + if (is_reused) { + reused_columns.emplace_back(column_idx); + if (!ColumnIsLoaded(column_idx) && + collection.get().GetTypes()[column_idx].id() != LogicalTypeId::VARIANT) { + writer.SetHasUnloadedColumn(column_idx); + continue; + } + GetColumn(column_idx).MergeIntoStatistics(global_stats.GetStats(*lock, column_idx).Statistics()); + } else { + global_stats.GetStats(*lock, column_idx).Statistics().Merge(write_data.statistics[column_idx]); + } } } - vector column_metadata; - unordered_set metadata_blocks; - writer.StartWritingColumns(column_metadata); auto serialization_options = SerializationOptions(writer.GetAttachedDatabase()); - for (auto &state : write_data.states) { - // get the current position of the table data writer + // collect blocks that need to be preserved for reused columns + vector reused_column_blocks; + + vector> extra_blocks_for_columns; + if (!reused_columns.empty()) { + extra_blocks_for_columns = per_column_metadata_blocks.GetBlocksForColumns(reused_columns); + } + idx_t reused_column_idx = 0; + + for (idx_t column_idx = 0; column_idx < GetColumnCount(); column_idx++) { + bool is_reused = !write_data.states[column_idx]; + if (is_reused) { + // reuse existing column pointer and per-column blocks + auto col_ptr = column_pointers[column_idx]; + row_group_pointer.data_pointers.push_back(col_ptr); + auto &col_blocks = extra_blocks_for_columns[reused_column_idx]; + row_group_pointer.per_column_metadata_blocks.AddColumn(column_idx, col_blocks); + + // collect all blocks for this reused column for ClearModifiedBlocks + reused_column_blocks.push_back(col_ptr); + for (auto &block_id : col_blocks) { + reused_column_blocks.emplace_back(block_id, 0); + } + ++reused_column_idx; + continue; + } + // write new metadata for this column + auto &state = write_data.states[column_idx]; + D_ASSERT(state); + + // track blocks written for this column + vector col_written_blocks; + writer.StartWritingColumns(col_written_blocks); + auto &data_writer = writer.GetPayloadWriter(); auto pointer = writer.GetMetaBlockPointer(); // store the stats and the data pointers in the row group pointers row_group_pointer.data_pointers.push_back(pointer); - metadata_blocks.insert(pointer.block_pointer); // Write pointers to the column segments. // @@ -1306,27 +1528,40 @@ RowGroupPointer RowGroup::Checkpoint(RowGroupWriteData write_data, RowGroupWrite serializer.Begin(); persistent_data.Serialize(serializer); serializer.End(); - } - writer.FinishWritingColumns(); - row_group_pointer.has_metadata_blocks = true; - for (auto &column_pointer : column_metadata) { - auto entry = metadata_blocks.find(column_pointer.block_pointer); - if (entry != metadata_blocks.end()) { - // this metadata block is already stored in "data_pointers" - no need to duplicate it - continue; + writer.FinishWritingColumns(); + + // collect per-column extra blocks (excluding the start block) + vector col_extra_blocks; + for (auto &written_ptr : col_written_blocks) { + if (written_ptr.block_pointer != pointer.block_pointer) { + col_extra_blocks.push_back(written_ptr.block_pointer); + } } - // this metadata block is not stored - add it to the extra metadata blocks - row_group_pointer.extra_metadata_blocks.push_back(column_pointer.block_pointer); - metadata_blocks.insert(column_pointer.block_pointer); + row_group_pointer.per_column_metadata_blocks.AddColumn(column_idx, col_extra_blocks); + } + + if (GetCollection().SupportsPerColumnWrites()) { + row_group_pointer.has_per_column_metadata_blocks = true; // blocks already populated above + } else { + row_group_pointer.has_metadata_blocks = true; + row_group_pointer.per_column_metadata_blocks.ForEachBlock( + [&](idx_t, idx_t block_id) { row_group_pointer.extra_metadata_blocks.push_back(block_id); }); + row_group_pointer.per_column_metadata_blocks = {}; } + if (metadata_manager) { row_group_pointer.deletes_pointers = CheckpointDeletes(writer); + metadata_manager->ClearModifiedBlocks(reused_column_blocks); } - // set up the pointers correctly within this row group for future operations + + // cache metadata pointers for future checkpoint reuse column_pointers = row_group_pointer.data_pointers; - has_metadata_blocks = true; + has_metadata_blocks = row_group_pointer.has_metadata_blocks; extra_metadata_blocks = row_group_pointer.extra_metadata_blocks; + has_per_column_metadata_blocks = row_group_pointer.has_per_column_metadata_blocks; + per_column_metadata_blocks = row_group_pointer.per_column_metadata_blocks; + Verify(); return row_group_pointer; } @@ -1390,15 +1625,26 @@ vector RowGroup::CheckpointDeletes(RowGroupWriter &writer) { return vinfo->Checkpoint(writer); } -void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &serializer) { +void RowGroup::Serialize(RowGroupPointer &pointer, Serializer &serializer, bool supports_per_column_writes) { serializer.WriteProperty(100, "row_start", pointer.row_start); serializer.WriteProperty(101, "tuple_count", pointer.tuple_count); serializer.WriteProperty(102, "data_pointers", pointer.data_pointers); serializer.WriteProperty(103, "delete_pointers", pointer.deletes_pointers); - if (serializer.ShouldSerialize(6)) { + if (serializer.ShouldSerialize(6) && !supports_per_column_writes) { serializer.WriteProperty(104, "has_metadata_blocks", pointer.has_metadata_blocks); serializer.WritePropertyWithDefault(105, "extra_metadata_blocks", pointer.extra_metadata_blocks); } + if (supports_per_column_writes) { + D_ASSERT(serializer.ShouldSerialize(6)); + // also write legacy metadata blocks for v1.4 and v1.5 + serializer.WriteProperty(104, "has_metadata_blocks", pointer.has_per_column_metadata_blocks); + vector extra_metadata_block_ids; + pointer.per_column_metadata_blocks.ForEachBlock( + [&](idx_t, idx_t block_id) { extra_metadata_block_ids.push_back(block_id); }); + serializer.WritePropertyWithDefault(105, "extra_metadata_blocks", extra_metadata_block_ids); + serializer.WriteProperty(106, "has_per_column_metadata_blocks", pointer.has_per_column_metadata_blocks); + serializer.WritePropertyWithDefault(107, "per_column_metadata_blocks", pointer.per_column_metadata_blocks.data); + } } RowGroupPointer RowGroup::Deserialize(Deserializer &deserializer) { @@ -1409,6 +1655,15 @@ RowGroupPointer RowGroup::Deserialize(Deserializer &deserializer) { result.deletes_pointers = deserializer.ReadProperty>(103, "delete_pointers"); result.has_metadata_blocks = deserializer.ReadPropertyWithExplicitDefault(104, "has_metadata_blocks", false); result.extra_metadata_blocks = deserializer.ReadPropertyWithDefault>(105, "extra_metadata_blocks"); + result.has_per_column_metadata_blocks = + deserializer.ReadPropertyWithExplicitDefault(106, "has_per_column_metadata_blocks", false); + result.per_column_metadata_blocks = { + deserializer.ReadPropertyWithDefault>(107, "per_column_metadata_blocks")}; + if (result.has_per_column_metadata_blocks) { + // per-column metadata supersedes legacy extra_metadata_blocks + result.has_metadata_blocks = false; + result.extra_metadata_blocks.clear(); + } return result; } @@ -1511,8 +1766,13 @@ idx_t RowGroup::Delete(TransactionData transaction, DataTable &table, row_t *ids void RowGroup::Verify() { #ifdef DEBUG - for (auto &column : GetColumns()) { - column->Verify(*this); + for (idx_t c = 0; c < columns.size(); c++) { + if (!ColumnIsLoaded(c)) { + continue; + } + if (columns[c]) { + columns[c]->Verify(*this); + } } lock_guard guard(row_group_lock); if (row_id_is_loaded) { diff --git a/src/duckdb/src/storage/table/row_group_collection.cpp b/src/duckdb/src/storage/table/row_group_collection.cpp index 99d06f048..37cb8495b 100644 --- a/src/duckdb/src/storage/table/row_group_collection.cpp +++ b/src/duckdb/src/storage/table/row_group_collection.cpp @@ -1584,7 +1584,7 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl break; } auto &write_state = checkpoint_state.write_data[segment_idx]; - if (!write_state.reuse_existing_metadata_blocks) { + if (write_state.write_action != RowGroupWriteAction::REUSE_EXISTING_ROW_GROUP_METADATA) { table_has_changes = true; break; } @@ -1597,14 +1597,10 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl for (idx_t segment_idx = 0; segment_idx < checkpoint_state.SegmentCount(); segment_idx++) { auto entry = checkpoint_state.GetSegment(segment_idx); auto &row_group = entry->GetNode(); - auto &write_state = checkpoint_state.write_data[segment_idx]; metadata_manager.ClearModifiedBlocks(row_group.GetColumnStartPointers()); - D_ASSERT(write_state.reuse_existing_metadata_blocks); - vector extra_metadata_block_pointers; - extra_metadata_block_pointers.reserve(write_state.existing_extra_metadata_blocks.size()); - for (auto &block_pointer : write_state.existing_extra_metadata_blocks) { - extra_metadata_block_pointers.emplace_back(block_pointer, 0); - } + D_ASSERT(checkpoint_state.write_data[segment_idx].write_action == + RowGroupWriteAction::REUSE_EXISTING_ROW_GROUP_METADATA); + vector extra_metadata_block_pointers = row_group.GetExtraMetadataBlockPointers(); metadata_manager.ClearModifiedBlocks(extra_metadata_block_pointers); auto row_group_writer = checkpoint_state.writer.GetRowGroupWriter(row_group); row_group.CheckpointDeletes(*row_group_writer); @@ -1649,7 +1645,21 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl } auto &row_group_write_data = checkpoint_state.write_data[segment_idx]; idx_t row_start = new_total_rows; - bool metadata_reuse = row_group_write_data.reuse_existing_metadata_blocks; + auto write_action = row_group_write_data.write_action; + auto debug_verify_blocks = Settings::Get(GetAttached().GetDatabase()) && + dynamic_cast(&checkpoint_state.writer) != nullptr; + vector reuse_column; + if (debug_verify_blocks) { + if (write_action == RowGroupWriteAction::REUSE_EXISTING_ROW_GROUP_METADATA) { + auto existing_column_count = entry->ReferenceNode()->GetColumnCount(); + reuse_column.resize(existing_column_count, true); + } else { + reuse_column.resize(row_group_write_data.states.size()); + for (idx_t column_idx = 0; column_idx < row_group_write_data.states.size(); column_idx++) { + reuse_column[column_idx] = !row_group_write_data.states[column_idx]; + } + } + } auto new_row_group = std::move(row_group_write_data.result_row_group); if (!new_row_group) { // row group was unchanged - emit previous row group @@ -1657,9 +1667,6 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl } auto &row_group = *new_row_group; RowGroupPointer pointer_copy; - auto debug_verify_blocks = Settings::Get(GetAttached().GetDatabase()) && - dynamic_cast(&checkpoint_state.writer) != nullptr; - // check if we should write this row group to the persistent storage // don't write it if it only has uncommitted transaction-local changes made AFTER this checkpoint was started auto pointer = @@ -1678,46 +1685,91 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl new_row_groups->AppendSegment(l, std::move(new_row_group)); if (debug_verify_blocks) { - if (!pointer_copy.has_metadata_blocks) { + if (!pointer_copy.has_metadata_blocks && !pointer_copy.has_per_column_metadata_blocks) { throw InternalException("Checkpointing should always remember metadata blocks"); } - if (metadata_reuse && pointer_copy.data_pointers != row_group.GetColumnStartPointers()) { - throw InternalException("Colum start pointers changed during metadata reuse"); + if (SupportsPerColumnWrites() != pointer_copy.has_per_column_metadata_blocks) { + throw InternalException( + "Checkpointing should always remember per-column metadata blocks when supporting it"); + } + if (write_action == RowGroupWriteAction::PARTIALLY_REUSE_COLUMN_METADATA && !SupportsPerColumnWrites()) { + throw InternalException("Partially reusing column metadata should only be done when supporting it"); + } + if (write_action == RowGroupWriteAction::REUSE_EXISTING_ROW_GROUP_METADATA && + pointer_copy.data_pointers != row_group.GetColumnStartPointers()) { + throw InternalException("Column start pointers changed during full metadata reuse"); } - // Capture blocks that have been written - vector all_written_blocks = pointer_copy.data_pointers; - vector all_metadata_blocks; - for (auto &block : pointer_copy.extra_metadata_blocks) { - all_written_blocks.emplace_back(block, 0); - all_metadata_blocks.emplace_back(block, 0); + // Verify per_column_metadata_blocks matches full deserialization + if (pointer_copy.has_per_column_metadata_blocks) { + const auto &column_start_ptrs = row_group.GetColumnStartPointers(); + auto &col_types = row_group.GetCollection().GetTypes(); + auto &mm = row_group.GetCollection().GetMetadataManager(); + vector columns; + vector> deserialized_extras; + deserialized_extras.reserve(column_start_ptrs.size()); + for (idx_t i = 0; i < column_start_ptrs.size(); i++) { + columns.emplace_back(i); + deserialized_extras.emplace_back(); + vector col_read_pointers; + MetadataReader reader(mm, column_start_ptrs[i], &col_read_pointers); + ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), i, reader, col_types[i]); + // collect extra blocks from deserialization (excluding start block) + for (auto &ptr : col_read_pointers) { + if (ptr.block_pointer != column_start_ptrs[i].block_pointer) { + deserialized_extras[i].emplace_back(ptr.block_pointer); + } + } + } + auto blocks_for_columns = pointer_copy.per_column_metadata_blocks.GetBlocksForColumns(columns); + if (deserialized_extras != blocks_for_columns) { + throw InternalException("per_column_metadata_blocks mismatch"); + } } - // Verify that we can load the metadata correctly again - vector all_quick_read_blocks; - for (auto &ptr : row_group.GetColumnStartPointers()) { - all_quick_read_blocks.emplace_back(ptr); - if (metadata_reuse && !block_manager.GetMetadataManager().BlockHasBeenCleared(ptr)) { - throw InternalException("Found column start block that was not cleared"); + // Verify blocks are cleared for partial column reuse + for (idx_t col_idx = 0; col_idx < pointer_copy.data_pointers.size(); col_idx++) { + if (!reuse_column[col_idx]) { + continue; + } + // reused column: its start block should be cleared + if (!block_manager.GetMetadataManager().BlockHasBeenCleared(pointer_copy.data_pointers[col_idx])) { + throw InternalException("Partial reuse: column %llu start block was not cleared", col_idx); } } - auto extra_metadata_blocks = row_group.GetOrComputeExtraMetadataBlocks(/* force_compute: */ true); - for (auto &ptr : extra_metadata_blocks) { - auto block_pointer = MetaBlockPointer(ptr, 0); - all_quick_read_blocks.emplace_back(block_pointer); - if (metadata_reuse && !block_manager.GetMetadataManager().BlockHasBeenCleared(block_pointer)) { - throw InternalException("Found extra metadata block that was not cleared"); + pointer_copy.per_column_metadata_blocks.ForEachBlock([&](idx_t col_idx, idx_t block_id) { + if (!reuse_column[col_idx]) { + return; + } + // reused column: extra blocks should be cleared + auto block_ptr = MetaBlockPointer(block_id, 0); + if (!block_manager.GetMetadataManager().BlockHasBeenCleared(block_ptr)) { + throw InternalException("Partial reuse: column extra block %llu was not cleared", block_id); + } + }); + + // Capture blocks that have been written + vector all_written_blocks = pointer_copy.data_pointers; + if (pointer_copy.has_metadata_blocks) { + for (auto &block : pointer_copy.extra_metadata_blocks) { + all_written_blocks.emplace_back(block, 0); + } + } else { + if (!pointer_copy.has_per_column_metadata_blocks) { + throw InternalException("Checkpointing should always remember metadata blocks"); } + pointer_copy.per_column_metadata_blocks.ForEachBlock( + [&](idx_t, idx_t block) { all_written_blocks.emplace_back(block, 0); }); } - // Deserialize all columns to check if the quick read via GetOrComputeExtraMetadataBlocks was correct + // Deserialize all columns to check if what's on disk matches our metadata vector all_full_read_blocks; - auto column_start_pointers = row_group.GetColumnStartPointers(); - auto &types = row_group.GetCollection().GetTypes(); + const auto &column_start_pointers = row_group.GetColumnStartPointers(); + auto &column_types = row_group.GetCollection().GetTypes(); auto &metadata_manager = row_group.GetCollection().GetMetadataManager(); for (idx_t i = 0; i < column_start_pointers.size(); i++) { MetadataReader reader(metadata_manager, column_start_pointers[i], &all_full_read_blocks); - ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), i, reader, types[i]); + ColumnData::Deserialize(GetBlockManager(), GetTableInfo(), i, reader, column_types[i]); } // Derive sets of blocks to compare @@ -1725,25 +1777,16 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl for (auto &ptr : all_written_blocks) { all_written_block_ids.insert(ptr.block_pointer); } - set all_quick_read_block_ids; - for (auto &ptr : all_quick_read_blocks) { - all_quick_read_block_ids.insert(ptr.block_pointer); - } set all_full_read_block_ids; for (auto &ptr : all_full_read_blocks) { all_full_read_block_ids.insert(ptr.block_pointer); } - if (all_written_block_ids != all_quick_read_block_ids || - all_quick_read_block_ids != all_full_read_block_ids) { + if (all_written_block_ids != all_full_read_block_ids) { std::stringstream oss; oss << "\nWritten: "; for (auto &block : all_written_blocks) { oss << block << ", "; } - oss << "\nQuick read: "; - for (auto &block : all_quick_read_blocks) { - oss << block << ", "; - } oss << "\nFull read: "; for (auto &block : all_full_read_blocks) { oss << block << ", "; @@ -1762,6 +1805,11 @@ void RowGroupCollection::Checkpoint(TableDataWriter &writer, TableStatistics &gl set all_written_deletes_block_ids; for (auto &ptr : pointer_copy.deletes_pointers) { all_written_deletes_block_ids.insert(ptr.block_pointer); + // delete ptr should be cleared (unless it's been newly written) + if (block_manager.GetMetadataManager().BlockIsModified(ptr) && + !block_manager.GetMetadataManager().BlockHasBeenCleared(ptr)) { + throw InternalException("Delete ptr %llu was not cleared", ptr.block_pointer); + } } set all_read_deletes_block_ids; for (auto &ptr : read_deletes_pointers) { @@ -1880,7 +1928,7 @@ vector RowGroupCollection::GetPartitionStats() const { //===--------------------------------------------------------------------===// // GetColumnSegmentInfo //===--------------------------------------------------------------------===// -vector RowGroupCollection::GetColumnSegmentInfo(const QueryContext &context) { +vector RowGroupCollection::GetColumnSegmentInfo(const QueryContext &context) const { vector result; auto row_groups = GetRowGroups(); auto lock = row_groups->Lock(); @@ -1891,6 +1939,14 @@ vector RowGroupCollection::GetColumnSegmentInfo(const QueryCo return result; } +bool RowGroupCollection::SupportsPerColumnWrites() { + auto version = SerializationCompatibility::FromDatabase(GetAttached()); + if (version.serialization_version >= SerializationCompatibility::FromString("v1.4.0").serialization_version) { + return Settings::Get(GetAttached().GetDatabase()); + } + return false; +} + //===--------------------------------------------------------------------===// // Alter //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/storage/table/row_group_reorderer.cpp b/src/duckdb/src/storage/table/row_group_reorderer.cpp index 00c2bf3d2..642b8571f 100644 --- a/src/duckdb/src/storage/table/row_group_reorderer.cpp +++ b/src/duckdb/src/storage/table/row_group_reorderer.cpp @@ -24,7 +24,7 @@ bool CompareValues(const Value &v1, const Value &v2, const OrderByStatistics ord idx_t GetQualifyingTupleCount(RowGroup &row_group, BaseStatistics &stats, const OrderByColumnType type) { if (!stats.CanHaveNull()) { - return row_group.count; + return row_group.GetCommittedRowCount(); } if (type == OrderByColumnType::NUMERIC) { diff --git a/src/duckdb/src/storage/temporary_file_manager.cpp b/src/duckdb/src/storage/temporary_file_manager.cpp index 8a5bcde94..a7440ec76 100644 --- a/src/duckdb/src/storage/temporary_file_manager.cpp +++ b/src/duckdb/src/storage/temporary_file_manager.cpp @@ -142,6 +142,10 @@ idx_t BlockIndexManager::GetMaxIndex() const { return max_index; } +idx_t BlockIndexManager::GetUsedBlockCount() const { + return indexes_in_use.size(); +} + bool BlockIndexManager::HasFreeBlocks() const { return !free_indexes.empty(); } @@ -318,7 +322,7 @@ TemporaryFileInformation TemporaryFileHandle::GetTemporaryFile() { TemporaryFileLock lock(file_lock); TemporaryFileInformation info; info.path = path; - info.size = GetPositionInFile(index_manager.GetMaxIndex()); + info.size = GetPositionInFile(index_manager.GetUsedBlockCount()); return info; } diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index f77cc507f..ea3403fca 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -348,17 +348,17 @@ #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" - #include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" - #include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" + +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" + #include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" diff --git a/src/duckdb/ub_src_storage_table.cpp b/src/duckdb/ub_src_storage_table.cpp index e06c2d1cc..4c6330c7b 100644 --- a/src/duckdb/ub_src_storage_table.cpp +++ b/src/duckdb/ub_src_storage_table.cpp @@ -18,6 +18,8 @@ #include "src/storage/table/update_segment.cpp" +#include "src/storage/table/per_column_metadata_blocks.cpp" + #include "src/storage/table/persistent_table_data.cpp" #include "src/storage/table/row_id_column_data.cpp"