Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 42 additions & 42 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -353,52 +353,52 @@ set(DUCKDB_SRC_FILES
src/duckdb/third_party/zstd/dict/divsufsort.cpp
src/duckdb/third_party/zstd/dict/fastcover.cpp
src/duckdb/third_party/zstd/dict/zdict.cpp
src/duckdb/extension/core_functions/function_list.cpp
src/duckdb/extension/core_functions/lambda_functions.cpp
src/duckdb/extension/core_functions/core_functions_extension.cpp
src/duckdb/ub_extension_core_functions_scalar_blob.cpp
src/duckdb/extension/core_functions/lambda_functions.cpp
src/duckdb/extension/core_functions/function_list.cpp
src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp
src/duckdb/ub_extension_core_functions_aggregate_nested.cpp
src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp
src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp
src/duckdb/ub_extension_core_functions_aggregate_regression.cpp
src/duckdb/extension/core_functions/scalar/bit/bitstring.cpp
src/duckdb/ub_extension_core_functions_scalar_generic.cpp
src/duckdb/extension/core_functions/scalar/enum/enum_functions.cpp
src/duckdb/ub_extension_core_functions_scalar_date.cpp
src/duckdb/ub_extension_core_functions_scalar_debug.cpp
src/duckdb/extension/core_functions/scalar/math/numeric.cpp
src/duckdb/ub_extension_core_functions_scalar_array.cpp
src/duckdb/ub_extension_core_functions_scalar_struct.cpp
src/duckdb/ub_extension_core_functions_scalar_list.cpp
src/duckdb/ub_extension_core_functions_scalar_union.cpp
src/duckdb/extension/core_functions/scalar/bit/bitstring.cpp
src/duckdb/ub_extension_core_functions_scalar_debug.cpp
src/duckdb/ub_extension_core_functions_scalar_blob.cpp
src/duckdb/ub_extension_core_functions_scalar_list.cpp
src/duckdb/ub_extension_core_functions_scalar_map.cpp
src/duckdb/ub_extension_core_functions_scalar_generic.cpp
src/duckdb/ub_extension_core_functions_scalar_date.cpp
src/duckdb/extension/core_functions/scalar/operators/bitwise.cpp
src/duckdb/ub_extension_core_functions_scalar_random.cpp
src/duckdb/extension/core_functions/scalar/math/numeric.cpp
src/duckdb/ub_extension_core_functions_scalar_string.cpp
src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp
src/duckdb/ub_extension_core_functions_aggregate_nested.cpp
src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp
src/duckdb/ub_extension_core_functions_aggregate_regression.cpp
src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp
src/duckdb/ub_extension_core_functions_scalar_struct.cpp
src/duckdb/extension/parquet/parquet_reader.cpp
src/duckdb/extension/parquet/parquet_geometry.cpp
src/duckdb/extension/parquet/parquet_extension.cpp
src/duckdb/extension/parquet/parquet_timestamp.cpp
src/duckdb/extension/parquet/serialize_parquet.cpp
src/duckdb/extension/parquet/parquet_statistics.cpp
src/duckdb/extension/parquet/zstd_file_system.cpp
src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp
src/duckdb/extension/parquet/parquet_multi_file_info.cpp
src/duckdb/extension/parquet/parquet_shredding.cpp
src/duckdb/extension/parquet/parquet_multi_file_info.cpp
src/duckdb/extension/parquet/parquet_metadata.cpp
src/duckdb/extension/parquet/parquet_timestamp.cpp
src/duckdb/extension/parquet/parquet_crypto.cpp
src/duckdb/extension/parquet/serialize_parquet.cpp
src/duckdb/extension/parquet/parquet_column_schema.cpp
src/duckdb/extension/parquet/zstd_file_system.cpp
src/duckdb/extension/parquet/parquet_extension.cpp
src/duckdb/extension/parquet/parquet_field_id.cpp
src/duckdb/extension/parquet/column_writer.cpp
src/duckdb/extension/parquet/column_reader.cpp
src/duckdb/extension/parquet/parquet_float16.cpp
src/duckdb/extension/parquet/parquet_writer.cpp
src/duckdb/extension/parquet/column_writer.cpp
src/duckdb/extension/parquet/parquet_crypto.cpp
src/duckdb/extension/parquet/parquet_metadata.cpp
src/duckdb/ub_extension_parquet_writer.cpp
src/duckdb/ub_extension_parquet_writer_variant.cpp
src/duckdb/extension/parquet/parquet_float16.cpp
src/duckdb/extension/parquet/parquet_geometry.cpp
src/duckdb/ub_extension_parquet_reader.cpp
src/duckdb/ub_extension_parquet_reader_variant.cpp
src/duckdb/ub_extension_parquet_decoder.cpp
src/duckdb/ub_extension_parquet_writer.cpp
src/duckdb/ub_extension_parquet_writer_variant.cpp
src/duckdb/third_party/parquet/parquet_types.cpp
src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp
src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp
Expand Down Expand Up @@ -437,32 +437,32 @@ set(DUCKDB_SRC_FILES
src/duckdb/third_party/brotli/enc/metablock.cpp
src/duckdb/third_party/brotli/enc/static_dict.cpp
src/duckdb/third_party/brotli/enc/utf8_util.cpp
src/duckdb/extension/icu/./icu-current.cpp
src/duckdb/extension/icu/./icu-timezone.cpp
src/duckdb/extension/icu/./icu-datetrunc.cpp
src/duckdb/extension/icu/./icu-dateadd.cpp
src/duckdb/extension/icu/./icu-list-range.cpp
src/duckdb/extension/icu/./icu-timebucket.cpp
src/duckdb/extension/icu/./icu-datepart.cpp
src/duckdb/extension/icu/./icu-datetrunc.cpp
src/duckdb/extension/icu/./icu-current.cpp
src/duckdb/extension/icu/./icu_extension.cpp
src/duckdb/extension/icu/./icu-table-range.cpp
src/duckdb/extension/icu/./icu-timebucket.cpp
src/duckdb/extension/icu/./icu-datesub.cpp
src/duckdb/extension/icu/./icu-makedate.cpp
src/duckdb/extension/icu/./icu-datefunc.cpp
src/duckdb/extension/icu/./icu_extension.cpp
src/duckdb/extension/icu/./icu-makedate.cpp
src/duckdb/extension/icu/./icu-dateadd.cpp
src/duckdb/extension/icu/./icu-timezone.cpp
src/duckdb/extension/icu/./icu-strptime.cpp
src/duckdb/extension/icu/./icu-datesub.cpp
src/duckdb/extension/icu/./icu-list-range.cpp
src/duckdb/ub_extension_icu_third_party_icu_common.cpp
src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp
src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp
src/duckdb/extension/json/json_reader.cpp
src/duckdb/extension/json/json_functions.cpp
src/duckdb/extension/json/json_deserializer.cpp
src/duckdb/extension/json/json_serializer.cpp
src/duckdb/extension/json/json_scan.cpp
src/duckdb/extension/json/serialize_json.cpp
src/duckdb/extension/json/json_extension.cpp
src/duckdb/extension/json/json_enums.cpp
src/duckdb/extension/json/json_scan.cpp
src/duckdb/extension/json/json_common.cpp
src/duckdb/extension/json/json_functions.cpp
src/duckdb/extension/json/json_extension.cpp
src/duckdb/extension/json/json_multi_file_info.cpp
src/duckdb/extension/json/json_deserializer.cpp
src/duckdb/extension/json/json_reader.cpp
src/duckdb/extension/json/json_common.cpp
src/duckdb/ub_extension_json_json_functions.cpp
src/duckdb/generated_extension_loader_package_build.cpp)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,19 @@ void IsHistogramOtherBinFunction(DataChunk &args, ExpressionState &state, Vector
auto v = OtherBucketValue(input_type);
Vector ref(v);
VectorOperations::NotDistinctFrom(args.data[0], ref, result, args.size());

// Set NULL if input is NULL.
UnifiedVectorFormat input_data;
args.data[0].ToUnifiedFormat(args.size(), input_data);
if (!input_data.validity.AllValid()) {
auto &result_validity = FlatVector::Validity(result);
for (idx_t idx = 0; idx < args.size(); ++idx) {
auto input_idx = input_data.sel->get_index(idx);
if (!input_data.validity.RowIsValid(input_idx)) {
result_validity.SetInvalid(idx);
}
}
}
}

template <class OP, class T>
Expand Down
4 changes: 4 additions & 0 deletions src/duckdb/src/common/types/row/tuple_data_collection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ TupleDataCollection::~TupleDataCollection() {

void TupleDataCollection::Initialize() {
D_ASSERT(!layout.GetTypes().empty());
if (TuplesPerBlock() == 0) {
throw NotImplementedException("Too many columns: tuple width exceeds block size of %llu",
allocator->GetBufferManager().GetBlockSize());
}
this->count = 0;
this->data_size = 0;
if (layout.IsSortKeyLayout()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1510,24 +1510,27 @@ void StringValueScanner::ProcessOverBufferValue() {
} else {
value = string_t(over_buffer_string.c_str(), UnsafeNumericCast<uint32_t>(over_buffer_string.size()));
if (result.escaped) {
if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) {
// We cant have escapes on non varchar columns
result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id,
result.last_position);
if (!result.state_machine.options.IgnoreErrors()) {
// We have to write the cast error message.
std::ostringstream error;
// Casting Error Message
error << "Could not convert string \""
<< std::string(over_buffer_string.c_str(), over_buffer_string.size()) << "\" to \'"
<< LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'";
auto error_string = error.str();
FullLinePosition::SanitizeError(error_string);
result.current_errors.ModifyErrorMessageOfLastError(error_string);
if (result.cur_col_id >= result.number_of_columns &&
!result.state_machine.state_machine_options.strict_mode.GetValue()) {
result.used_unstrictness = true;
} else if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) {
if (result.parse_chunk.data[result.chunk_col_id].GetType() != LogicalType::VARCHAR) {
// We cant have escapes on non varchar columns
result.current_errors.Insert(CAST_ERROR, result.cur_col_id, result.chunk_col_id,
result.last_position);
if (!result.state_machine.options.IgnoreErrors()) {
// We have to write the cast error message.
std::ostringstream error;
// Casting Error Message
error << "Could not convert string \""
<< std::string(over_buffer_string.c_str(), over_buffer_string.size()) << "\" to \'"
<< LogicalTypeIdToString(result.parse_types[result.chunk_col_id].type_id) << "\'";
auto error_string = error.str();
FullLinePosition::SanitizeError(error_string);
result.current_errors.ModifyErrorMessageOfLastError(error_string);
}
return;
}
return;
}
if (!result.HandleTooManyColumnsError(over_buffer_string.c_str(), over_buffer_string.size())) {
value =
RemoveEscape(over_buffer_string.c_str(), over_buffer_string.size(),
state_machine->dialect_options.state_machine_options.escape.GetValue(),
Expand Down
17 changes: 9 additions & 8 deletions src/duckdb/src/execution/operator/join/physical_hash_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,13 +418,10 @@ static bool FinalizeSingleThreaded(const HashJoinGlobalSinkState &sink, const bo
}

static idx_t GetTupleWidth(const vector<LogicalType> &types, bool &all_constant) {
idx_t tuple_width = 0;
all_constant = true;
for (auto &type : types) {
tuple_width += GetTypeIdSize(type.InternalType());
all_constant &= TypeIsConstantSize(type.InternalType());
}
return tuple_width + AlignValue(types.size()) / 8 + GetTypeIdSize(PhysicalType::UINT64);
TupleDataLayout layout;
layout.Initialize(types, TupleDataValidityType::CAN_HAVE_NULL_VALUES);
all_constant = layout.AllConstant();
return layout.GetRowWidth();
}

static idx_t GetPartitioningSpaceRequirement(ClientContext &context, const vector<LogicalType> &types,
Expand All @@ -433,7 +430,11 @@ static idx_t GetPartitioningSpaceRequirement(ClientContext &context, const vecto
bool all_constant;
idx_t tuple_width = GetTupleWidth(types, all_constant);

auto tuples_per_block = buffer_manager.GetBlockSize() / tuple_width + 1;
if (tuple_width == 0) {
throw InternalException("GetPartitioningSpaceRequirement: tuple width should not be 0");
}

auto tuples_per_block = MaxValue<idx_t>(buffer_manager.GetBlockSize() / tuple_width, 1);
auto blocks_per_chunk = (STANDARD_VECTOR_SIZE + tuples_per_block) / tuples_per_block + 1;
if (!all_constant) {
blocks_per_chunk += 2;
Expand Down
22 changes: 20 additions & 2 deletions src/duckdb/src/function/table/table_scan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "duckdb/function/table_function.hpp"
#include "duckdb/main/attached_database.hpp"
#include "duckdb/main/client_config.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb/planner/expression.hpp"
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
#include "duckdb/planner/operator/logical_get.hpp"
Expand All @@ -29,6 +30,7 @@
#include "duckdb/planner/filter/conjunction_filter.hpp"
#include "duckdb/common/types/value_map.hpp"
#include "duckdb/main/settings.hpp"
#include "duckdb/transaction/duck_transaction_manager.hpp"

namespace duckdb {

Expand Down Expand Up @@ -115,6 +117,9 @@ class DuckIndexScanState : public TableScanGlobalState {
bool started_last_phase;
//! Synchronize changes to the global index scan state.
mutex index_scan_lock;
//! Synchronize <ART version, SegmentTree<RowGroup>> when vacuum_rebuild_indexes is enabled (since
//! ART indexes are rebuilt during vacuuming with this setting).
unique_ptr<StorageLockKey> vacuum_lock;

public:
unique_ptr<LocalTableFunctionState> InitLocalState(ExecutionContext &context,
Expand Down Expand Up @@ -407,8 +412,10 @@ unique_ptr<GlobalTableFunctionState> DuckTableScanInitGlobal(ClientContext &cont
}

unique_ptr<GlobalTableFunctionState> DuckIndexScanInitGlobal(ClientContext &context, TableFunctionInitInput &input,
const TableScanBindData &bind_data, set<row_t> &row_ids) {
const TableScanBindData &bind_data, set<row_t> &row_ids,
unique_ptr<StorageLockKey> vacuum_lock) {
auto g_state = make_uniq<DuckIndexScanState>(context, input.bind_data.get());
g_state->vacuum_lock = std::move(vacuum_lock);
g_state->finished_first_phase = row_ids.empty() ? true : false;
g_state->started_last_phase = false;

Expand Down Expand Up @@ -693,6 +700,17 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
bool index_scan = false;
set<row_t> row_ids;

// If vacuum_rebuild_indexes is enabled, grab a shared vacuum lock before
// scanning the index. This prevents the checkpoint from rebuilding the index and swapping
// row groups while we hold row IDs from the ART, ensuring we always see a consistent
// <ART index, SegmentTree<RowGroup> pairing.
unique_ptr<StorageLockKey> vacuum_lock;
auto &db = DatabaseInstance::GetDatabase(context);
if (Settings::Get<VacuumRebuildIndexesSetting>(db) > 0) {
auto &transaction_manager = DuckTransactionManager::Get(storage.GetAttached());
vacuum_lock = transaction_manager.SharedVacuumLock();
}

info->BindIndexes(context, ART::TYPE_NAME);
for (auto &entry : indexes.IndexEntries()) {
auto &index = *entry.index;
Expand All @@ -711,7 +729,7 @@ unique_ptr<GlobalTableFunctionState> TableScanInitGlobal(ClientContext &context,
if (!index_scan) {
return DuckTableScanInitGlobal(context, input, storage, bind_data);
}
return DuckIndexScanInitGlobal(context, input, bind_data, row_ids);
return DuckIndexScanInitGlobal(context, input, bind_data, row_ids, std::move(vacuum_lock));
}

static unique_ptr<BaseStatistics> TableScanStatistics(ClientContext &context, TableFunctionGetStatisticsInput &input) {
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "2-dev419"
#define DUCKDB_PATCH_VERSION "2-dev458"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 5
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.5.2-dev419"
#define DUCKDB_VERSION "v1.5.2-dev458"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "c241da23ae"
#define DUCKDB_SOURCE_ID "17491eb887"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
2 changes: 2 additions & 0 deletions src/duckdb/src/include/duckdb/execution/index/bound_index.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ class BoundIndex : public Index {

//! Deletes all data from the index. The lock obtained from InitializeLock must be held
virtual void CommitDrop(IndexLock &index_lock) = 0;

//! Deletes all data from the index
// FIXME: we can rename this to ResetStorage().
void CommitDrop() override;
//! Delete a chunk of entries from the index. The lock obtained from InitializeLock must be held.
//! Returns the amount of rows successfully deleted from the index.
Expand Down
Loading
Loading