Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 40 additions & 40 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -348,47 +348,47 @@ set(DUCKDB_SRC_FILES
src/duckdb/third_party/zstd/dict/fastcover.cpp
src/duckdb/third_party/zstd/dict/zdict.cpp
src/duckdb/extension/core_functions/core_functions_extension.cpp
src/duckdb/extension/core_functions/function_list.cpp
src/duckdb/extension/core_functions/lambda_functions.cpp
src/duckdb/ub_extension_core_functions_aggregate_nested.cpp
src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp
src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp
src/duckdb/ub_extension_core_functions_aggregate_regression.cpp
src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp
src/duckdb/extension/core_functions/function_list.cpp
src/duckdb/ub_extension_core_functions_scalar_operators.cpp
src/duckdb/ub_extension_core_functions_scalar_date.cpp
src/duckdb/ub_extension_core_functions_scalar_math.cpp
src/duckdb/ub_extension_core_functions_scalar_generic.cpp
src/duckdb/ub_extension_core_functions_scalar_bit.cpp
src/duckdb/ub_extension_core_functions_scalar_string.cpp
src/duckdb/ub_extension_core_functions_scalar_debug.cpp
src/duckdb/ub_extension_core_functions_scalar_map.cpp
src/duckdb/ub_extension_core_functions_scalar_struct.cpp
src/duckdb/ub_extension_core_functions_scalar_operators.cpp
src/duckdb/ub_extension_core_functions_scalar_union.cpp
src/duckdb/ub_extension_core_functions_scalar_array.cpp
src/duckdb/ub_extension_core_functions_scalar_blob.cpp
src/duckdb/ub_extension_core_functions_scalar_enum.cpp
src/duckdb/ub_extension_core_functions_scalar_random.cpp
src/duckdb/ub_extension_core_functions_scalar_date.cpp
src/duckdb/ub_extension_core_functions_scalar_bit.cpp
src/duckdb/ub_extension_core_functions_scalar_string.cpp
src/duckdb/ub_extension_core_functions_scalar_array.cpp
src/duckdb/ub_extension_core_functions_scalar_list.cpp
src/duckdb/ub_extension_core_functions_scalar_math.cpp
src/duckdb/ub_extension_core_functions_scalar_union.cpp
src/duckdb/ub_extension_core_functions_scalar_struct.cpp
src/duckdb/ub_extension_core_functions_scalar_map.cpp
src/duckdb/ub_extension_core_functions_scalar_enum.cpp
src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp
src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp
src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp
src/duckdb/ub_extension_core_functions_aggregate_regression.cpp
src/duckdb/ub_extension_core_functions_aggregate_nested.cpp
src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp
src/duckdb/extension/parquet/parquet_writer.cpp
src/duckdb/extension/parquet/parquet_multi_file_info.cpp
src/duckdb/extension/parquet/parquet_metadata.cpp
src/duckdb/extension/parquet/parquet_crypto.cpp
src/duckdb/extension/parquet/parquet_timestamp.cpp
src/duckdb/extension/parquet/geo_parquet.cpp
src/duckdb/extension/parquet/parquet_extension.cpp
src/duckdb/extension/parquet/column_writer.cpp
src/duckdb/extension/parquet/serialize_parquet.cpp
src/duckdb/extension/parquet/parquet_metadata.cpp
src/duckdb/extension/parquet/parquet_extension.cpp
src/duckdb/extension/parquet/parquet_reader.cpp
src/duckdb/extension/parquet/zstd_file_system.cpp
src/duckdb/extension/parquet/parquet_writer.cpp
src/duckdb/extension/parquet/column_reader.cpp
src/duckdb/extension/parquet/column_writer.cpp
src/duckdb/extension/parquet/parquet_crypto.cpp
src/duckdb/extension/parquet/parquet_float16.cpp
src/duckdb/extension/parquet/zstd_file_system.cpp
src/duckdb/extension/parquet/parquet_timestamp.cpp
src/duckdb/extension/parquet/parquet_statistics.cpp
src/duckdb/ub_extension_parquet_writer.cpp
src/duckdb/ub_extension_parquet_decoder.cpp
src/duckdb/extension/parquet/parquet_multi_file_info.cpp
src/duckdb/ub_extension_parquet_reader.cpp
src/duckdb/ub_extension_parquet_reader_variant.cpp
src/duckdb/ub_extension_parquet_writer.cpp
src/duckdb/ub_extension_parquet_decoder.cpp
src/duckdb/third_party/parquet/parquet_types.cpp
src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp
src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp
Expand Down Expand Up @@ -427,32 +427,32 @@ set(DUCKDB_SRC_FILES
src/duckdb/third_party/brotli/enc/metablock.cpp
src/duckdb/third_party/brotli/enc/static_dict.cpp
src/duckdb/third_party/brotli/enc/utf8_util.cpp
src/duckdb/extension/icu/./icu_extension.cpp
src/duckdb/extension/icu/./icu-table-range.cpp
src/duckdb/extension/icu/./icu-list-range.cpp
src/duckdb/extension/icu/./icu-datepart.cpp
src/duckdb/extension/icu/./icu-dateadd.cpp
src/duckdb/extension/icu/./icu-strptime.cpp
src/duckdb/extension/icu/./icu-datetrunc.cpp
src/duckdb/extension/icu/./icu-current.cpp
src/duckdb/extension/icu/./icu-timezone.cpp
src/duckdb/extension/icu/./icu-datesub.cpp
src/duckdb/extension/icu/./icu-timebucket.cpp
src/duckdb/extension/icu/./icu-makedate.cpp
src/duckdb/extension/icu/./icu-dateadd.cpp
src/duckdb/extension/icu/./icu-datepart.cpp
src/duckdb/extension/icu/./icu-datetrunc.cpp
src/duckdb/extension/icu/./icu-datefunc.cpp
src/duckdb/extension/icu/./icu-list-range.cpp
src/duckdb/extension/icu/./icu-timebucket.cpp
src/duckdb/extension/icu/./icu-timezone.cpp
src/duckdb/extension/icu/./icu-current.cpp
src/duckdb/extension/icu/./icu_extension.cpp
src/duckdb/ub_extension_icu_third_party_icu_common.cpp
src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp
src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp
src/duckdb/extension/json/json_extension.cpp
src/duckdb/extension/json/json_deserializer.cpp
src/duckdb/extension/json/serialize_json.cpp
src/duckdb/extension/json/json_scan.cpp
src/duckdb/extension/json/json_reader.cpp
src/duckdb/extension/json/json_serializer.cpp
src/duckdb/extension/json/json_multi_file_info.cpp
src/duckdb/extension/json/json_common.cpp
src/duckdb/extension/json/json_enums.cpp
src/duckdb/extension/json/json_serializer.cpp
src/duckdb/extension/json/json_extension.cpp
src/duckdb/extension/json/json_reader.cpp
src/duckdb/extension/json/json_common.cpp
src/duckdb/extension/json/json_scan.cpp
src/duckdb/extension/json/json_functions.cpp
src/duckdb/extension/json/json_deserializer.cpp
src/duckdb/ub_extension_json_json_functions.cpp)

set(JEMALLOC_SRC_FILES
Expand Down
4 changes: 4 additions & 0 deletions src/duckdb/src/common/types/row/tuple_data_collection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ TupleDataCollection::~TupleDataCollection() {

void TupleDataCollection::Initialize() {
D_ASSERT(!layout.GetTypes().empty());
if (TuplesPerBlock() == 0) {
throw NotImplementedException("Too many columns: tuple width exceeds block size of %llu",
allocator->GetBufferManager().GetBlockSize());
}
this->count = 0;
this->data_size = 0;
if (layout.IsSortKeyLayout()) {
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/execution/index/art/art_builder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ ARTConflictType ARTBuilder::Build() {
}

reference<Node> ref(entry.node);
auto count = UnsafeNumericCast<uint8_t>(start.len - prefix_depth);
auto count = UnsafeNumericCast<idx_t>(start.len - prefix_depth);
Prefix::New(art, ref, start, prefix_depth, count);

// Inline the row ID.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -529,12 +529,24 @@ void StringValueResult::AddQuotedValue(StringValueResult &result, const idx_t bu
if (!result.unquoted) {
result.current_errors.Insert(UNTERMINATED_QUOTES, result.cur_col_id, result.chunk_col_id, result.last_position);
}
if (buffer_pos <= result.quoted_position + 1) {
AddPossiblyEscapedValue(result, buffer_pos, result.buffer_ptr + result.quoted_position + 1, 0,
buffer_pos < result.last_position.buffer_pos + 2);
result.quoted = false;
return;
}
// remove potential empty values
idx_t length = buffer_pos - result.quoted_position - 1;
while (length > 0 && result.ignore_empty_values &&
result.buffer_ptr[result.quoted_position + 1 + length - 1] == ' ') {
length--;
}
if (length == 0) {
// All content was stripped as empty/space values
AddPossiblyEscapedValue(result, buffer_pos, result.buffer_ptr + result.quoted_position + 1, 0, true);
result.quoted = false;
return;
}
length--;
AddPossiblyEscapedValue(result, buffer_pos, result.buffer_ptr + result.quoted_position + 1, length,
buffer_pos < result.last_position.buffer_pos + 2);
Expand Down
17 changes: 9 additions & 8 deletions src/duckdb/src/execution/operator/join/physical_hash_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,13 +419,10 @@ static bool FinalizeSingleThreaded(const HashJoinGlobalSinkState &sink, const bo
}

// Returns the per-tuple width for the given column types and reports through `all_constant`
// whether every type is constant-size.
//
// NOTE(review): this span comes from a diff rendered without +/- markers, so two implementations
// appear back to back. As written, only the manual loop runs; every statement after the first
// `return` is unreachable. The header of this file's diff reports "9 additions & 8 deletions", which is
// consistent with the manual loop being the removed version and the TupleDataLayout-based code being
// its replacement - confirm against the actual file before relying on either.
static idx_t GetTupleWidth(const vector<LogicalType> &types, bool &all_constant) {
// first version: sum the size of each type's internal (physical) representation
idx_t tuple_width = 0;
all_constant = true;
for (auto &type : types) {
tuple_width += GetTypeIdSize(type.InternalType());
all_constant &= TypeIsConstantSize(type.InternalType());
}
// adds AlignValue(column count) / 8 and the size of a UINT64 on top of the summed type sizes
// (presumably validity bytes and a hash column - TODO confirm)
return tuple_width + AlignValue(types.size()) / 8 + GetTypeIdSize(PhysicalType::UINT64);
// second version (unreachable as written): let a TupleDataLayout initialized with the same types
// report both the row width and whether all columns are constant-size
TupleDataLayout layout;
layout.Initialize(types, TupleDataValidityType::CAN_HAVE_NULL_VALUES);
all_constant = layout.AllConstant();
return layout.GetRowWidth();
}

static idx_t GetPartitioningSpaceRequirement(ClientContext &context, const vector<LogicalType> &types,
Expand All @@ -434,7 +431,11 @@ static idx_t GetPartitioningSpaceRequirement(ClientContext &context, const vecto
bool all_constant;
idx_t tuple_width = GetTupleWidth(types, all_constant);

auto tuples_per_block = buffer_manager.GetBlockSize() / tuple_width;
if (tuple_width == 0) {
throw InternalException("GetPartitioningSpaceRequirement: tuple width should not be 0");
}

auto tuples_per_block = MaxValue<idx_t>(buffer_manager.GetBlockSize() / tuple_width, 1);
auto blocks_per_chunk = (STANDARD_VECTOR_SIZE + tuples_per_block) / tuples_per_block + 1;
if (!all_constant) {
blocks_per_chunk += 2;
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "5-dev31"
#define DUCKDB_PATCH_VERSION "5-dev79"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 4
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.4.5-dev31"
#define DUCKDB_VERSION "v1.4.5-dev79"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "7a3a63c5c0"
#define DUCKDB_SOURCE_ID "a8d8e49b32"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#pragma once

#include "duckdb/common/typedefs.hpp"
#include "duckdb/common/exception.hpp"
#include <type_traits>

namespace duckdb {
Expand Down Expand Up @@ -37,6 +38,7 @@ struct EncodingUtil {
// Decode unsigned integer, returns the number of bytes read
template <class T>
static idx_t DecodeUnsignedLEB128(const_data_ptr_t source, T &result) {
constexpr idx_t max_shift = sizeof(T) * 8;
static_assert(std::is_integral<T>::value, "Must be integral");
static_assert(std::is_unsigned<T>::value, "Must be unsigned");
static_assert(sizeof(T) <= sizeof(uint64_t), "Must be uint64_t or smaller");
Expand All @@ -47,6 +49,9 @@ struct EncodingUtil {
uint8_t byte;
do {
byte = source[offset++];
if (shift >= max_shift) {
throw IOException("Failed to decode LEB128 integer: data may be corrupt");
}
result |= static_cast<T>(byte & 0x7F) << shift;
shift += 7;
} while (byte & 0x80);
Expand Down Expand Up @@ -81,6 +86,7 @@ struct EncodingUtil {
// Decode signed integer, returns the number of bytes read
template <class T>
static idx_t DecodeSignedLEB128(const_data_ptr_t source, T &result) {
constexpr idx_t max_shift = sizeof(T) * 8;
static_assert(std::is_integral<T>::value, "Must be integral");
static_assert(std::is_signed<T>::value, "Must be signed");
static_assert(sizeof(T) <= sizeof(int64_t), "Must be int64_t or smaller");
Expand All @@ -95,6 +101,9 @@ struct EncodingUtil {
uint8_t byte;
do {
byte = source[offset++];
if (shift >= max_shift) {
throw IOException("Failed to decode LEB128 integer: data may be corrupt");
}
result |= static_cast<unsigned_type>(byte & 0x7F) << shift;
shift += 7;
} while (byte & 0x80);
Expand Down
25 changes: 25 additions & 0 deletions src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,31 @@ struct UncompressedStringStorage {
return count;
}

// Shrinks the dictionary size recorded in the segment's block so that it only covers the first
// `new_count` entries. Does nothing when `new_count` is not smaller than the segment's current count.
static void StringRevertAppend(ColumnSegment &segment, idx_t new_count) {
	if (segment.count <= new_count) {
		// no entries are being erased
		return;
	}
	// pin the block; the handle stays alive until the dictionary size has been rewritten
	auto pinned = BufferManager::GetBufferManager(segment.db).Pin(segment.block);
	auto base_ptr = pinned.Ptr();
	// per-entry offsets start right after the dictionary header
	auto offsets = reinterpret_cast<int32_t *>(base_ptr + DICTIONARY_HEADER_SIZE);

	// with no entries left the dictionary is empty; otherwise it ends at the last surviving entry
	uint32_t reverted_size = 0;
	if (new_count != 0) {
		const auto last_offset = offsets[new_count - 1];
		// overflow strings store the dict offset negatively - take the magnitude
		reverted_size = static_cast<uint32_t>(last_offset < 0 ? -last_offset : last_offset);
	}
	// the dictionary size is the uint32_t at the very start of the block
	*reinterpret_cast<uint32_t *>(base_ptr) = reverted_size;
}

static idx_t FinalizeAppend(ColumnSegment &segment, SegmentStatistics &stats);

public:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ struct UpdateInfo;
class CleanupState {
public:
explicit CleanupState(transaction_t lowest_active_transaction);
~CleanupState();

// all tables with indexes that possibly need a vacuum (after e.g. a delete)
unordered_map<string, optional_ptr<DataTable>> indexed_tables;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ struct DuckCleanupInfo {
transaction_t lowest_start_time;
vector<unique_ptr<DuckTransaction>> transactions;

void Cleanup() noexcept;
void Cleanup();
bool ScheduleCleanup() noexcept;
};

Expand Down
4 changes: 4 additions & 0 deletions src/duckdb/src/include/duckdb/transaction/update_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ struct UpdateInfo {
// Decides whether the tuples stored in this version should be used by the transaction described by
// `start_time` and `transaction_id`.
bool AppliesToTransaction(transaction_t start_time, transaction_t transaction_id) {
	// the root element carries a dummy version number that should always match
	const bool is_root_version = version_number == TRANSACTION_ID_START - 1;
	if (is_root_version) {
		return true;
	}
	// use this version when its tuples were committed AFTER the transaction started or are not
	// committed yet - i.e. unless the version is at or before start_time, or is the transaction's own
	return !(version_number <= start_time || version_number == transaction_id);
}

Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/parallel/task_scheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ void TaskScheduler::YieldThread() {
}

idx_t TaskScheduler::GetEstimatedCPUId() {
#if defined(EMSCRIPTEN)
#if defined(__EMSCRIPTEN__)
// FIXME: Wasm + multithreads can likely be implemented as
// return return (idx_t)std::hash<std::thread::id>()(std::this_thread::get_id());
return 0;
Expand Down
23 changes: 12 additions & 11 deletions src/duckdb/src/storage/compression/string_uncompressed.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,17 +268,18 @@ void UncompressedStringStorage::CleanupState(ColumnSegment &segment) {
//===--------------------------------------------------------------------===//
// Builds the CompressionFunction describing uncompressed storage (COMPRESSION_UNCOMPRESSED) for
// string data; `data_type` is asserted to be PhysicalType::VARCHAR.
//
// NOTE(review): this span comes from a diff rendered without +/- markers, so it contains two
// `return` statements. As written, the first one is the one that executes and the second is
// unreachable. The two differ only in the argument that follows FinalizeAppend: `nullptr` in the
// first, `UncompressedStringStorage::StringRevertAppend` in the second. The header of this file's diff
// reports "12 additions & 11 deletions", consistent with the first being the removed statement and the
// second its replacement - confirm against the actual file.
CompressionFunction StringUncompressed::GetFunction(PhysicalType data_type) {
D_ASSERT(data_type == PhysicalType::VARCHAR);
// first version: passes nullptr in the slot directly after FinalizeAppend
return CompressionFunction(
CompressionType::COMPRESSION_UNCOMPRESSED, data_type, UncompressedStringStorage::StringInitAnalyze,
UncompressedStringStorage::StringAnalyze, UncompressedStringStorage::StringFinalAnalyze,
UncompressedFunctions::InitCompression, UncompressedFunctions::Compress,
UncompressedFunctions::FinalizeCompress, UncompressedStringStorage::StringInitScan,
UncompressedStringStorage::StringScan, UncompressedStringStorage::StringScanPartial,
UncompressedStringStorage::StringFetchRow, UncompressedFunctions::EmptySkip,
UncompressedStringStorage::StringInitSegment, UncompressedStringStorage::StringInitAppend,
UncompressedStringStorage::StringAppend, UncompressedStringStorage::FinalizeAppend, nullptr,
UncompressedStringStorage::SerializeState, UncompressedStringStorage::DeserializeState,
UncompressedStringStorage::CleanupState, UncompressedStringInitPrefetch, UncompressedStringStorage::Select);
// second version (unreachable as written): same arguments, but StringRevertAppend fills the slot
// that was nullptr above
return CompressionFunction(CompressionType::COMPRESSION_UNCOMPRESSED, data_type,
UncompressedStringStorage::StringInitAnalyze, UncompressedStringStorage::StringAnalyze,
UncompressedStringStorage::StringFinalAnalyze, UncompressedFunctions::InitCompression,
UncompressedFunctions::Compress, UncompressedFunctions::FinalizeCompress,
UncompressedStringStorage::StringInitScan, UncompressedStringStorage::StringScan,
UncompressedStringStorage::StringScanPartial, UncompressedStringStorage::StringFetchRow,
UncompressedFunctions::EmptySkip, UncompressedStringStorage::StringInitSegment,
UncompressedStringStorage::StringInitAppend, UncompressedStringStorage::StringAppend,
UncompressedStringStorage::FinalizeAppend, UncompressedStringStorage::StringRevertAppend,
UncompressedStringStorage::SerializeState, UncompressedStringStorage::DeserializeState,
UncompressedStringStorage::CleanupState, UncompressedStringInitPrefetch,
UncompressedStringStorage::Select);
}

//===--------------------------------------------------------------------===//
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/storage/table/row_group_collection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ void RowGroupCollection::InitializeAppend(TransactionData transaction, TableAppe

// start writing to the row_groups
auto l = row_groups->Lock();
if (IsEmpty(l) || requires_new_row_group) {
if (IsEmpty(l) || (requires_new_row_group && info->GetIndexes().Empty())) {
// empty row group collection: empty first row group
AppendRowGroup(l, row_start + total_rows);
}
Expand Down
4 changes: 0 additions & 4 deletions src/duckdb/src/transaction/cleanup_state.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,6 @@ CleanupState::CleanupState(transaction_t lowest_active_transaction)
: lowest_active_transaction(lowest_active_transaction), current_table(nullptr), count(0) {
}

// Destructor: calls Flush() when the CleanupState is destroyed.
// NOTE(review): the header of this file's diff reports "0 additions & 4 deletions", so this
// definition appears to be the code being removed - confirm against the actual file.
CleanupState::~CleanupState() {
Flush();
}

void CleanupState::CleanupEntry(UndoFlags type, data_ptr_t data) {
switch (type) {
case UndoFlags::CATALOG_ENTRY: {
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/transaction/duck_transaction_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

namespace duckdb {

void DuckCleanupInfo::Cleanup() noexcept {
void DuckCleanupInfo::Cleanup() {
for (auto &transaction : transactions) {
if (transaction->awaiting_cleanup) {
transaction->Cleanup(lowest_start_time);
Expand Down
Loading
Loading