Skip to content

Commit 66b7287

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to 76a3383248
1 parent: a9459dd · commit: 66b7287

83 files changed

Lines changed: 777 additions & 295 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

CMakeLists.txt

Lines changed: 39 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -347,48 +347,48 @@ set(DUCKDB_SRC_FILES
347347
src/duckdb/third_party/zstd/dict/divsufsort.cpp
348348
src/duckdb/third_party/zstd/dict/fastcover.cpp
349349
src/duckdb/third_party/zstd/dict/zdict.cpp
350-
src/duckdb/extension/core_functions/core_functions_extension.cpp
351-
src/duckdb/extension/core_functions/function_list.cpp
352350
src/duckdb/extension/core_functions/lambda_functions.cpp
353-
src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp
351+
src/duckdb/extension/core_functions/function_list.cpp
352+
src/duckdb/extension/core_functions/core_functions_extension.cpp
353+
src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp
354354
src/duckdb/ub_extension_core_functions_aggregate_nested.cpp
355-
src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp
356355
src/duckdb/ub_extension_core_functions_aggregate_regression.cpp
357-
src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp
358-
src/duckdb/ub_extension_core_functions_scalar_generic.cpp
359-
src/duckdb/ub_extension_core_functions_scalar_array.cpp
360-
src/duckdb/ub_extension_core_functions_scalar_random.cpp
361-
src/duckdb/ub_extension_core_functions_scalar_list.cpp
362-
src/duckdb/ub_extension_core_functions_scalar_struct.cpp
363-
src/duckdb/ub_extension_core_functions_scalar_date.cpp
364-
src/duckdb/ub_extension_core_functions_scalar_enum.cpp
356+
src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp
357+
src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp
358+
src/duckdb/ub_extension_core_functions_scalar_string.cpp
359+
src/duckdb/ub_extension_core_functions_scalar_bit.cpp
365360
src/duckdb/ub_extension_core_functions_scalar_operators.cpp
361+
src/duckdb/ub_extension_core_functions_scalar_enum.cpp
362+
src/duckdb/ub_extension_core_functions_scalar_map.cpp
363+
src/duckdb/ub_extension_core_functions_scalar_random.cpp
366364
src/duckdb/ub_extension_core_functions_scalar_math.cpp
367-
src/duckdb/ub_extension_core_functions_scalar_string.cpp
365+
src/duckdb/ub_extension_core_functions_scalar_union.cpp
366+
src/duckdb/ub_extension_core_functions_scalar_generic.cpp
367+
src/duckdb/ub_extension_core_functions_scalar_struct.cpp
368+
src/duckdb/ub_extension_core_functions_scalar_list.cpp
369+
src/duckdb/ub_extension_core_functions_scalar_array.cpp
368370
src/duckdb/ub_extension_core_functions_scalar_debug.cpp
369-
src/duckdb/ub_extension_core_functions_scalar_bit.cpp
370371
src/duckdb/ub_extension_core_functions_scalar_blob.cpp
371-
src/duckdb/ub_extension_core_functions_scalar_union.cpp
372-
src/duckdb/ub_extension_core_functions_scalar_map.cpp
373-
src/duckdb/extension/parquet/parquet_crypto.cpp
374-
src/duckdb/extension/parquet/parquet_reader.cpp
375-
src/duckdb/extension/parquet/parquet_metadata.cpp
372+
src/duckdb/ub_extension_core_functions_scalar_date.cpp
376373
src/duckdb/extension/parquet/parquet_writer.cpp
377374
src/duckdb/extension/parquet/zstd_file_system.cpp
375+
src/duckdb/extension/parquet/parquet_crypto.cpp
376+
src/duckdb/extension/parquet/parquet_reader.cpp
378377
src/duckdb/extension/parquet/parquet_timestamp.cpp
378+
src/duckdb/extension/parquet/parquet_float16.cpp
379+
src/duckdb/extension/parquet/parquet_statistics.cpp
379380
src/duckdb/extension/parquet/parquet_multi_file_info.cpp
380-
src/duckdb/extension/parquet/column_writer.cpp
381+
src/duckdb/extension/parquet/column_reader.cpp
381382
src/duckdb/extension/parquet/geo_parquet.cpp
382-
src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp
383-
src/duckdb/extension/parquet/parquet_statistics.cpp
384383
src/duckdb/extension/parquet/parquet_extension.cpp
385-
src/duckdb/extension/parquet/parquet_float16.cpp
384+
src/duckdb/extension/parquet/column_writer.cpp
385+
src/duckdb/extension/parquet/parquet_file_metadata_cache.cpp
386386
src/duckdb/extension/parquet/serialize_parquet.cpp
387-
src/duckdb/extension/parquet/column_reader.cpp
387+
src/duckdb/extension/parquet/parquet_metadata.cpp
388388
src/duckdb/ub_extension_parquet_decoder.cpp
389-
src/duckdb/ub_extension_parquet_writer.cpp
390389
src/duckdb/ub_extension_parquet_reader.cpp
391390
src/duckdb/ub_extension_parquet_reader_variant.cpp
391+
src/duckdb/ub_extension_parquet_writer.cpp
392392
src/duckdb/third_party/parquet/parquet_types.cpp
393393
src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp
394394
src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp
@@ -427,32 +427,32 @@ set(DUCKDB_SRC_FILES
427427
src/duckdb/third_party/brotli/enc/metablock.cpp
428428
src/duckdb/third_party/brotli/enc/static_dict.cpp
429429
src/duckdb/third_party/brotli/enc/utf8_util.cpp
430-
src/duckdb/extension/icu/./icu-datetrunc.cpp
431-
src/duckdb/extension/icu/./icu-timezone.cpp
432-
src/duckdb/extension/icu/./icu-current.cpp
433-
src/duckdb/extension/icu/./icu-list-range.cpp
434-
src/duckdb/extension/icu/./icu-datefunc.cpp
435-
src/duckdb/extension/icu/./icu-strptime.cpp
436-
src/duckdb/extension/icu/./icu-dateadd.cpp
437430
src/duckdb/extension/icu/./icu_extension.cpp
438-
src/duckdb/extension/icu/./icu-timebucket.cpp
439-
src/duckdb/extension/icu/./icu-table-range.cpp
431+
src/duckdb/extension/icu/./icu-strptime.cpp
440432
src/duckdb/extension/icu/./icu-datepart.cpp
441433
src/duckdb/extension/icu/./icu-makedate.cpp
434+
src/duckdb/extension/icu/./icu-datefunc.cpp
435+
src/duckdb/extension/icu/./icu-current.cpp
436+
src/duckdb/extension/icu/./icu-timezone.cpp
437+
src/duckdb/extension/icu/./icu-table-range.cpp
438+
src/duckdb/extension/icu/./icu-dateadd.cpp
439+
src/duckdb/extension/icu/./icu-timebucket.cpp
442440
src/duckdb/extension/icu/./icu-datesub.cpp
441+
src/duckdb/extension/icu/./icu-datetrunc.cpp
442+
src/duckdb/extension/icu/./icu-list-range.cpp
443443
src/duckdb/ub_extension_icu_third_party_icu_common.cpp
444444
src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp
445445
src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp
446+
src/duckdb/extension/json/json_reader.cpp
446447
src/duckdb/extension/json/json_functions.cpp
448+
src/duckdb/extension/json/json_extension.cpp
449+
src/duckdb/extension/json/json_scan.cpp
447450
src/duckdb/extension/json/json_multi_file_info.cpp
451+
src/duckdb/extension/json/json_enums.cpp
448452
src/duckdb/extension/json/serialize_json.cpp
449453
src/duckdb/extension/json/json_common.cpp
450-
src/duckdb/extension/json/json_scan.cpp
451-
src/duckdb/extension/json/json_enums.cpp
452-
src/duckdb/extension/json/json_reader.cpp
453-
src/duckdb/extension/json/json_serializer.cpp
454-
src/duckdb/extension/json/json_extension.cpp
455454
src/duckdb/extension/json/json_deserializer.cpp
455+
src/duckdb/extension/json/json_serializer.cpp
456456
src/duckdb/ub_extension_json_json_functions.cpp)
457457

458458
set(JEMALLOC_SRC_FILES

src/duckdb/extension/core_functions/scalar/generic/current_setting.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -53,10 +53,7 @@ unique_ptr<FunctionData> CurrentSettingBind(ClientContext &context, ScalarFuncti
5353
if (!context.TryGetCurrentSetting(key, val)) {
5454
auto extension_name = Catalog::AutoloadExtensionByConfigName(context, key);
5555
// If autoloader didn't throw, the config is now available
56-
if (!context.TryGetCurrentSetting(key, val)) {
57-
throw InternalException("Extension %s did not provide the '%s' config setting",
58-
extension_name.ToStdString(), key);
59-
}
56+
context.TryGetCurrentSetting(key, val);
6057
}
6158

6259
bound_function.return_type = val.type();

src/duckdb/extension/icu/icu-strptime.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -221,8 +221,9 @@ struct ICUStrptime : public ICUDateFunc {
221221
if (!error.empty()) {
222222
throw InvalidInputException("Failed to parse format specifier %s: %s", format_string, error);
223223
}
224-
// If any format has UTC offsets, then we have to produce TSTZ
224+
// If any format has UTC offsets or names, then we have to produce TSTZ
225225
has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::TZ_NAME);
226+
has_tz = has_tz || format.HasFormatSpecifier(StrTimeSpecifier::UTC_OFFSET);
226227
formats.emplace_back(format);
227228
}
228229
if (has_tz) {

src/duckdb/extension/json/include/json_common.hpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
#include "duckdb/common/operator/string_cast.hpp"
1414
#include "duckdb/planner/expression/bound_function_expression.hpp"
1515
#include "yyjson.hpp"
16+
#include "duckdb/common/types/blob.hpp"
1617

1718
using namespace duckdb_yyjson; // NOLINT
1819

@@ -228,11 +229,8 @@ struct JSONCommon {
228229

229230
static string FormatParseError(const char *data, idx_t length, yyjson_read_err &error, const string &extra = "") {
230231
D_ASSERT(error.code != YYJSON_READ_SUCCESS);
231-
// Go to blob so we can have a better error message for weird strings
232-
auto blob = Value::BLOB(string(data, length));
233232
// Truncate, so we don't print megabytes worth of JSON
234-
string input = blob.ToString();
235-
input = input.length() > 50 ? string(input.c_str(), 47) + "..." : input;
233+
auto input = length > 50 ? string(data, 47) + "..." : string(data, length);
236234
// Have to replace \r, otherwise output is unreadable
237235
input = StringUtil::Replace(input, "\r", "\\r");
238236
return StringUtil::Format("Malformed JSON at byte %lld of input: %s. %s Input: \"%s\"", error.pos, error.msg,

src/duckdb/extension/json/json_functions.cpp

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -394,7 +394,11 @@ void JSONFunctions::RegisterSimpleCastFunctions(ExtensionLoader &loader) {
394394
loader.RegisterCastFunction(LogicalType::LIST(LogicalType::JSON()), LogicalTypeId::VARCHAR, CastJSONListToVarchar,
395395
json_list_to_varchar_cost);
396396

397-
// VARCHAR to JSON[] (also needs a special case otherwise get a VARCHAR -> VARCHAR[] cast first)
397+
// JSON[] to JSON is allowed implicitly
398+
loader.RegisterCastFunction(LogicalType::LIST(LogicalType::JSON()), LogicalType::JSON(), CastJSONListToVarchar,
399+
100);
400+
401+
// VARCHAR to JSON[] (also needs a special case otherwise we get a VARCHAR -> VARCHAR[] cast first)
398402
const auto varchar_to_json_list_cost =
399403
CastFunctionSet::ImplicitCastCost(db, LogicalType::VARCHAR, LogicalType::LIST(LogicalType::JSON())) - 1;
400404
BoundCastInfo varchar_to_json_list_info(CastVarcharToJSONList, nullptr, JSONFunctionLocalState::InitCastLocalState);

src/duckdb/extension/parquet/column_writer.cpp

Lines changed: 23 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -187,9 +187,12 @@ void ColumnWriter::HandleRepeatLevels(ColumnWriterState &state, ColumnWriterStat
187187
// no repeat levels without a parent node
188188
return;
189189
}
190-
while (state.repetition_levels.size() < parent->repetition_levels.size()) {
191-
state.repetition_levels.push_back(parent->repetition_levels[state.repetition_levels.size()]);
190+
if (state.repetition_levels.size() >= parent->repetition_levels.size()) {
191+
return;
192192
}
193+
state.repetition_levels.insert(state.repetition_levels.end(),
194+
parent->repetition_levels.begin() + state.repetition_levels.size(),
195+
parent->repetition_levels.end());
193196
}
194197

195198
void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterState *parent, const ValidityMask &validity,
@@ -200,36 +203,41 @@ void ColumnWriter::HandleDefineLevels(ColumnWriterState &state, ColumnWriterStat
200203
while (state.definition_levels.size() < parent->definition_levels.size()) {
201204
idx_t current_index = state.definition_levels.size();
202205
if (parent->definition_levels[current_index] != PARQUET_DEFINE_VALID) {
206+
//! Inherit nulls from parent
203207
state.definition_levels.push_back(parent->definition_levels[current_index]);
204208
state.parent_null_count++;
205209
} else if (validity.RowIsValid(vector_index)) {
210+
//! Produce a non-null define
206211
state.definition_levels.push_back(define_value);
207212
} else {
213+
//! Produce a null define
208214
if (!can_have_nulls) {
209215
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
210216
}
211217
state.null_count++;
212218
state.definition_levels.push_back(null_value);
213219
}
220+
D_ASSERT(parent->is_empty.empty() || current_index < parent->is_empty.size());
214221
if (parent->is_empty.empty() || !parent->is_empty[current_index]) {
215222
vector_index++;
216223
}
217224
}
225+
return;
226+
}
227+
228+
// no parent: set definition levels only from this validity mask
229+
if (validity.AllValid()) {
230+
state.definition_levels.insert(state.definition_levels.end(), count, define_value);
218231
} else {
219-
// no parent: set definition levels only from this validity mask
220-
if (validity.AllValid()) {
221-
state.definition_levels.insert(state.definition_levels.end(), count, define_value);
222-
} else {
223-
for (idx_t i = 0; i < count; i++) {
224-
const auto is_null = !validity.RowIsValid(i);
225-
state.definition_levels.emplace_back(is_null ? null_value : define_value);
226-
state.null_count += is_null;
227-
}
228-
}
229-
if (!can_have_nulls && state.null_count != 0) {
230-
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
232+
for (idx_t i = 0; i < count; i++) {
233+
const auto is_null = !validity.RowIsValid(i);
234+
state.definition_levels.emplace_back(is_null ? null_value : define_value);
235+
state.null_count += is_null;
231236
}
232237
}
238+
if (!can_have_nulls && state.null_count != 0) {
239+
throw IOException("Parquet writer: map key column is not allowed to contain NULL values");
240+
}
233241
}
234242

235243
//===--------------------------------------------------------------------===//
@@ -368,6 +376,7 @@ ParquetColumnSchema ColumnWriter::FillParquetSchema(vector<duckdb_parquet::Schem
368376
}
369377
return map_column;
370378
}
379+
371380
duckdb_parquet::SchemaElement schema_element;
372381
schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
373382
schema_element.repetition_type = null_type;

src/duckdb/extension/parquet/include/column_writer.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ class ColumnWriterState {
2727

2828
unsafe_vector<uint16_t> definition_levels;
2929
unsafe_vector<uint16_t> repetition_levels;
30-
vector<bool> is_empty;
30+
unsafe_vector<uint8_t> is_empty;
3131
idx_t parent_null_count = 0;
3232
idx_t null_count = 0;
3333

src/duckdb/extension/parquet/include/reader/string_column_reader.hpp

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,25 @@
1414
namespace duckdb {
1515

1616
class StringColumnReader : public ColumnReader {
17+
enum class StringColumnType : uint8_t { VARCHAR, JSON, OTHER };
18+
19+
static StringColumnType GetStringColumnType(const LogicalType &type) {
20+
if (type.IsJSONType()) {
21+
return StringColumnType::JSON;
22+
}
23+
if (type.id() == LogicalTypeId::VARCHAR) {
24+
return StringColumnType::VARCHAR;
25+
}
26+
return StringColumnType::OTHER;
27+
}
28+
1729
public:
1830
static constexpr const PhysicalType TYPE = PhysicalType::VARCHAR;
1931

2032
public:
2133
StringColumnReader(ParquetReader &reader, const ParquetColumnSchema &schema);
2234
idx_t fixed_width_string_length;
35+
const StringColumnType string_column_type;
2336

2437
public:
2538
static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);

src/duckdb/extension/parquet/include/writer/array_column_writer.hpp

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@ class ArrayColumnWriter : public ListColumnWriter {
2525
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count,
2626
bool vector_can_span_multiple_pages) override;
2727
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
28+
29+
protected:
30+
void WriteArrayState(ListColumnWriterState &state, idx_t array_size, uint16_t first_repeat_level,
31+
idx_t define_value, const bool is_empty = false);
2832
};
2933

3034
} // namespace duckdb

src/duckdb/extension/parquet/parquet_reader.cpp

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -570,7 +570,10 @@ ParquetColumnSchema ParquetReader::ParseSchemaRecursive(idx_t depth, idx_t max_d
570570

571571
auto file_meta_data = GetFileMetadata();
572572
D_ASSERT(file_meta_data);
573-
D_ASSERT(next_schema_idx < file_meta_data->schema.size());
573+
if (next_schema_idx >= file_meta_data->schema.size()) {
574+
throw InvalidInputException("Malformed Parquet schema in file \"%s\": invalid schema index %d", file.path,
575+
next_schema_idx);
576+
}
574577
auto &s_ele = file_meta_data->schema[next_schema_idx];
575578
auto this_idx = next_schema_idx;
576579

0 commit comments

Comments
 (0)