Skip to content

Commit 362ade9

Browse files
duckdblabs-bot authored and github-actions[bot] committed
Update vendored DuckDB sources to 13e1f2229c
1 parent 3b3afd4 commit 362ade9

79 files changed

Lines changed: 644 additions & 715 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

src/duckdb/extension/json/include/json_reader.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -230,9 +230,9 @@ class JSONReader : public BaseFileReader {
230230

231231
void Initialize(Allocator &allocator, idx_t buffer_size);
232232
bool InitializeScan(JSONReaderScanState &state, JSONFileReadType file_read_type);
233-
void ParseJSON(JSONReaderScanState &scan_state, char *const json_start, const idx_t json_size,
233+
bool ParseJSON(JSONReaderScanState &scan_state, char *const json_start, const idx_t json_size,
234234
const idx_t remaining);
235-
void ParseNextChunk(JSONReaderScanState &scan_state);
235+
bool ParseNextChunk(JSONReaderScanState &scan_state);
236236
idx_t Scan(JSONReaderScanState &scan_state);
237237
bool ReadNextBuffer(JSONReaderScanState &scan_state);
238238
bool PrepareBufferForRead(JSONReaderScanState &scan_state);

src/duckdb/extension/json/json_reader.cpp

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,7 @@ static pair<JSONFormat, JSONRecordType> DetectFormatAndRecordType(char *const bu
618618
return make_pair(JSONFormat::ARRAY, JSONRecordType::VALUES);
619619
}
620620

621-
void JSONReader::ParseJSON(JSONReaderScanState &scan_state, char *const json_start, const idx_t json_size,
621+
bool JSONReader::ParseJSON(JSONReaderScanState &scan_state, char *const json_start, const idx_t json_size,
622622
const idx_t remaining) {
623623
yyjson_doc *doc;
624624
yyjson_read_err err;
@@ -640,7 +640,7 @@ void JSONReader::ParseJSON(JSONReaderScanState &scan_state, char *const json_sta
640640
}
641641
if (!can_ignore_this_error) {
642642
AddParseError(scan_state, scan_state.lines_or_objects_in_buffer, err, extra);
643-
return;
643+
return false;
644644
}
645645
}
646646

@@ -652,7 +652,7 @@ void JSONReader::ParseJSON(JSONReaderScanState &scan_state, char *const json_sta
652652
err.msg = "unexpected end of data";
653653
err.pos = json_size;
654654
AddParseError(scan_state, scan_state.lines_or_objects_in_buffer, err, "Try auto-detecting the JSON format");
655-
return;
655+
return false;
656656
} else if (!options.ignore_errors && read_size < json_size) {
657657
idx_t off = read_size;
658658
idx_t rem = json_size;
@@ -662,20 +662,21 @@ void JSONReader::ParseJSON(JSONReaderScanState &scan_state, char *const json_sta
662662
err.msg = "unexpected content after document";
663663
err.pos = read_size;
664664
AddParseError(scan_state, scan_state.lines_or_objects_in_buffer, err, "Try auto-detecting the JSON format");
665-
return;
665+
return false;
666666
}
667667
}
668668

669669
scan_state.lines_or_objects_in_buffer++;
670670
if (!doc) {
671671
scan_state.values[scan_state.scan_count] = nullptr;
672-
return;
672+
return true;
673673
}
674674

675675
// Set the JSONLine and trim
676676
scan_state.units[scan_state.scan_count] = JSONString(json_start, json_size);
677677
TrimWhitespace(scan_state.units[scan_state.scan_count]);
678678
scan_state.values[scan_state.scan_count] = doc->root;
679+
return true;
679680
}
680681

681682
void JSONReader::AutoDetect(Allocator &allocator, idx_t buffer_capacity) {
@@ -762,7 +763,7 @@ bool JSONReader::CopyRemainderFromPreviousBuffer(JSONReaderScanState &scan_state
762763
return true;
763764
}
764765

765-
void JSONReader::ParseNextChunk(JSONReaderScanState &scan_state) {
766+
bool JSONReader::ParseNextChunk(JSONReaderScanState &scan_state) {
766767
const auto format = GetFormat();
767768
auto &buffer_ptr = scan_state.buffer_ptr;
768769
auto &buffer_offset = scan_state.buffer_offset;
@@ -796,7 +797,9 @@ void JSONReader::ParseNextChunk(JSONReaderScanState &scan_state) {
796797
}
797798

798799
idx_t json_size = json_end - json_start;
799-
ParseJSON(scan_state, json_start, json_size, remaining);
800+
if (!ParseJSON(scan_state, json_start, json_size, remaining)) {
801+
return false;
802+
}
800803
buffer_offset += json_size;
801804

802805
if (format == JSONFormat::ARRAY) {
@@ -809,11 +812,12 @@ void JSONReader::ParseNextChunk(JSONReaderScanState &scan_state) {
809812
err.msg = "unexpected character";
810813
err.pos = json_size;
811814
AddParseError(scan_state, scan_state.lines_or_objects_in_buffer, err);
812-
return;
815+
return false;
813816
}
814817
}
815818
SkipWhitespace(buffer_ptr, buffer_offset, buffer_size);
816819
}
820+
return true;
817821
}
818822

819823
void JSONReader::Initialize(Allocator &allocator, idx_t buffer_size) {
@@ -868,7 +872,10 @@ idx_t JSONReader::Scan(JSONReaderScanState &scan_state) {
868872
return 0;
869873
}
870874
}
871-
ParseNextChunk(scan_state);
875+
if (!ParseNextChunk(scan_state)) {
876+
// found an error but we can't handle it - return
877+
return 0;
878+
}
872879
}
873880
return scan_state.scan_count;
874881
}

src/duckdb/extension/parquet/include/reader/string_column_reader.hpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@ class StringColumnReader : public ColumnReader {
3636
const StringColumnType string_column_type;
3737

3838
public:
39-
static void VerifyString(const char *str_data, uint32_t str_len, const bool isVarchar);
39+
static bool IsValid(const char *str_data, uint32_t str_len, bool is_varchar);
40+
static bool IsValid(const string &str, bool is_varchar);
41+
static void VerifyString(const char *str_data, uint32_t str_len, bool is_varchar);
4042
void VerifyString(const char *str_data, uint32_t str_len) const;
4143

4244
static void ReferenceBlock(Vector &result, shared_ptr<ResizeableBuffer> &block);

src/duckdb/extension/parquet/parquet_statistics.cpp

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -395,18 +395,14 @@ unique_ptr<BaseStatistics> ParquetStatisticsUtils::TransformColumnStatistics(con
395395
break;
396396
case LogicalTypeId::VARCHAR: {
397397
auto string_stats = StringStats::CreateUnknown(type);
398-
if (parquet_stats.__isset.min_value) {
399-
StringColumnReader::VerifyString(parquet_stats.min_value.c_str(), parquet_stats.min_value.size(), true);
398+
if (parquet_stats.__isset.min_value && StringColumnReader::IsValid(parquet_stats.min_value, true)) {
400399
StringStats::SetMin(string_stats, parquet_stats.min_value);
401-
} else if (parquet_stats.__isset.min) {
402-
StringColumnReader::VerifyString(parquet_stats.min.c_str(), parquet_stats.min.size(), true);
400+
} else if (parquet_stats.__isset.min && StringColumnReader::IsValid(parquet_stats.min, true)) {
403401
StringStats::SetMin(string_stats, parquet_stats.min);
404402
}
405-
if (parquet_stats.__isset.max_value) {
406-
StringColumnReader::VerifyString(parquet_stats.max_value.c_str(), parquet_stats.max_value.size(), true);
403+
if (parquet_stats.__isset.max_value && StringColumnReader::IsValid(parquet_stats.max_value, true)) {
407404
StringStats::SetMax(string_stats, parquet_stats.max_value);
408-
} else if (parquet_stats.__isset.max) {
409-
StringColumnReader::VerifyString(parquet_stats.max.c_str(), parquet_stats.max.size(), true);
405+
} else if (parquet_stats.__isset.max && StringColumnReader::IsValid(parquet_stats.max, true)) {
410406
StringStats::SetMax(string_stats, parquet_stats.max);
411407
}
412408
row_group_stats = string_stats.ToUnique();

src/duckdb/extension/parquet/reader/string_column_reader.cpp

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,23 @@ StringColumnReader::StringColumnReader(ParquetReader &reader, const ParquetColum
1616
}
1717
}
1818

19-
void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
19+
bool StringColumnReader::IsValid(const char *str_data, uint32_t str_len, const bool is_varchar) {
2020
if (!is_varchar) {
21-
return;
21+
return true;
2222
}
2323
// verify if a string is actually UTF8, and if there are no null bytes in the middle of the string
2424
// technically Parquet should guarantee this, but reality is often disappointing
2525
UnicodeInvalidReason reason;
2626
size_t pos;
2727
auto utf_type = Utf8Proc::Analyze(str_data, str_len, &reason, &pos);
28-
if (utf_type == UnicodeType::INVALID) {
28+
return utf_type != UnicodeType::INVALID;
29+
}
30+
31+
bool StringColumnReader::IsValid(const string &str, bool is_varchar) {
32+
return IsValid(str.c_str(), str.size(), is_varchar);
33+
}
34+
void StringColumnReader::VerifyString(const char *str_data, uint32_t str_len, const bool is_varchar) {
35+
if (!IsValid(str_data, str_len, is_varchar)) {
2936
throw InvalidInputException("Invalid string encoding found in Parquet file: value \"%s\" is not valid UTF8!",
3037
Blob::ToString(string_t(str_data, str_len)));
3138
}

src/duckdb/src/catalog/catalog_search_path.cpp

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -197,8 +197,15 @@ void CatalogSearchPath::Set(CatalogSearchEntry new_value, CatalogSetPathType set
197197
Set(std::move(new_paths), set_type);
198198
}
199199

200-
const vector<CatalogSearchEntry> &CatalogSearchPath::Get() const {
201-
return paths;
200+
vector<CatalogSearchEntry> CatalogSearchPath::Get() const {
201+
vector<CatalogSearchEntry> res;
202+
for (auto &path : paths) {
203+
if (path.schema.empty()) {
204+
continue;
205+
}
206+
res.emplace_back(path);
207+
}
208+
return res;
202209
}
203210

204211
string CatalogSearchPath::GetDefaultSchema(const string &catalog) const {
@@ -250,7 +257,7 @@ vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) con
250257
catalogs.push_back(SYSTEM_CATALOG);
251258
} else {
252259
for (auto &path : paths) {
253-
if (StringUtil::CIEquals(path.schema, schema)) {
260+
if (StringUtil::CIEquals(path.schema, schema) || path.schema.empty()) {
254261
catalogs.push_back(path.catalog);
255262
}
256263
}
@@ -261,24 +268,24 @@ vector<string> CatalogSearchPath::GetCatalogsForSchema(const string &schema) con
261268
vector<string> CatalogSearchPath::GetSchemasForCatalog(const string &catalog) const {
262269
vector<string> schemas;
263270
for (auto &path : paths) {
264-
if (StringUtil::CIEquals(path.catalog, catalog)) {
271+
if (!path.schema.empty() && StringUtil::CIEquals(path.catalog, catalog)) {
265272
schemas.push_back(path.schema);
266273
}
267274
}
268275
return schemas;
269276
}
270277

271278
const CatalogSearchEntry &CatalogSearchPath::GetDefault() const {
272-
const auto &paths = Get();
273279
D_ASSERT(paths.size() >= 2);
280+
D_ASSERT(!paths[1].schema.empty());
274281
return paths[1];
275282
}
276283

277284
void CatalogSearchPath::SetPathsInternal(vector<CatalogSearchEntry> new_paths) {
278285
this->set_paths = std::move(new_paths);
279286

280287
paths.clear();
281-
paths.reserve(set_paths.size() + 3);
288+
paths.reserve(set_paths.size() + 4);
282289
paths.emplace_back(TEMP_CATALOG, DEFAULT_SCHEMA);
283290
for (auto &path : set_paths) {
284291
paths.push_back(path);

0 commit comments

Comments
 (0)