Skip to content

Commit 77e7018

Browse files
Update vendored DuckDB sources to 344094eb03
1 parent 966cb41 commit 77e7018

211 files changed

Lines changed: 15530 additions & 10654 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

src/duckdb/extension/core_functions/aggregate/distributive/sum.cpp

Lines changed: 50 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,37 @@ struct HugeintSumOperation : public BaseSumOperation<SumSetOperation, HugeintAdd
7373
}
7474
};
7575

76+
template <class T>
77+
static LogicalType GetValueLogicalType();
78+
79+
template <>
80+
LogicalType GetValueLogicalType<int64_t>() {
81+
return LogicalType::BIGINT;
82+
}
83+
template <>
84+
LogicalType GetValueLogicalType<hugeint_t>() {
85+
return LogicalType::HUGEINT;
86+
}
87+
template <>
88+
LogicalType GetValueLogicalType<double>() {
89+
return LogicalType::DOUBLE;
90+
}
91+
92+
template <class T>
93+
LogicalType GetSumStateType(const AggregateFunction &function) {
94+
child_list_t<LogicalType> child_types;
95+
child_types.emplace_back("isset", LogicalType::BOOLEAN);
96+
97+
LogicalType value_type = GetValueLogicalType<T>();
98+
// Use the return type when its physical representation matches the state type
99+
if (function.return_type.InternalType() == value_type.InternalType()) {
100+
value_type = function.return_type;
101+
}
102+
child_types.emplace_back("value", value_type);
103+
104+
return LogicalType::STRUCT(std::move(child_types));
105+
}
106+
76107
unique_ptr<FunctionData> SumNoOverflowBind(ClientContext &context, AggregateFunction &function,
77108
vector<unique_ptr<Expression>> &arguments) {
78109
throw BinderException("sum_no_overflow is for internal use only!");
@@ -98,7 +129,7 @@ AggregateFunction GetSumAggregateNoOverflow(PhysicalType type) {
98129
function.SetBindCallback(SumNoOverflowBind);
99130
function.SetSerializeCallback(SumNoOverflowSerialize);
100131
function.SetDeserializeCallback(SumNoOverflowDeserialize);
101-
return function;
132+
return function.SetStructStateExport(GetSumStateType<int64_t>);
102133
}
103134
case PhysicalType::INT64: {
104135
auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int64_t, hugeint_t, IntegerSumOperation>(
@@ -108,7 +139,7 @@ AggregateFunction GetSumAggregateNoOverflow(PhysicalType type) {
108139
function.SetBindCallback(SumNoOverflowBind);
109140
function.SetSerializeCallback(SumNoOverflowSerialize);
110141
function.SetDeserializeCallback(SumNoOverflowDeserialize);
111-
return function;
142+
return function.SetStructStateExport(GetSumStateType<int64_t>);
112143
}
113144
default:
114145
throw BinderException("Unsupported internal type for sum_no_overflow");
@@ -164,13 +195,13 @@ AggregateFunction GetSumAggregate(PhysicalType type) {
164195
auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, bool, hugeint_t, IntegerSumOperation>(
165196
LogicalType::BOOLEAN, LogicalType::HUGEINT);
166197
function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT);
167-
return function;
198+
return function.SetStructStateExport(GetSumStateType<int64_t>);
168199
}
169200
case PhysicalType::INT16: {
170201
auto function = AggregateFunction::UnaryAggregate<SumState<int64_t>, int16_t, hugeint_t, IntegerSumOperation>(
171202
LogicalType::SMALLINT, LogicalType::HUGEINT);
172203
function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT);
173-
return function;
204+
return function.SetStructStateExport(GetSumStateType<int64_t>);
174205
}
175206

176207
case PhysicalType::INT32: {
@@ -179,22 +210,22 @@ AggregateFunction GetSumAggregate(PhysicalType type) {
179210
LogicalType::INTEGER, LogicalType::HUGEINT);
180211
function.SetStatisticsCallback(SumPropagateStats);
181212
function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT);
182-
return function;
213+
return function.SetStructStateExport(GetSumStateType<hugeint_t>);
183214
}
184215
case PhysicalType::INT64: {
185216
auto function =
186217
AggregateFunction::UnaryAggregate<SumState<hugeint_t>, int64_t, hugeint_t, SumToHugeintOperation>(
187218
LogicalType::BIGINT, LogicalType::HUGEINT);
188219
function.SetStatisticsCallback(SumPropagateStats);
189220
function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT);
190-
return function;
221+
return function.SetStructStateExport(GetSumStateType<hugeint_t>);
191222
}
192223
case PhysicalType::INT128: {
193224
auto function =
194225
AggregateFunction::UnaryAggregate<SumState<hugeint_t>, hugeint_t, hugeint_t, HugeintSumOperation>(
195226
LogicalType::HUGEINT, LogicalType::HUGEINT);
196227
function.SetOrderDependent(AggregateOrderDependent::NOT_ORDER_DEPENDENT);
197-
return function;
228+
return function.SetStructStateExport(GetSumStateType<hugeint_t>);
198229
}
199230
default:
200231
throw InternalException("Unimplemented sum aggregate");
@@ -283,7 +314,8 @@ AggregateFunctionSet SumFun::GetFunctions() {
283314
sum.AddFunction(GetSumAggregate(PhysicalType::INT64));
284315
sum.AddFunction(GetSumAggregate(PhysicalType::INT128));
285316
sum.AddFunction(AggregateFunction::UnaryAggregate<SumState<double>, double, double, NumericSumOperation>(
286-
LogicalType::DOUBLE, LogicalType::DOUBLE));
317+
LogicalType::DOUBLE, LogicalType::DOUBLE)
318+
.SetStructStateExport(GetSumStateType<double>));
287319
sum.AddFunction(AggregateFunction::UnaryAggregate<BignumState, bignum_t, bignum_t, BignumOperation>(
288320
LogicalType::BIGNUM, LogicalType::BIGNUM));
289321
return sum;
@@ -301,9 +333,18 @@ AggregateFunctionSet SumNoOverflowFun::GetFunctions() {
301333
return sum_no_overflow;
302334
}
303335

336+
LogicalType GetKahanSumStateType(const AggregateFunction &function) {
337+
child_list_t<LogicalType> children;
338+
children.emplace_back("isset", LogicalType::BOOLEAN);
339+
children.emplace_back("value", LogicalType::DOUBLE);
340+
children.emplace_back("err", LogicalType::DOUBLE);
341+
return LogicalType::STRUCT(std::move(children));
342+
}
343+
304344
AggregateFunction KahanSumFun::GetFunction() {
305345
return AggregateFunction::UnaryAggregate<KahanSumState, double, double, KahanSumOperation>(LogicalType::DOUBLE,
306-
LogicalType::DOUBLE);
346+
LogicalType::DOUBLE)
347+
.SetStructStateExport(GetKahanSumStateType);
307348
}
308349

309350
} // namespace duckdb

src/duckdb/extension/json/include/json_structure.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ struct JSONStructureDescription {
8080

8181
//! Candidate types (if auto-detecting and type == LogicalTypeId::VARCHAR)
8282
vector<LogicalTypeId> candidate_types;
83+
84+
//! Whether any UBIGINT value exceeds the BIGINT range (i.e., >= 2^63)
85+
bool has_large_ubigint = false;
8386
};
8487

8588
struct JSONStructure {

src/duckdb/extension/json/json_functions/copy_json.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ static BoundStatement CopyToJSONPlan(Binder &binder, CopyStatement &stmt) {
5555
csv_copy_options["suffix"] = {"\n]\n"};
5656
csv_copy_options["new_line"] = {",\n\t"};
5757
}
58+
} else if (loption == "file_extension") {
59+
// Since we set the file extension to "json" above, we need to override it
60+
csv_copy_options["file_extension"] = {StringValue::Get(kv.second.back())};
5861
} else if (SUPPORTED_BASE_OPTIONS.find(loption) != SUPPORTED_BASE_OPTIONS.end()) {
5962
// We support these base options
6063
csv_copy_options.insert(kv);

src/duckdb/extension/json/json_functions/json_create.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ static void CreateValues(const StructNames &names, yyjson_mut_doc *doc, yyjson_m
566566
break;
567567
case LogicalTypeId::BIT:
568568
case LogicalTypeId::BLOB:
569-
case LogicalTypeId::AGGREGATE_STATE:
569+
case LogicalTypeId::LEGACY_AGGREGATE_STATE:
570570
case LogicalTypeId::ENUM:
571571
case LogicalTypeId::DATE:
572572
case LogicalTypeId::INTERVAL:
@@ -617,6 +617,7 @@ static void CreateValues(const StructNames &names, yyjson_mut_doc *doc, yyjson_m
617617
case LogicalTypeId::TABLE:
618618
case LogicalTypeId::LAMBDA:
619619
case LogicalTypeId::GEOMETRY: // TODO! Add support for GEOMETRY
620+
case LogicalTypeId::AGGREGATE_STATE:
620621
throw InternalException("Unsupported type arrived at JSON create function");
621622
}
622623
}

src/duckdb/extension/json/json_functions/json_structure.cpp

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,23 @@
99
namespace duckdb {
1010

1111
static bool IsNumeric(LogicalTypeId type) {
12-
return type == LogicalTypeId::DOUBLE || type == LogicalTypeId::UBIGINT || type == LogicalTypeId::BIGINT;
12+
return type == LogicalTypeId::DOUBLE || type == LogicalTypeId::UBIGINT || type == LogicalTypeId::BIGINT ||
13+
type == LogicalTypeId::HUGEINT;
1314
}
1415

1516
static LogicalTypeId MaxNumericType(const LogicalTypeId &a, const LogicalTypeId &b) {
1617
D_ASSERT(a != b);
1718
if (a == LogicalTypeId::DOUBLE || b == LogicalTypeId::DOUBLE) {
1819
return LogicalTypeId::DOUBLE;
1920
}
21+
if (a == LogicalTypeId::HUGEINT || b == LogicalTypeId::HUGEINT) {
22+
return LogicalTypeId::HUGEINT;
23+
}
24+
// One is BIGINT and the other is UBIGINT - need HUGEINT to represent both ranges
25+
if ((a == LogicalTypeId::BIGINT && b == LogicalTypeId::UBIGINT) ||
26+
(a == LogicalTypeId::UBIGINT && b == LogicalTypeId::BIGINT)) {
27+
return LogicalTypeId::HUGEINT;
28+
}
2029
return LogicalTypeId::BIGINT;
2130
}
2231

@@ -343,6 +352,7 @@ static void SwapJSONStructureDescription(JSONStructureDescription &a, JSONStruct
343352
std::swap(a.key_map, b.key_map);
344353
std::swap(a.children, b.children);
345354
std::swap(a.candidate_types, b.candidate_types);
355+
std::swap(a.has_large_ubigint, b.has_large_ubigint);
346356
}
347357

348358
JSONStructureDescription::JSONStructureDescription(JSONStructureDescription &&other) noexcept {
@@ -427,7 +437,11 @@ static void ExtractStructureObject(yyjson_val *obj, JSONStructureNode &node, con
427437

428438
static void ExtractStructureVal(yyjson_val *val, JSONStructureNode &node) {
429439
D_ASSERT(!yyjson_is_arr(val) && !yyjson_is_obj(val));
430-
node.GetOrCreateDescription(JSONCommon::ValTypeToLogicalTypeId(val));
440+
auto &desc = node.GetOrCreateDescription(JSONCommon::ValTypeToLogicalTypeId(val));
441+
if (desc.type == LogicalTypeId::UBIGINT &&
442+
unsafe_yyjson_get_uint(val) > static_cast<uint64_t>(NumericLimits<int64_t>::Maximum())) {
443+
desc.has_large_ubigint = true;
444+
}
431445
}
432446

433447
void JSONStructure::ExtractStructure(yyjson_val *val, JSONStructureNode &node, const bool ignore_errors) {
@@ -558,6 +572,9 @@ static void MergeNodeVal(JSONStructureNode &merged, const JSONStructureDescripti
558572
const bool node_initialized) {
559573
D_ASSERT(child_desc.type != LogicalTypeId::LIST && child_desc.type != LogicalTypeId::STRUCT);
560574
auto &merged_desc = merged.GetOrCreateDescription(child_desc.type);
575+
if (child_desc.has_large_ubigint) {
576+
merged_desc.has_large_ubigint = true;
577+
}
561578
if (merged_desc.type != LogicalTypeId::VARCHAR || !node_initialized || merged.descriptions.size() != 1) {
562579
return;
563580
}
@@ -798,7 +815,10 @@ LogicalType JSONStructure::StructureToType(ClientContext &context, const JSONStr
798815
case LogicalTypeId::VARCHAR:
799816
return StructureToTypeString(node);
800817
case LogicalTypeId::UBIGINT:
801-
return LogicalTypeId::BIGINT; // We prefer not to return UBIGINT in our type auto-detection
818+
if (desc.has_large_ubigint) {
819+
return LogicalTypeId::HUGEINT;
820+
}
821+
return LogicalTypeId::BIGINT;
802822
case LogicalTypeId::SQLNULL:
803823
return null_type;
804824
default:

src/duckdb/extension/json/json_reader.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
#include "json_reader.hpp"
22

3+
#include <utility>
4+
35
#include "duckdb/common/file_opener.hpp"
6+
#include "duckdb/common/file_open_flags.hpp"
47
#include "duckdb/common/serializer/deserializer.hpp"
58
#include "duckdb/common/serializer/serializer.hpp"
9+
#include "duckdb/storage/caching_mode.hpp"
610
#include "json_scan.hpp"
7-
#include <utility>
811

912
namespace duckdb {
1013

@@ -183,7 +186,9 @@ void JSONReader::OpenJSONFile() {
183186
lock_guard<mutex> guard(lock);
184187
if (!IsOpen()) {
185188
auto &fs = FileSystem::GetFileSystem(context);
186-
auto regular_file_handle = fs.OpenFile(file, FileFlags::FILE_FLAGS_READ | options.compression);
189+
FileOpenFlags flags = FileFlags::FILE_FLAGS_READ | options.compression;
190+
flags.SetCachingMode(CachingMode::CACHE_REMOTE_ONLY);
191+
auto regular_file_handle = fs.OpenFile(file, flags);
187192
file_handle = make_uniq<JSONFileHandle>(context, std::move(regular_file_handle), BufferAllocator::Get(context));
188193
}
189194
Reset();
@@ -1052,8 +1057,9 @@ void JSONReader::ReadNextBufferSeek(JSONReaderScanState &scan_state) {
10521057
if (!raw_handle.OnDiskFile() && raw_handle.CanSeek()) {
10531058
if (!scan_state.thread_local_filehandle ||
10541059
scan_state.thread_local_filehandle->GetPath() != raw_handle.GetPath()) {
1055-
scan_state.thread_local_filehandle = scan_state.fs.OpenFile(
1056-
raw_handle.GetPath(), FileFlags::FILE_FLAGS_READ | FileFlags::FILE_FLAGS_DIRECT_IO);
1060+
FileOpenFlags flags = FileFlags::FILE_FLAGS_READ | FileFlags::FILE_FLAGS_DIRECT_IO;
1061+
flags.SetCachingMode(CachingMode::CACHE_REMOTE_ONLY);
1062+
scan_state.thread_local_filehandle = scan_state.fs.OpenFile(raw_handle.GetPath(), flags);
10571063
}
10581064
} else if (scan_state.thread_local_filehandle) {
10591065
scan_state.thread_local_filehandle = nullptr;

src/duckdb/extension/parquet/include/parquet_geometry.hpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,14 @@ class GeoParquetFileMetadata {
8585
public:
8686
explicit GeoParquetFileMetadata(GeoParquetVersion geo_parquet_version) : version(geo_parquet_version) {
8787
}
88-
void AddGeoParquetStats(const string &column_name, const LogicalType &type, const GeometryStatsData &stats,
89-
GeoParquetVersion version);
88+
void AddGeoParquetStats(ClientContext &context, const string &column_name, const LogicalType &type,
89+
const GeometryStatsData &stats, GeoParquetVersion version);
9090
void Write(duckdb_parquet::FileMetaData &file_meta_data);
9191

9292
// Try to read GeoParquet metadata. Returns nullptr if not found, invalid or the required spatial extension is not
9393
// available.
9494
static unique_ptr<GeoParquetFileMetadata> TryRead(const duckdb_parquet::FileMetaData &file_meta_data,
95-
const ClientContext &context);
95+
ClientContext &context);
9696
const unordered_map<string, GeoParquetColumnMetadata> &GetColumnMeta() const;
9797
optional_ptr<const GeoParquetColumnMetadata> GetColumnMeta(const string &column_name) const;
9898

src/duckdb/extension/parquet/include/parquet_reader.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,6 @@ struct ParquetOptions {
109109
bool binary_as_string = false;
110110
bool file_row_number = false;
111111
shared_ptr<ParquetEncryptionConfig> encryption_config;
112-
bool debug_use_openssl = true;
113112

114113
vector<ParquetColumnDefinition> schema;
115114
idx_t explicit_cardinality = 0;

src/duckdb/extension/parquet/include/parquet_writer.hpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ class ParquetWriter {
110110
ShreddingType shredding_types, const vector<pair<string, string>> &kv_metadata,
111111
shared_ptr<ParquetEncryptionConfig> encryption_config, optional_idx dictionary_size_limit,
112112
idx_t string_dictionary_page_size_limit, bool enable_bloom_filters,
113-
double bloom_filter_false_positive_ratio, int64_t compression_level, bool debug_use_openssl,
114-
ParquetVersion parquet_version, GeoParquetVersion geoparquet_version);
113+
double bloom_filter_false_positive_ratio, int64_t compression_level, ParquetVersion parquet_version,
114+
GeoParquetVersion geoparquet_version);
115115
~ParquetWriter();
116116

117117
public:
@@ -123,7 +123,7 @@ class ParquetWriter {
123123

124124
static duckdb_parquet::Type::type DuckDBTypeToParquetType(const LogicalType &duckdb_type);
125125
static void SetSchemaProperties(const LogicalType &duckdb_type, duckdb_parquet::SchemaElement &schema_ele,
126-
bool allow_geometry);
126+
bool allow_geometry, ClientContext &context);
127127

128128
ClientContext &GetContext() {
129129
return context;
@@ -207,7 +207,6 @@ class ParquetWriter {
207207
bool enable_bloom_filters;
208208
double bloom_filter_false_positive_ratio;
209209
int64_t compression_level;
210-
bool debug_use_openssl;
211210
shared_ptr<EncryptionUtil> encryption_util;
212211
ParquetVersion parquet_version;
213212
GeoParquetVersion geoparquet_version;

0 commit comments

Comments
 (0)