diff --git a/src/duckdb/src/common/arrow/appender/union_data.cpp b/src/duckdb/src/common/arrow/appender/union_data.cpp index 4ca4ebf67..29faa5222 100644 --- a/src/duckdb/src/common/arrow/appender/union_data.cpp +++ b/src/duckdb/src/common/arrow/appender/union_data.cpp @@ -28,7 +28,8 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f child_vectors.emplace_back(child.second, size); } - for (idx_t input_idx = from; input_idx < to; input_idx++) { + for (idx_t i = 0; i < size; i++) { + auto input_idx = from + i; const auto &val = input.GetValue(input_idx); idx_t tag = 0; @@ -40,7 +41,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f } for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) { - child_vectors[child_idx].SetValue(input_idx, child_idx == tag ? resolved_value : Value(nullptr)); + child_vectors[child_idx].SetValue(i, child_idx == tag ? resolved_value : Value(nullptr)); } types_buffer.push_back(NumericCast(tag)); } @@ -48,7 +49,7 @@ void ArrowUnionData::Append(ArrowAppendData &append_data, Vector &input, idx_t f for (idx_t child_idx = 0; child_idx < child_vectors.size(); child_idx++) { auto &child_buffer = append_data.child_data[child_idx]; auto &child = child_vectors[child_idx]; - child_buffer->append_vector(*child_buffer, child, from, to, size); + child_buffer->append_vector(*child_buffer, child, 0, size, size); } append_data.row_count += size; } diff --git a/src/duckdb/src/common/arrow/arrow_converter.cpp b/src/duckdb/src/common/arrow/arrow_converter.cpp index 85632a38c..7e97ceb27 100644 --- a/src/duckdb/src/common/arrow/arrow_converter.cpp +++ b/src/duckdb/src/common/arrow/arrow_converter.cpp @@ -417,37 +417,47 @@ void ArrowConverter::ToArrowSchema(ArrowSchema *out_schema, const vector &names, ClientProperties &options) { D_ASSERT(out_schema); D_ASSERT(types.size() == names.size()); - const idx_t column_count = types.size(); - // Allocate as unique_ptr first to clean-up properly on error - auto root_holder = make_uniq(); + D_ASSERT(options.client_context); + auto get_schema_func = [&types, &out_schema, &names, &options]() { + const idx_t column_count = types.size(); + // Allocate as unique_ptr first to clean-up properly on error + auto root_holder = make_uniq(); - // Allocate the children - root_holder->children.resize(column_count); - root_holder->children_ptrs.resize(column_count, nullptr); - for (size_t i = 0; i < column_count; ++i) { - root_holder->children_ptrs[i] = &root_holder->children[i]; - } - out_schema->children = root_holder->children_ptrs.data(); - out_schema->n_children = NumericCast(column_count); + // Allocate the children + root_holder->children.resize(column_count); + root_holder->children_ptrs.resize(column_count, nullptr); + for (size_t i = 0; i < column_count; ++i) { + root_holder->children_ptrs[i] = &root_holder->children[i]; + } + out_schema->children = root_holder->children_ptrs.data(); + out_schema->n_children = NumericCast(column_count); - // Store the schema - out_schema->format = "+s"; // struct apparently - out_schema->flags = 0; - out_schema->metadata = nullptr; - out_schema->name = "duckdb_query_result"; - out_schema->dictionary = nullptr; + // Store the schema + out_schema->format = "+s"; // struct apparently + out_schema->flags = 0; + out_schema->metadata = nullptr; + out_schema->name = "duckdb_query_result"; + out_schema->dictionary = nullptr; - // Configure all child schemas - for (idx_t col_idx = 0; col_idx < column_count; col_idx++) { - root_holder->owned_column_names.push_back(AddName(names[col_idx])); - auto &child = root_holder->children[col_idx]; - InitializeChild(child, *root_holder, names[col_idx]); - SetArrowFormat(*root_holder, child, types[col_idx], options, *options.client_context); - } + // Configure all child schemas + for (idx_t col_idx = 0; col_idx < column_count; col_idx++) { + root_holder->owned_column_names.push_back(AddName(names[col_idx])); + auto &child = root_holder->children[col_idx]; + InitializeChild(child, *root_holder, names[col_idx]); + SetArrowFormat(*root_holder, child, types[col_idx], options, *options.client_context); + } - // Release ownership to caller - out_schema->private_data = root_holder.release(); - out_schema->release = ReleaseDuckDBArrowSchema; + // Release ownership to caller + out_schema->private_data = root_holder.release(); + out_schema->release = ReleaseDuckDBArrowSchema; + }; + auto &context = *options.client_context; + if (context.transaction.HasActiveTransaction()) { + get_schema_func(); + } else { + // We need to run this in a transaction. The arrow schema callback might use the catalog to do lookups. + options.client_context->RunFunctionInTransaction(get_schema_func); + } } } // namespace duckdb diff --git a/src/duckdb/src/common/operator/cast_operators.cpp b/src/duckdb/src/common/operator/cast_operators.cpp index 233bcf619..f2cdb8bb5 100644 --- a/src/duckdb/src/common/operator/cast_operators.cpp +++ b/src/duckdb/src/common/operator/cast_operators.cpp @@ -1583,7 +1583,9 @@ bool TryCastBlobToUUID::Operation(string_t input, hugeint_t &result, bool strict //===--------------------------------------------------------------------===// template <> bool TryCastToGeometry::Operation(string_t input, string_t &result, Vector &result_vector, CastParameters ¶meters) { - return Geometry::FromString(input, result, result_vector, parameters.strict); + // Pass the query location of the cast source if available. + return Geometry::FromString(input, result, result_vector, parameters.strict, + parameters.cast_source ? parameters.cast_source->query_location : optional_idx()); } //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/common/types/geometry.cpp b/src/duckdb/src/common/types/geometry.cpp index 10ea22ff5..ce668a66c 100644 --- a/src/duckdb/src/common/types/geometry.cpp +++ b/src/duckdb/src/common/types/geometry.cpp @@ -262,13 +262,21 @@ class TextReader { void Match(const char *str) { if (!TryMatch(str)) { - throw InvalidInputException("Expected '%s' but got '%c' at position %zu", str, *pos, pos - beg); + // Check if this would go EOF + if (pos + strlen(str) >= end) { + throw MakeError("Expected '%s' but got end of input", str); + } + + throw MakeError("Expected '%s' but got '%c'", str, *pos); } } void Match(char c) { if (!TryMatch(c)) { - throw InvalidInputException("Expected '%c' but got '%c' at position %zu", c, *pos, pos - beg); + if (pos >= end) { + throw MakeError("Expected '%c' but got end of input", c); + } + throw MakeError("Expected '%c' but got '%c'", c, *pos); } } @@ -277,7 +285,7 @@ class TextReader { double num; const auto res = duckdb_fast_float::from_chars(pos, end, num); if (res.ec != std::errc()) { - throw InvalidInputException("Expected number at position %zu", pos - beg); + throw MakeError("Expected number"); } pos = res.ptr; // update position to the end of the parsed number @@ -294,25 +302,52 @@ class TextReader { pos = beg; } -private: + template + InvalidInputException MakeError(const char *raw_msg, ARGS... args) const { + const auto byte_offset = UnsafeNumericCast(pos - beg); + auto msg = StringUtil::Format("Failed to parse geometry: %s at offset %lu", + StringUtil::Format(raw_msg, args...), byte_offset); + if (query_location.IsValid()) { + const auto expr_offset = optional_idx(query_location.GetIndex() + byte_offset); + return InvalidInputException(Exception::InitializeExtraInfo(expr_offset), msg); + } else { + return InvalidInputException(msg); + } + } + + void SetQueryLocation(optional_idx location) { + query_location = location; + } + void SkipWhitespace() { while (pos < end && isspace(*pos)) { pos++; } } +private: const char *beg; const char *pos; const char *end; + optional_idx query_location; }; void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, bool parent_has_z, bool parent_has_m) { if (depth == Geometry::MAX_RECURSION_DEPTH) { - throw InvalidInputException("Geometry string exceeds maximum recursion depth of %d", - Geometry::MAX_RECURSION_DEPTH); + throw reader.MakeError("Geometry string exceeds maximum recursion depth of %d", Geometry::MAX_RECURSION_DEPTH); + } + + // Skip leading whitespace + reader.SkipWhitespace(); + + // EWKT dialect (ignore SRID if present) + if (reader.TryMatch("SRID")) { + reader.Match('='); + reader.MatchNumber(); + reader.Match(';'); } - GeometryType type; + GeometryType type = GeometryType::INVALID; if (reader.TryMatch("point")) { type = GeometryType::POINT; @@ -329,7 +364,7 @@ void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, } else if (reader.TryMatch("geometrycollection")) { type = GeometryType::GEOMETRYCOLLECTION; } else { - throw InvalidInputException("Unknown geometry type at position %zu", reader.GetPosition()); + throw reader.MakeError("Unknown geometry type"); } const auto has_z = reader.TryMatch("z"); @@ -338,8 +373,7 @@ void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, const auto is_empty = reader.TryMatch("empty"); if ((depth != 0) && ((parent_has_z != has_z) || (parent_has_m != has_m))) { - throw InvalidInputException("Geometry has inconsistent Z/M dimensions, starting at position %zu", - reader.GetPosition()); + throw reader.MakeError("Geometry has inconsistent Z/M dimensions"); } // How many dimensions does this geometry have? @@ -438,6 +472,7 @@ void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, } part_count.value++; } while (reader.TryMatch(',')); + reader.Match(')'); writer.Write(part_count); } break; case GeometryType::MULTILINESTRING: { @@ -453,18 +488,23 @@ void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, writer.Write(1); writer.Write(part_meta); - auto vert_count = writer.Reserve(); - reader.Match('('); - do { - for (uint32_t d_idx = 0; d_idx < dims; d_idx++) { - auto value = reader.MatchNumber(); - writer.Write(value); - } - vert_count.value++; - } while (reader.TryMatch(',')); - reader.Match(')'); - writer.Write(vert_count); - part_count.value++; + if (reader.TryMatch("EMPTY")) { + writer.Write(0); // No vertices in empty linestring + part_count.value++; + } else { + auto vert_count = writer.Reserve(); + reader.Match('('); + do { + for (uint32_t d_idx = 0; d_idx < dims; d_idx++) { + auto value = reader.MatchNumber(); + writer.Write(value); + } + vert_count.value++; + } while (reader.TryMatch(',')); + reader.Match(')'); + writer.Write(vert_count); + part_count.value++; + } } while (reader.TryMatch(',')); reader.Match(')'); writer.Write(part_count); @@ -482,25 +522,30 @@ void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, writer.Write(1); writer.Write(part_meta); - auto ring_count = writer.Reserve(); - reader.Match('('); - do { - auto vert_count = writer.Reserve(); + if (reader.TryMatch("EMPTY")) { + writer.Write(0); // No rings in empty polygon + part_count.value++; + } else { + auto ring_count = writer.Reserve(); reader.Match('('); do { - for (uint32_t d_idx = 0; d_idx < dims; d_idx++) { - auto value = reader.MatchNumber(); - writer.Write(value); - } - vert_count.value++; + auto vert_count = writer.Reserve(); + reader.Match('('); + do { + for (uint32_t d_idx = 0; d_idx < dims; d_idx++) { + auto value = reader.MatchNumber(); + writer.Write(value); + } + vert_count.value++; + } while (reader.TryMatch(',')); + reader.Match(')'); + writer.Write(vert_count); + ring_count.value++; } while (reader.TryMatch(',')); reader.Match(')'); - writer.Write(vert_count); - ring_count.value++; - } while (reader.TryMatch(',')); - reader.Match(')'); - writer.Write(ring_count); - part_count.value++; + writer.Write(ring_count); + part_count.value++; + } } while (reader.TryMatch(',')); reader.Match(')'); writer.Write(part_count); @@ -521,8 +566,7 @@ void FromStringRecursive(TextReader &reader, BlobWriter &writer, uint32_t depth, writer.Write(part_count); } break; default: - throw InvalidInputException("Unknown geometry type %d at position %zu", static_cast(type), - reader.GetPosition()); + throw reader.MakeError("Unknown geometry type %d", static_cast(type)); } } @@ -1047,8 +1091,10 @@ void Geometry::ToBinary(Vector &source, Vector &result, idx_t count) { result.Reinterpret(source); } -bool Geometry::FromString(const string_t &wkt_text, string_t &result, Vector &result_vector, bool strict) { +bool Geometry::FromString(const string_t &wkt_text, string_t &result, Vector &result_vector, bool strict, + optional_idx query_location) { TextReader reader(wkt_text.GetData(), static_cast(wkt_text.GetSize())); + reader.SetQueryLocation(query_location); BlobWriter writer; FromStringRecursive(reader, writer, 0, false, false); @@ -1058,6 +1104,10 @@ bool Geometry::FromString(const string_t &wkt_text, string_t &result, Vector &re return true; } +bool Geometry::FromString(const string_t &wkt_text, string_t &result, Vector &result_vector, bool strict) { + return FromString(wkt_text, result, result_vector, strict, optional_idx::Invalid()); +} + string_t Geometry::ToString(Vector &result, const string_t &geom) { BlobReader reader(geom.GetData(), static_cast(geom.GetSize())); TextWriter writer; diff --git a/src/duckdb/src/common/types/interval.cpp b/src/duckdb/src/common/types/interval.cpp index 586126e46..30299706b 100644 --- a/src/duckdb/src/common/types/interval.cpp +++ b/src/duckdb/src/common/types/interval.cpp @@ -11,9 +11,6 @@ #include "duckdb/common/operator/subtract.hpp" #include "duckdb/common/string_util.hpp" -#include "duckdb/common/serializer/serializer.hpp" -#include "duckdb/common/serializer/deserializer.hpp" - namespace duckdb { bool Interval::FromString(const string &str, interval_t &result) { @@ -275,6 +272,10 @@ interval_parse_time : { } } // invert all the values + if (result.months == NumericLimits::Minimum() || result.days == NumericLimits::Minimum()) { + throw OutOfRangeException("AGO interval value is out of range"); + } + result.months = -result.months; result.days = -result.days; result.micros = -result.micros; diff --git a/src/duckdb/src/execution/expression_executor/execute_operator.cpp b/src/duckdb/src/execution/expression_executor/execute_operator.cpp index 8d3cfee46..4817f1741 100644 --- a/src/duckdb/src/execution/expression_executor/execute_operator.cpp +++ b/src/duckdb/src/execution/expression_executor/execute_operator.cpp @@ -120,11 +120,7 @@ void ExpressionExecutor::Execute(const BoundOperatorExpression &expr, Expression result.Reference(try_result); return; } - if (sel) { - VectorOperations::Copy(try_result, result, *sel, count, 0, 0, count); - } else { - VectorOperations::Copy(try_result, result, count, 0, 0); - } + VectorOperations::Copy(try_result, result, count, 0, 0); return; } catch (std::exception &ex) { ErrorData error(ex); diff --git a/src/duckdb/src/execution/index/art/art_builder.cpp b/src/duckdb/src/execution/index/art/art_builder.cpp index f6721a943..3a816db0e 100644 --- a/src/duckdb/src/execution/index/art/art_builder.cpp +++ b/src/duckdb/src/execution/index/art/art_builder.cpp @@ -36,7 +36,7 @@ ARTConflictType ARTBuilder::Build() { } reference ref(entry.node); - auto count = UnsafeNumericCast(start.len - prefix_depth); + auto count = UnsafeNumericCast(start.len - prefix_depth); Prefix::New(art, ref, start, prefix_depth, count); // Inline the row ID. diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 3e824fd4c..e9d678fd5 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -834,7 +834,16 @@ bool StringValueResult::AddRowInternal() { } } + const auto chunk_col_id_before = chunk_col_id; if (current_errors.HandleErrors(*this)) { + // Before we add row, invalid all columns that are not populated for this row (i.e., CSV rows have fewer fields + // than expected). Otherwise, uninitialized string_t with valid bits set would lead invalid memory access. + if (borked_rows.find(static_cast(number_of_rows)) != borked_rows.end()) { + for (idx_t cur_col_idx = chunk_col_id_before; cur_col_idx < validity_mask.size(); ++cur_col_idx) { + validity_mask[cur_col_idx]->SetInvalid(static_cast(number_of_rows)); + } + } + D_ASSERT(buffer_handles.find(current_line_position.begin.buffer_idx) != buffer_handles.end()); D_ASSERT(buffer_handles.find(current_line_position.end.buffer_idx) != buffer_handles.end()); line_positions_per_row[static_cast(number_of_rows)] = current_line_position; diff --git a/src/duckdb/src/function/table/arrow_conversion.cpp b/src/duckdb/src/function/table/arrow_conversion.cpp index 4a24000f4..cc262c6d5 100644 --- a/src/duckdb/src/function/table/arrow_conversion.cpp +++ b/src/duckdb/src/function/table/arrow_conversion.cpp @@ -1197,7 +1197,10 @@ void ArrowToDuckDBConversion::ColumnArrowToDuckDB(Vector &vector, ArrowArray &ar break; } case LogicalTypeId::UNION: { - auto type_ids = ArrowBufferData(array, array.n_buffers == 1 ? 0 : 1); + auto type_ids_buffer_idx = array.n_buffers == 1 ? 0 : 1; + auto effective_offset = + GetEffectiveOffset(array, NumericCast(parent_offset), chunk_offset, nested_offset); + auto type_ids = ArrowBufferData(array, type_ids_buffer_idx) + effective_offset; D_ASSERT(type_ids); auto members = UnionType::CopyMemberTypes(vector.GetType()); diff --git a/src/duckdb/src/function/table/system/test_all_types.cpp b/src/duckdb/src/function/table/system/test_all_types.cpp index 7fa69eb97..f55736bda 100644 --- a/src/duckdb/src/function/table/system/test_all_types.cpp +++ b/src/duckdb/src/function/table/system/test_all_types.cpp @@ -320,6 +320,49 @@ vector TestAllTypesFun::GetTestTypes(const bool use_large_enum, const result.emplace_back(LogicalType::TIME_NS, "time_ns"); + // GEOMETRY + // - For min, use a regular empty point + // - For max, use some complicated nested geometry collection with a variety of empty and non-empty geometries, + // to cover as many code paths as possible + + constexpr auto big_geom_wkt = R"WKT_LITERAL( + GEOMETRYCOLLECTION ( + POINT (1 2), + POINT EMPTY, + LINESTRING (0 0, 1 1), + LINESTRING EMPTY, + POLYGON ((0 0, 0 1, 1 1, 1 0, 0 0)), + POLYGON EMPTY, + MULTIPOINT ( + 5 6, + EMPTY + ), + MULTILINESTRING ( + (0 0, 1 1), + EMPTY, + (2 2, 3 3), + EMPTY + ), + MULTILINESTRING EMPTY, + MULTIPOLYGON ( + ((0 0, 0 1, 1 1, 1 0, 0 0)), + EMPTY, + ((0 0, 0 2, 2 2, 2 0, 0 0)), + EMPTY + ), + MULTIPOLYGON EMPTY, + GEOMETRYCOLLECTION ( + POINT (5 6) + ), + GEOMETRYCOLLECTION EMPTY + ) + )WKT_LITERAL"; + + auto min_geometry = Value("POINT EMPTY").DefaultCastAs(LogicalType::GEOMETRY()); + auto max_geometry = Value(big_geom_wkt).DefaultCastAs(LogicalType::GEOMETRY()); + + result.emplace_back(LogicalType::GEOMETRY(), "geometry", min_geometry, max_geometry); + return result; } @@ -354,7 +397,7 @@ static unique_ptr TestAllTypesBind(ClientContext &context, TableFu return std::move(result); } -unique_ptr TestAllTypesInit(ClientContext &context, TableFunctionInitInput &input) { +static unique_ptr TestAllTypesInit(ClientContext &context, TableFunctionInitInput &input) { auto &bind_data = input.bind_data->Cast(); auto result = make_uniq(); // 3 rows: min, max and NULL @@ -368,7 +411,7 @@ unique_ptr TestAllTypesInit(ClientContext &context, Ta return std::move(result); } -void TestAllTypesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { +static void TestAllTypesFunction(ClientContext &context, TableFunctionInput &data_p, DataChunk &output) { auto &data = data_p.global_state->Cast(); if (data.offset >= data.entries.size()) { // finished returning values diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 79ca65f3c..a02fd1a79 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "2-dev458" +#define DUCKDB_PATCH_VERSION "2-dev512" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 5 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.5.2-dev458" +#define DUCKDB_VERSION "v1.5.2-dev512" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "17491eb887" +#define DUCKDB_SOURCE_ID "d13a3403b3" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/http_util.hpp b/src/duckdb/src/include/duckdb/common/http_util.hpp index 1803ec3d2..bf57b8a7b 100644 --- a/src/duckdb/src/include/duckdb/common/http_util.hpp +++ b/src/duckdb/src/include/duckdb/common/http_util.hpp @@ -217,6 +217,9 @@ struct PostRequestInfo : public BaseRequest { class HTTPClient { public: + HTTPClient() = default; + explicit HTTPClient(const string &proto_host_port) : base_url(proto_host_port) { + } virtual ~HTTPClient() = default; virtual void Initialize(HTTPParams &http_params) = 0; @@ -228,6 +231,14 @@ class HTTPClient { virtual void Cleanup() {}; unique_ptr Request(BaseRequest &request); + + const string &GetBaseUrl() const { + return base_url; + } + +private: + //! The base URL (scheme + host + port) this client was created for + const string base_url; }; class HTTPUtil { @@ -250,6 +261,9 @@ class HTTPUtil { virtual unique_ptr InitializeClient(HTTPParams &http_params, const string &proto_host_port); + //! Close a client — implementations may cache it for reuse + virtual void CloseClient(unique_ptr &&client); + unique_ptr Request(BaseRequest &request); unique_ptr Request(BaseRequest &request, unique_ptr &client); diff --git a/src/duckdb/src/include/duckdb/common/types/geometry.hpp b/src/duckdb/src/include/duckdb/common/types/geometry.hpp index cd7bdf3eb..faaa4d614 100644 --- a/src/duckdb/src/include/duckdb/common/types/geometry.hpp +++ b/src/duckdb/src/include/duckdb/common/types/geometry.hpp @@ -251,6 +251,8 @@ class Geometry { //! Convert from WKT DUCKDB_API static bool FromString(const string_t &wkt_text, string_t &result, Vector &result_vector, bool strict); + DUCKDB_API static bool FromString(const string_t &wkt_text, string_t &result, Vector &result_vector, bool strict, + optional_idx query_location); //! Convert to WKT DUCKDB_API static string_t ToString(Vector &result, const string_t &geom); diff --git a/src/duckdb/src/include/duckdb/main/extension_entries.hpp b/src/duckdb/src/include/duckdb/main/extension_entries.hpp index 5503716d7..c60185146 100644 --- a/src/duckdb/src/include/duckdb/main/extension_entries.hpp +++ b/src/duckdb/src/include/duckdb/main/extension_entries.hpp @@ -127,6 +127,7 @@ static constexpr ExtensionFunctionEntry EXTENSION_FUNCTIONS[] = { {"ceiling", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"check_peg_parser", "autocomplete", CatalogType::TABLE_FUNCTION_ENTRY}, {"chr", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, + {"clear_httpfs_connection_cache", "httpfs", CatalogType::TABLE_FUNCTION_ENTRY}, {"corr", "core_functions", CatalogType::AGGREGATE_FUNCTION_ENTRY}, {"cos", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, {"cosh", "core_functions", CatalogType::SCALAR_FUNCTION_ENTRY}, @@ -1081,6 +1082,7 @@ static constexpr ExtensionEntry EXTENSION_SETTINGS[] = { {"http_retry_wait_ms", "httpfs"}, {"http_timeout", "httpfs"}, {"httpfs_client_implementation", "httpfs"}, + {"httpfs_connection_caching", "httpfs"}, {"iceberg_test_force_token_expiry", "iceberg"}, {"iceberg_via_aws_sdk_for_catalog_interactions", "iceberg"}, {"merge_http_secret_into_s3_request", "httpfs"}, diff --git a/src/duckdb/src/main/capi/arrow-c.cpp b/src/duckdb/src/main/capi/arrow-c.cpp index cf8037e5c..22daa69a2 100644 --- a/src/duckdb/src/main/capi/arrow-c.cpp +++ b/src/duckdb/src/main/capi/arrow-c.cpp @@ -10,6 +10,7 @@ using duckdb::ArrowResultWrapper; using duckdb::CClientArrowOptionsWrapper; using duckdb::Connection; using duckdb::DataChunk; +using duckdb::ErrorData; using duckdb::LogicalType; using duckdb::MaterializedQueryResult; using duckdb::PreparedStatementWrapper; @@ -180,7 +181,11 @@ duckdb_state duckdb_query_arrow_schema(duckdb_arrow result, duckdb_arrow_schema try { ArrowConverter::ToArrowSchema((ArrowSchema *)*out_schema, wrapper->result->types, wrapper->result->names, wrapper->result->client_properties); + } catch (std::exception &ex) { + wrapper->result->SetError(ErrorData(ex)); + return DuckDBError; } catch (...) { + wrapper->result->SetError(ErrorData("Unknown error in duckdb_query_arrow_schema")); return DuckDBError; } return DuckDBSuccess; diff --git a/src/duckdb/src/main/http/http_util.cpp b/src/duckdb/src/main/http/http_util.cpp index 9185a4941..340764ea0 100644 --- a/src/duckdb/src/main/http/http_util.cpp +++ b/src/duckdb/src/main/http/http_util.cpp @@ -133,7 +133,7 @@ BaseRequest::BaseRequest(RequestType type, const string &url, const HTTPHeaders class HTTPLibClient : public HTTPClient { public: - HTTPLibClient(HTTPParams &http_params, const string &proto_host_port) { + HTTPLibClient(HTTPParams &http_params, const string &proto_host_port) : HTTPClient(proto_host_port) { client = make_uniq(proto_host_port); Initialize(http_params); } @@ -225,6 +225,10 @@ unique_ptr HTTPUtil::InitializeClient(HTTPParams &http_params, const return make_uniq(http_params, proto_host_port); } +void HTTPUtil::CloseClient(unique_ptr &&) { + // default: no-op, client is destroyed +} + unique_ptr HTTPUtil::SendRequest(BaseRequest &request, unique_ptr &client) { if (!client) { client = InitializeClient(request.params, request.proto_host_port); diff --git a/src/duckdb/src/optimizer/common_subplan_optimizer.cpp b/src/duckdb/src/optimizer/common_subplan_optimizer.cpp index 697b5d43f..3c50237e2 100644 --- a/src/duckdb/src/optimizer/common_subplan_optimizer.cpp +++ b/src/duckdb/src/optimizer/common_subplan_optimizer.cpp @@ -835,9 +835,21 @@ class CommonSubplanFinder { // Create the materialized CTE and replace the common subplans with references to it auto &lowest_common_ancestor = subplan_info.lowest_common_ancestor.get(); - auto cte = make_uniq( - cte_name, cte_index, types.size(), std::move(primary_subplan.op.get()), - std::move(lowest_common_ancestor), CTEMaterialize::CTE_MATERIALIZE_DEFAULT); + const auto materialized_column_count = types.size(); + auto materialized_subplan = std::move(primary_subplan.op.get()); + auto remainder = std::move(lowest_common_ancestor); + vector> materialized_select_list; + const auto materialized_bindings = materialized_subplan->GetColumnBindings(); + for (idx_t i = 0; i < materialized_bindings.size(); i++) { + materialized_select_list.emplace_back( + make_uniq(types[i], materialized_bindings[i])); + } + auto materialized_projection = make_uniq(optimizer.binder.GenerateTableIndex(), + std::move(materialized_select_list)); + materialized_projection->children.emplace_back(std::move(materialized_subplan)); + auto cte = make_uniq(cte_name, cte_index, materialized_column_count, + std::move(materialized_projection), std::move(remainder), + CTEMaterialize::CTE_MATERIALIZE_DEFAULT); for (idx_t subplan_idx = 0; subplan_idx < subplan_info.subplans.size(); subplan_idx++) { const auto &subplan = subplan_info.subplans[subplan_idx]; subplan.op.get() = std::move(cte_refs[subplan_idx]); diff --git a/src/duckdb/src/parser/parser.cpp b/src/duckdb/src/parser/parser.cpp index d2ee7858d..87a807c30 100644 --- a/src/duckdb/src/parser/parser.cpp +++ b/src/duckdb/src/parser/parser.cpp @@ -233,6 +233,8 @@ void Parser::ParseQuery(const string &query) { } { if (options.extensions) { + bool has_strict_extension_error = false; + ErrorData last_strict_extension_error; for (auto &ext : options.extensions->ParserExtensions()) { if (!ext.parser_override) { continue; @@ -247,11 +249,20 @@ void Parser::ParseQuery(const string &query) { return; } if (options.parser_override_setting == AllowParserOverride::STRICT_OVERRIDE) { - ThrowParserOverrideError(result); + if (result.type == ParserExtensionResultType::DISPLAY_EXTENSION_ERROR) { + has_strict_extension_error = true; + last_strict_extension_error = std::move(result.error); + } else { + has_strict_extension_error = false; + } + continue; } else if (options.parser_override_setting == AllowParserOverride::FALLBACK_OVERRIDE) { continue; } } + if (options.parser_override_setting == AllowParserOverride::STRICT_OVERRIDE && has_strict_extension_error) { + last_strict_extension_error.Throw(); + } } PostgresParser::SetPreserveIdentifierCase(options.preserve_identifier_case); bool parsing_succeed = false; diff --git a/src/duckdb/src/storage/table/row_group_reorderer.cpp b/src/duckdb/src/storage/table/row_group_reorderer.cpp index 314848d59..de64dcc3d 100644 --- a/src/duckdb/src/storage/table/row_group_reorderer.cpp +++ b/src/duckdb/src/storage/table/row_group_reorderer.cpp @@ -233,9 +233,9 @@ Value RowGroupReorderer::RetrieveStat(const BaseStatistics &stats, OrderByStatis } switch (order_by) { case OrderByStatistics::MIN: - return StringStats::Min(stats); + return Value::BLOB_RAW(StringStats::Min(stats)); case OrderByStatistics::MAX: - return StringStats::Max(stats); + return Value::BLOB_RAW(StringStats::Max(stats)); default: throw InternalException("Unsupported OrderByStatistics for string"); }