Skip to content

Commit e802e74

Browse files
duckdblabs-bot and github-actions[bot]
authored and committed
Update vendored DuckDB sources to 22e6d1e375
1 parent ca70517 commit e802e74

21 files changed

Lines changed: 220 additions & 153 deletions

File tree

src/duckdb/src/execution/index/art/art.cpp

Lines changed: 14 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@
44
#include "duckdb/common/unordered_map.hpp"
55
#include "duckdb/common/vector_operations/vector_operations.hpp"
66
#include "duckdb/execution/expression_executor.hpp"
7+
#include "duckdb/execution/index/art/art_builder.hpp"
78
#include "duckdb/execution/index/art/art_key.hpp"
9+
#include "duckdb/execution/index/art/art_merger.hpp"
10+
#include "duckdb/execution/index/art/art_operator.hpp"
11+
#include "duckdb/execution/index/art/art_scanner.hpp"
812
#include "duckdb/execution/index/art/base_leaf.hpp"
913
#include "duckdb/execution/index/art/base_node.hpp"
1014
#include "duckdb/execution/index/art/iterator.hpp"
@@ -21,9 +25,6 @@
2125
#include "duckdb/storage/metadata/metadata_reader.hpp"
2226
#include "duckdb/storage/table/scan_state.hpp"
2327
#include "duckdb/storage/table_io_manager.hpp"
24-
#include "duckdb/execution/index/art/art_scanner.hpp"
25-
#include "duckdb/execution/index/art/art_merger.hpp"
26-
#include "duckdb/execution/index/art/art_operator.hpp"
2728

2829
namespace duckdb {
2930

@@ -424,76 +425,17 @@ void ART::GenerateKeyVectors(ArenaAllocator &allocator, DataChunk &input, Vector
424425
}
425426

426427
//===--------------------------------------------------------------------===//
427-
// Construct from sorted data.
428+
// Build from sorted data.
428429
//===--------------------------------------------------------------------===//
429430

430-
bool ART::ConstructInternal(const unsafe_vector<ARTKey> &keys, const unsafe_vector<ARTKey> &row_ids, Node &node,
431-
ARTKeySection &section) {
432-
D_ASSERT(section.start < keys.size());
433-
D_ASSERT(section.end < keys.size());
434-
D_ASSERT(section.start <= section.end);
435-
436-
auto &start = keys[section.start];
437-
auto &end = keys[section.end];
438-
D_ASSERT(start.len != 0);
439-
440-
// Increment the depth until we reach a leaf or find a mismatching byte.
441-
auto prefix_depth = section.depth;
442-
while (start.len != section.depth && start.ByteMatches(end, section.depth)) {
443-
section.depth++;
444-
}
445-
446-
if (start.len == section.depth) {
447-
// We reached a leaf. All the bytes of start_key and end_key match.
448-
auto row_id_count = section.end - section.start + 1;
449-
if (IsUnique() && row_id_count != 1) {
450-
return false;
451-
}
452-
453-
reference<Node> ref(node);
454-
auto count = UnsafeNumericCast<uint8_t>(start.len - prefix_depth);
455-
Prefix::New(*this, ref, start, prefix_depth, count);
456-
if (row_id_count == 1) {
457-
Leaf::New(ref, row_ids[section.start].GetRowId());
458-
} else {
459-
// Loop and insert the row IDs.
460-
// We cannot use Construct in the leaf because row IDs are not sorted.
461-
ArenaAllocator arena(BufferAllocator::Get(db));
462-
for (idx_t i = section.start; i < section.start + row_id_count; i++) {
463-
ARTOperator::Insert(arena, *this, ref, row_ids[i], 0, row_ids[i], GateStatus::GATE_SET, nullptr,
464-
IndexAppendMode::DEFAULT);
465-
}
466-
ref.get().SetGateStatus(GateStatus::GATE_SET);
467-
}
468-
return true;
469-
}
470-
471-
// Create a new node and recurse.
472-
unsafe_vector<ARTKeySection> children;
473-
section.GetChildSections(children, keys);
474-
475-
// Create the prefix.
476-
reference<Node> ref(node);
477-
auto prefix_length = section.depth - prefix_depth;
478-
Prefix::New(*this, ref, start, prefix_depth, prefix_length);
479-
480-
// Create the node.
481-
Node::New(*this, ref, Node::GetNodeType(children.size()));
482-
for (auto &child : children) {
483-
Node new_child;
484-
auto success = ConstructInternal(keys, row_ids, new_child, child);
485-
Node::InsertChild(*this, ref, child.key_byte, new_child);
486-
if (!success) {
487-
return false;
488-
}
489-
}
490-
return true;
491-
}
431+
ARTConflictType ART::Build(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count) {
432+
ArenaAllocator arena(BufferAllocator::Get(db));
433+
ARTBuilder builder(arena, *this, keys, row_ids);
434+
builder.Init(tree, row_count - 1);
492435

493-
bool ART::Construct(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count) {
494-
ARTKeySection section(0, row_count - 1, 0, 0);
495-
if (!ConstructInternal(keys, row_ids, tree, section)) {
496-
return false;
436+
auto result = builder.Build();
437+
if (result != ARTConflictType::NO_CONFLICT) {
438+
return result;
497439
}
498440

499441
#ifdef DEBUG
@@ -504,7 +446,8 @@ bool ART::Construct(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids,
504446
it.Scan(empty_key, NumericLimits<row_t>().Maximum(), row_ids_debug, false);
505447
D_ASSERT(row_count == row_ids_debug.size());
506448
#endif
507-
return true;
449+
450+
return ARTConflictType::NO_CONFLICT;
508451
}
509452

510453
//===--------------------------------------------------------------------===//
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#include "duckdb/execution/index/art/art_builder.hpp"
2+
3+
#include "duckdb/execution/index/art/art_key.hpp"
4+
#include "duckdb/execution/index/art/prefix.hpp"
5+
#include "duckdb/execution/index/art/leaf.hpp"
6+
#include "duckdb/execution/index/art/art_operator.hpp"
7+
8+
namespace duckdb {
9+
10+
ARTConflictType ARTBuilder::Build() {
11+
while (!s.empty()) {
12+
// Copy the entry so we can pop it.
13+
auto entry = s.top();
14+
s.pop();
15+
16+
D_ASSERT(entry.start < keys.size());
17+
D_ASSERT(entry.end < keys.size());
18+
D_ASSERT(entry.start <= entry.end);
19+
20+
auto &start = keys[entry.start];
21+
auto &end = keys[entry.end];
22+
D_ASSERT(start.len != 0);
23+
24+
// Increment the depth until we reach a leaf or find a mismatching byte.
25+
auto prefix_depth = entry.depth;
26+
while (start.len != entry.depth && start.ByteMatches(end, entry.depth)) {
27+
entry.depth++;
28+
}
29+
30+
// True, if we reached a leaf: all bytes of start_key and end_key match.
31+
if (start.len == entry.depth) {
32+
// Get the number of row IDs in the leaf.
33+
auto row_id_count = entry.end - entry.start + 1;
34+
if (art.IsUnique() && row_id_count != 1) {
35+
return ARTConflictType::CONSTRAINT;
36+
}
37+
38+
reference<Node> ref(entry.node);
39+
auto count = UnsafeNumericCast<uint8_t>(start.len - prefix_depth);
40+
Prefix::New(art, ref, start, prefix_depth, count);
41+
42+
// Inline the row ID.
43+
if (row_id_count == 1) {
44+
Leaf::New(ref, row_ids[entry.start].GetRowId());
45+
continue;
46+
}
47+
48+
// Loop and insert the row IDs.
49+
// We cannot iterate into the nested leaf with the builder
50+
// because row IDs are not sorted.
51+
for (idx_t i = entry.start; i < entry.start + row_id_count; i++) {
52+
ARTOperator::Insert(arena, art, ref, row_ids[i], 0, row_ids[i], GateStatus::GATE_SET, nullptr,
53+
IndexAppendMode::DEFAULT);
54+
}
55+
ref.get().SetGateStatus(GateStatus::GATE_SET);
56+
continue;
57+
}
58+
59+
// Create the prefix. Returns early, if the prefix_length is zero.
60+
reference<Node> ref(entry.node);
61+
auto prefix_length = entry.depth - prefix_depth;
62+
Prefix::New(art, ref, start, prefix_depth, prefix_length);
63+
64+
vector<idx_t> child_offsets;
65+
child_offsets.emplace_back(entry.start);
66+
for (idx_t i = entry.start + 1; i <= entry.end; i++) {
67+
if (keys[i - 1].data[entry.depth] != keys[i].data[entry.depth]) {
68+
child_offsets.emplace_back(i);
69+
}
70+
}
71+
72+
// Create a new node containing the children.
73+
Node::New(art, ref, Node::GetNodeType(child_offsets.size()));
74+
auto start_offset = child_offsets[0];
75+
for (idx_t i = 1; i <= child_offsets.size(); i++) {
76+
auto child_byte = keys[start_offset].data[entry.depth];
77+
// FIXME: Improve performance by either returning a reference to the child directly,
78+
// FIXME: or by calling InsertChild after processing the child (at the end of the stack loop).
79+
Node::InsertChild(art, ref, child_byte);
80+
auto child = ref.get().Node::GetChildMutable(art, child_byte, true);
81+
auto end_offset = i != child_offsets.size() ? child_offsets[i] - 1 : entry.end;
82+
s.emplace(*child, start_offset, end_offset, entry.depth + 1);
83+
start_offset = end_offset + 1;
84+
}
85+
}
86+
87+
// We exhausted the stack.
88+
return ARTConflictType::NO_CONFLICT;
89+
}
90+
91+
} // namespace duckdb

src/duckdb/src/execution/index/art/art_key.cpp

Lines changed: 0 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -163,27 +163,4 @@ idx_t ARTKey::GetMismatchPos(const ARTKey &other, const idx_t start) const {
163163
return DConstants::INVALID_INDEX;
164164
}
165165

166-
//===--------------------------------------------------------------------===//
167-
// ARTKeySection
168-
//===--------------------------------------------------------------------===//
169-
170-
ARTKeySection::ARTKeySection(idx_t start, idx_t end, idx_t depth, data_t byte)
171-
: start(start), end(end), depth(depth), key_byte(byte) {
172-
}
173-
174-
ARTKeySection::ARTKeySection(idx_t start, idx_t end, const unsafe_vector<ARTKey> &keys, const ARTKeySection &section)
175-
: start(start), end(end), depth(section.depth + 1), key_byte(keys[end].data[section.depth]) {
176-
}
177-
178-
void ARTKeySection::GetChildSections(unsafe_vector<ARTKeySection> &sections, const unsafe_vector<ARTKey> &keys) {
179-
auto child_idx = start;
180-
for (idx_t i = start + 1; i <= end; i++) {
181-
if (keys[i - 1].data[depth] != keys[i].data[depth]) {
182-
sections.emplace_back(child_idx, i - 1, keys, *this);
183-
child_idx = i;
184-
}
185-
}
186-
sections.emplace_back(child_idx, end, keys, *this);
187-
}
188-
189166
} // namespace duckdb

src/duckdb/src/execution/index/art/node.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -198,31 +198,31 @@ void Node::DeleteChild(ART &art, Node &node, Node &prefix, const uint8_t byte, c
198198
//===--------------------------------------------------------------------===//
199199

200200
template <class NODE>
201-
unsafe_optional_ptr<Node> GetChildInternal(ART &art, NODE &node, const uint8_t byte) {
201+
static unsafe_optional_ptr<Node> GetChildInternal(ART &art, NODE &node, const uint8_t byte, const bool unsafe) {
202202
D_ASSERT(node.HasMetadata());
203203

204204
auto type = node.GetType();
205205
switch (type) {
206206
case NType::NODE_4:
207-
return Node4::GetChild(Node::Ref<Node4>(art, node, type), byte);
207+
return Node4::GetChild(Node::Ref<Node4>(art, node, type), byte, unsafe);
208208
case NType::NODE_16:
209-
return Node16::GetChild(Node::Ref<Node16>(art, node, type), byte);
209+
return Node16::GetChild(Node::Ref<Node16>(art, node, type), byte, unsafe);
210210
case NType::NODE_48:
211-
return Node48::GetChild(Node::Ref<Node48>(art, node, type), byte);
211+
return Node48::GetChild(Node::Ref<Node48>(art, node, type), byte, unsafe);
212212
case NType::NODE_256: {
213-
return Node256::GetChild(Node::Ref<Node256>(art, node, type), byte);
213+
return Node256::GetChild(Node::Ref<Node256>(art, node, type), byte, unsafe);
214214
}
215215
default:
216216
throw InternalException("Invalid node type for GetChildInternal: %d.", type);
217217
}
218218
}
219219

220220
const unsafe_optional_ptr<Node> Node::GetChild(ART &art, const uint8_t byte) const {
221-
return GetChildInternal(art, *this, byte);
221+
return GetChildInternal(art, *this, byte, false);
222222
}
223223

224-
unsafe_optional_ptr<Node> Node::GetChildMutable(ART &art, const uint8_t byte) const {
225-
return GetChildInternal(art, *this, byte);
224+
unsafe_optional_ptr<Node> Node::GetChildMutable(ART &art, const uint8_t byte, const bool unsafe) const {
225+
return GetChildInternal(art, *this, byte, unsafe);
226226
}
227227

228228
template <class NODE>

src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ SinkResultType PhysicalCreateARTIndex::SinkSorted(OperatorSinkInput &input) cons
114114
auto art = make_uniq<ART>(info->index_name, l_index->GetConstraintType(), l_index->GetColumnIds(),
115115
l_index->table_io_manager, l_index->unbound_expressions, storage.db,
116116
l_index->Cast<ART>().allocators);
117-
if (!art->Construct(l_state.keys, l_state.row_ids, l_state.key_chunk.size())) {
117+
if (art->Build(l_state.keys, l_state.row_ids, l_state.key_chunk.size()) != ARTConflictType::NO_CONFLICT) {
118118
throw ConstraintException("Data contains duplicates on indexed column(s)");
119119
}
120120

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "0-dev3109"
2+
#define DUCKDB_PATCH_VERSION "0-dev3141"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 4
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.4.0-dev3109"
11+
#define DUCKDB_VERSION "v1.4.0-dev3141"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "d229d97f40"
14+
#define DUCKDB_SOURCE_ID "22e6d1e375"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/execution/index/art/art.hpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ class ART : public BoundIndex {
9292
//! Drop the ART.
9393
void CommitDrop(IndexLock &index_lock) override;
9494

95-
//! Construct an ART from a vector of sorted keys and their row IDs.
96-
bool Construct(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count);
95+
//! Build an ART from a vector of sorted keys and their row IDs.
96+
ARTConflictType Build(unsafe_vector<ARTKey> &keys, unsafe_vector<ARTKey> &row_ids, const idx_t row_count);
9797

9898
//! Merge another ART into this ART. Both must be locked.
9999
//! FIXME: Return ARTConflictType instead of a boolean.
@@ -138,9 +138,6 @@ class ART : public BoundIndex {
138138
string GetConstraintViolationMessage(VerifyExistenceType verify_type, idx_t failed_index,
139139
DataChunk &input) override;
140140

141-
bool ConstructInternal(const unsafe_vector<ARTKey> &keys, const unsafe_vector<ARTKey> &row_ids, Node &node,
142-
ARTKeySection &section);
143-
144141
void InitializeMergeUpperBounds(unsafe_vector<idx_t> &upper_bounds);
145142
void InitializeMerge(Node &node, unsafe_vector<idx_t> &upper_bounds);
146143

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
//===----------------------------------------------------------------------===//
2+
// DuckDB
3+
//
4+
// duckdb/execution/index/art/art_builder.hpp
5+
//
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#pragma once
10+
11+
#include "duckdb/execution/index/art/art.hpp"
12+
#include "duckdb/common/stack.hpp"
13+
14+
namespace duckdb {
15+
16+
class ARTBuilder {
17+
public:
18+
ARTBuilder() = delete;
19+
ARTBuilder(ArenaAllocator &arena, ART &art, const unsafe_vector<ARTKey> &keys, const unsafe_vector<ARTKey> &row_ids)
20+
: arena(arena), art(art), keys(keys), row_ids(row_ids) {
21+
}
22+
23+
public:
24+
//! Initialize the ART builder by passing a reference to the root node.
25+
void Init(Node &node, const idx_t end) {
26+
s.emplace(node, 0, end, 0);
27+
}
28+
//! Build the ART starting at the first entry in the stack.
29+
ARTConflictType Build();
30+
31+
private:
32+
struct NodeEntry {
33+
NodeEntry() = delete;
34+
NodeEntry(Node &node, const idx_t start, const idx_t end, const idx_t depth)
35+
: node(node), start(start), end(end), depth(depth) {};
36+
37+
Node &node;
38+
idx_t start;
39+
idx_t end;
40+
idx_t depth;
41+
};
42+
43+
//! The arena holds any temporary memory allocated during the Build phase.
44+
ArenaAllocator &arena;
45+
//! The ART holding the node memory.
46+
ART &art;
47+
//! The keys to build the ART from.
48+
const unsafe_vector<ARTKey> &keys;
49+
//! The row IDs matching the keys.
50+
const unsafe_vector<ARTKey> &row_ids;
51+
//! The stack. While building, NodeEntry elements are pushed onto the stack.
52+
stack<NodeEntry> s;
53+
};
54+
55+
} // namespace duckdb

0 commit comments

Comments
 (0)