Skip to content

Commit 8bb5b83

Browse files
Update vendored DuckDB sources to 885d2b4e8d
1 parent 9118691 commit 8bb5b83

8 files changed

Lines changed: 136 additions & 58 deletions

File tree

src/duckdb/extension/json/json_functions/json_transform.cpp

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,8 @@ bool JSONTransform::GetStringVector(yyjson_val *vals[], const idx_t count, const
262262
if (!unsafe_yyjson_is_str(val)) {
263263
validity.SetInvalid(i);
264264
if (success && options.strict_cast && !unsafe_yyjson_is_str(val)) {
265-
options.error_message = StringUtil::Format("Unable to cast '%s' to " + EnumUtil::ToString(target.id()),
266-
JSONCommon::ValToString(val, 50));
265+
options.error_message = StringUtil::Format(
266+
"Unable to cast '%s' to %s", JSONCommon::ValToString(val, 50), EnumUtil::ToString(target.id()));
267267
options.object_index = i;
268268
success = false;
269269
}
@@ -424,8 +424,8 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
424424
if (found_keys[col_idx]) {
425425
if (success && options.error_duplicate_key) {
426426
options.error_message =
427-
StringUtil::Format("Duplicate key \"" + string(key_ptr, key_len) + "\" in object %s",
428-
JSONCommon::ValToString(objects[i], 50));
427+
StringUtil::Format("Object %s has duplicate key \"%s\"",
428+
JSONCommon::ValToString(objects[i], 50), string(key_ptr, key_len));
429429
options.object_index = i;
430430
success = false;
431431
}
@@ -436,8 +436,8 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
436436
}
437437
} else if (success && error_unknown_key && options.error_unknown_key) {
438438
options.error_message =
439-
StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
440-
JSONCommon::ValToString(objects[i], 50));
439+
StringUtil::Format("Object %s has unknown key \"%s\"", JSONCommon::ValToString(objects[i], 50),
440+
string(key_ptr, key_len));
441441
options.object_index = i;
442442
success = false;
443443
}
@@ -453,8 +453,8 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
453453
nested_vals[col_idx][i] = nullptr;
454454

455455
if (success && options.error_missing_key) {
456-
options.error_message = StringUtil::Format("Object %s does not have key \"" + names[col_idx] + "\"",
457-
JSONCommon::ValToString(objects[i], 50));
456+
options.error_message = StringUtil::Format("Object %s does not have key \"%s\"",
457+
JSONCommon::ValToString(objects[i], 50), names[col_idx]);
458458
options.object_index = i;
459459
success = false;
460460
}
@@ -750,7 +750,7 @@ static bool TransformObjectToMap(yyjson_val *objects[], yyjson_alc *alc, Vector
750750
// Transform keys
751751
if (!JSONTransform::Transform(keys, alc, MapVector::GetKeys(result), list_size, options, nullptr)) {
752752
throw ConversionException(
753-
StringUtil::Format(options.error_message + ". Cannot default to NULL, because map keys cannot be NULL"));
753+
StringUtil::Format("%s. Cannot default to NULL, because map keys cannot be NULL", options.error_message));
754754
}
755755

756756
// Transform values

src/duckdb/src/execution/operator/join/physical_hash_join.cpp

Lines changed: 36 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -733,9 +733,9 @@ void JoinFilterPushdownInfo::PushInFilter(const JoinFilterPushdownFilter &info,
733733
return;
734734
}
735735

736-
unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, JoinHashTable &ht,
736+
unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, optional_ptr<JoinHashTable> ht,
737737
JoinFilterGlobalState &gstate,
738-
const PhysicalOperator &op) const {
738+
const PhysicalComparisonJoin &op) const {
739739
// finalize the min/max aggregates
740740
vector<LogicalType> min_max_types;
741741
for (auto &aggr_expr : min_max_aggregates) {
@@ -753,6 +753,7 @@ unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, J
753753
auto dynamic_or_filter_threshold = ClientConfig::GetSetting<DynamicOrFilterThresholdSetting>(context);
754754
// create a filter for each of the aggregates
755755
for (idx_t filter_idx = 0; filter_idx < join_condition.size(); filter_idx++) {
756+
const auto cmp = op.conditions[join_condition[filter_idx]].comparison;
756757
for (auto &info : probe_info) {
757758
auto filter_col_idx = info.columns[filter_idx].probe_column_index.column_index;
758759
auto min_idx = filter_idx * 2;
@@ -767,22 +768,44 @@ unique_ptr<DataChunk> JoinFilterPushdownInfo::Finalize(ClientContext &context, J
767768
continue;
768769
}
769770
// if the HT is small we can generate a complete "OR" filter
770-
if (ht.Count() > 1 && ht.Count() <= dynamic_or_filter_threshold) {
771-
PushInFilter(info, ht, op, filter_idx, filter_col_idx);
771+
if (ht && ht->Count() > 1 && ht->Count() <= dynamic_or_filter_threshold) {
772+
PushInFilter(info, *ht, op, filter_idx, filter_col_idx);
772773
}
773774

774775
if (Value::NotDistinctFrom(min_val, max_val)) {
775-
// min = max - generate an equality filter
776-
auto constant_filter = make_uniq<ConstantFilter>(ExpressionType::COMPARE_EQUAL, std::move(min_val));
776+
// min = max - single value
777+
// generate a "one-sided" comparison filter for the LHS
778+
// Note that this also works for equalities.
779+
auto constant_filter = make_uniq<ConstantFilter>(cmp, std::move(min_val));
777780
info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(constant_filter));
778781
} else {
779782
// min != max - generate a range filter
780-
auto greater_equals =
781-
make_uniq<ConstantFilter>(ExpressionType::COMPARE_GREATERTHANOREQUALTO, std::move(min_val));
782-
info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(greater_equals));
783-
auto less_equals =
784-
make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHANOREQUALTO, std::move(max_val));
785-
info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(less_equals));
783+
// for non-equalities, the range must be half-open
784+
// e.g., for lhs < rhs we can only use lhs <= max
785+
switch (cmp) {
786+
case ExpressionType::COMPARE_EQUAL:
787+
case ExpressionType::COMPARE_GREATERTHAN:
788+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO: {
789+
auto greater_equals =
790+
make_uniq<ConstantFilter>(ExpressionType::COMPARE_GREATERTHANOREQUALTO, std::move(min_val));
791+
info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(greater_equals));
792+
break;
793+
}
794+
default:
795+
break;
796+
}
797+
switch (cmp) {
798+
case ExpressionType::COMPARE_EQUAL:
799+
case ExpressionType::COMPARE_LESSTHAN:
800+
case ExpressionType::COMPARE_LESSTHANOREQUALTO: {
801+
auto less_equals =
802+
make_uniq<ConstantFilter>(ExpressionType::COMPARE_LESSTHANOREQUALTO, std::move(max_val));
803+
info.dynamic_filters->PushFilter(op, filter_col_idx, std::move(less_equals));
804+
break;
805+
}
806+
default:
807+
break;
808+
}
786809
}
787810
}
788811
}
@@ -857,7 +880,7 @@ SinkFinalizeType PhysicalHashJoin::Finalize(Pipeline &pipeline, Event &event, Cl
857880
Value min;
858881
Value max;
859882
if (filter_pushdown && !sink.skip_filter_pushdown && ht.Count() > 0) {
860-
auto final_min_max = filter_pushdown->Finalize(context, ht, *sink.global_filter_state, *this);
883+
auto final_min_max = filter_pushdown->Finalize(context, &ht, *sink.global_filter_state, *this);
861884
min = final_min_max->data[0].GetValue(0);
862885
max = final_min_max->data[1].GetValue(0);
863886
} else if (TypeIsIntegral(conditions[0].right->return_type.InternalType())) {

src/duckdb/src/execution/operator/join/physical_nested_loop_join.cpp

Lines changed: 68 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,23 @@ namespace duckdb {
1111

1212
PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, PhysicalOperator &left, PhysicalOperator &right,
1313
vector<JoinCondition> cond, JoinType join_type,
14-
idx_t estimated_cardinality)
14+
idx_t estimated_cardinality,
15+
unique_ptr<JoinFilterPushdownInfo> pushdown_info_p)
1516
: PhysicalComparisonJoin(op, PhysicalOperatorType::NESTED_LOOP_JOIN, std::move(cond), join_type,
1617
estimated_cardinality) {
18+
19+
filter_pushdown = std::move(pushdown_info_p);
20+
1721
children.push_back(left);
1822
children.push_back(right);
1923
}
2024

25+
PhysicalNestedLoopJoin::PhysicalNestedLoopJoin(LogicalOperator &op, PhysicalOperator &left, PhysicalOperator &right,
26+
vector<JoinCondition> cond, JoinType join_type,
27+
idx_t estimated_cardinality)
28+
: PhysicalNestedLoopJoin(op, left, right, std::move(cond), join_type, estimated_cardinality, nullptr) {
29+
}
30+
2131
bool PhysicalJoin::HasNullValues(DataChunk &chunk) {
2232
for (idx_t col_idx = 0; col_idx < chunk.ColumnCount(); col_idx++) {
2333
UnifiedVectorFormat vdata;
@@ -130,30 +140,16 @@ bool PhysicalNestedLoopJoin::IsSupported(const vector<JoinCondition> &conditions
130140
//===--------------------------------------------------------------------===//
131141
// Sink
132142
//===--------------------------------------------------------------------===//
133-
class NestedLoopJoinLocalState : public LocalSinkState {
134-
public:
135-
explicit NestedLoopJoinLocalState(ClientContext &context, const vector<JoinCondition> &conditions)
136-
: rhs_executor(context) {
137-
vector<LogicalType> condition_types;
138-
for (auto &cond : conditions) {
139-
rhs_executor.AddExpression(*cond.right);
140-
condition_types.push_back(cond.right->return_type);
141-
}
142-
right_condition.Initialize(Allocator::Get(context), condition_types);
143-
}
144-
145-
//! The chunk holding the right condition
146-
DataChunk right_condition;
147-
//! The executor of the RHS condition
148-
ExpressionExecutor rhs_executor;
149-
};
150-
151143
class NestedLoopJoinGlobalState : public GlobalSinkState {
152144
public:
153145
explicit NestedLoopJoinGlobalState(ClientContext &context, const PhysicalNestedLoopJoin &op)
154146
: right_payload_data(context, op.children[1].get().GetTypes()),
155147
right_condition_data(context, op.GetJoinTypes()), has_null(false),
156148
right_outer(PropagatesBuildSide(op.join_type)) {
149+
if (op.filter_pushdown) {
150+
skip_filter_pushdown = op.filter_pushdown->probe_info.empty();
151+
global_filter_state = op.filter_pushdown->GetGlobalState(context, op);
152+
}
157153
}
158154

159155
mutex nj_lock;
@@ -165,6 +161,35 @@ class NestedLoopJoinGlobalState : public GlobalSinkState {
165161
atomic<bool> has_null;
166162
//! A bool indicating for each tuple in the RHS if they found a match (only used in FULL OUTER JOIN)
167163
OuterJoinMarker right_outer;
164+
//! Should we not bother pushing down filters?
165+
bool skip_filter_pushdown = false;
166+
//! The global filter states to push down (if any)
167+
unique_ptr<JoinFilterGlobalState> global_filter_state;
168+
};
169+
170+
class NestedLoopJoinLocalState : public LocalSinkState {
171+
public:
172+
explicit NestedLoopJoinLocalState(ClientContext &context, const PhysicalNestedLoopJoin &op,
173+
NestedLoopJoinGlobalState &gstate)
174+
: rhs_executor(context) {
175+
vector<LogicalType> condition_types;
176+
for (auto &cond : op.conditions) {
177+
rhs_executor.AddExpression(*cond.right);
178+
condition_types.push_back(cond.right->return_type);
179+
}
180+
right_condition.Initialize(Allocator::Get(context), condition_types);
181+
182+
if (op.filter_pushdown) {
183+
local_filter_state = op.filter_pushdown->GetLocalState(*gstate.global_filter_state);
184+
}
185+
}
186+
187+
//! The chunk holding the right condition
188+
DataChunk right_condition;
189+
//! The executor of the RHS condition
190+
ExpressionExecutor rhs_executor;
191+
//! Local state for accumulating filter statistics
192+
unique_ptr<JoinFilterLocalState> local_filter_state;
168193
};
169194

170195
vector<LogicalType> PhysicalNestedLoopJoin::GetJoinTypes() const {
@@ -178,40 +203,54 @@ vector<LogicalType> PhysicalNestedLoopJoin::GetJoinTypes() const {
178203
SinkResultType PhysicalNestedLoopJoin::Sink(ExecutionContext &context, DataChunk &chunk,
179204
OperatorSinkInput &input) const {
180205
auto &gstate = input.global_state.Cast<NestedLoopJoinGlobalState>();
181-
auto &nlj_state = input.local_state.Cast<NestedLoopJoinLocalState>();
206+
auto &lstate = input.local_state.Cast<NestedLoopJoinLocalState>();
182207

183208
// resolve the join expression of the right side
184-
nlj_state.right_condition.Reset();
185-
nlj_state.rhs_executor.Execute(chunk, nlj_state.right_condition);
209+
lstate.right_condition.Reset();
210+
lstate.rhs_executor.Execute(chunk, lstate.right_condition);
211+
212+
if (filter_pushdown && !gstate.skip_filter_pushdown) {
213+
filter_pushdown->Sink(lstate.right_condition, *lstate.local_filter_state);
214+
}
186215

187216
// if we have not seen any NULL values yet, and we are performing a MARK join, check if there are NULL values in
188217
// this chunk
189218
if (join_type == JoinType::MARK && !gstate.has_null) {
190-
if (HasNullValues(nlj_state.right_condition)) {
219+
if (HasNullValues(lstate.right_condition)) {
191220
gstate.has_null = true;
192221
}
193222
}
194223

195224
// append the payload data and the conditions
196225
lock_guard<mutex> nj_guard(gstate.nj_lock);
197226
gstate.right_payload_data.Append(chunk);
198-
gstate.right_condition_data.Append(nlj_state.right_condition);
227+
gstate.right_condition_data.Append(lstate.right_condition);
199228
return SinkResultType::NEED_MORE_INPUT;
200229
}
201230

202231
SinkCombineResultType PhysicalNestedLoopJoin::Combine(ExecutionContext &context,
203232
OperatorSinkCombineInput &input) const {
233+
auto &gstate = input.global_state.Cast<NestedLoopJoinGlobalState>();
234+
auto &lstate = input.local_state.Cast<NestedLoopJoinLocalState>();
235+
204236
auto &client_profiler = QueryProfiler::Get(context.client);
205237
context.thread.profiler.Flush(*this);
206238
client_profiler.Flush(context.thread.profiler);
239+
if (filter_pushdown && !gstate.skip_filter_pushdown) {
240+
filter_pushdown->Combine(*gstate.global_filter_state, *lstate.local_filter_state);
241+
}
207242
return SinkCombineResultType::FINISHED;
208243
}
209244

210245
SinkFinalizeType PhysicalNestedLoopJoin::Finalize(Pipeline &pipeline, Event &event, ClientContext &context,
211246
OperatorSinkFinalizeInput &input) const {
212-
auto &gstate = input.global_state.Cast<NestedLoopJoinGlobalState>();
213-
gstate.right_outer.Initialize(gstate.right_payload_data.Count());
214-
if (gstate.right_payload_data.Count() == 0 && EmptyResultIfRHSIsEmpty()) {
247+
auto &gsink = input.global_state.Cast<NestedLoopJoinGlobalState>();
248+
if (filter_pushdown && !gsink.skip_filter_pushdown) {
249+
(void)filter_pushdown->Finalize(context, nullptr, *gsink.global_filter_state, *this);
250+
}
251+
252+
gsink.right_outer.Initialize(gsink.right_payload_data.Count());
253+
if (gsink.right_payload_data.Count() == 0 && EmptyResultIfRHSIsEmpty()) {
215254
return SinkFinalizeType::NO_OUTPUT_POSSIBLE;
216255
}
217256
return SinkFinalizeType::READY;
@@ -222,7 +261,8 @@ unique_ptr<GlobalSinkState> PhysicalNestedLoopJoin::GetGlobalSinkState(ClientCon
222261
}
223262

224263
unique_ptr<LocalSinkState> PhysicalNestedLoopJoin::GetLocalSinkState(ExecutionContext &context) const {
225-
return make_uniq<NestedLoopJoinLocalState>(context.client, conditions);
264+
auto &gstate = sink_state->Cast<NestedLoopJoinGlobalState>();
265+
return make_uniq<NestedLoopJoinLocalState>(context.client, *this, gstate);
226266
}
227267

228268
//===--------------------------------------------------------------------===//

src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ PhysicalOperator &PhysicalPlanGenerator::PlanComparisonJoin(LogicalComparisonJoi
9494
if (PhysicalNestedLoopJoin::IsSupported(op.conditions, op.join_type)) {
9595
// inequality join: use nested loop
9696
return Make<PhysicalNestedLoopJoin>(op, left, right, std::move(op.conditions), op.join_type,
97-
op.estimated_cardinality);
97+
op.estimated_cardinality, std::move(op.filter_pushdown));
9898
}
9999

100100
for (auto &cond : op.conditions) {

src/duckdb/src/function/table/version/pragma_version.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#ifndef DUCKDB_PATCH_VERSION
2-
#define DUCKDB_PATCH_VERSION "0-dev2940"
2+
#define DUCKDB_PATCH_VERSION "0-dev2947"
33
#endif
44
#ifndef DUCKDB_MINOR_VERSION
55
#define DUCKDB_MINOR_VERSION 3
@@ -8,10 +8,10 @@
88
#define DUCKDB_MAJOR_VERSION 1
99
#endif
1010
#ifndef DUCKDB_VERSION
11-
#define DUCKDB_VERSION "v1.3.0-dev2940"
11+
#define DUCKDB_VERSION "v1.3.0-dev2947"
1212
#endif
1313
#ifndef DUCKDB_SOURCE_ID
14-
#define DUCKDB_SOURCE_ID "56d42bc46a"
14+
#define DUCKDB_SOURCE_ID "885d2b4e8d"
1515
#endif
1616
#include "duckdb/function/table/system_functions.hpp"
1717
#include "duckdb/main/database.hpp"

src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ class DataChunk;
1717
class DynamicTableFilterSet;
1818
class LogicalGet;
1919
class JoinHashTable;
20+
class PhysicalComparisonJoin;
2021
struct GlobalUngroupedAggregateState;
2122
struct LocalUngroupedAggregateState;
2223

@@ -69,8 +70,8 @@ struct JoinFilterPushdownInfo {
6970

7071
void Sink(DataChunk &chunk, JoinFilterLocalState &lstate) const;
7172
void Combine(JoinFilterGlobalState &gstate, JoinFilterLocalState &lstate) const;
72-
unique_ptr<DataChunk> Finalize(ClientContext &context, JoinHashTable &ht, JoinFilterGlobalState &gstate,
73-
const PhysicalOperator &op) const;
73+
unique_ptr<DataChunk> Finalize(ClientContext &context, optional_ptr<JoinHashTable> ht,
74+
JoinFilterGlobalState &gstate, const PhysicalComparisonJoin &op) const;
7475

7576
private:
7677
void PushInFilter(const JoinFilterPushdownFilter &info, JoinHashTable &ht, const PhysicalOperator &op,

src/duckdb/src/include/duckdb/execution/operator/join/physical_nested_loop_join.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ class PhysicalNestedLoopJoin : public PhysicalComparisonJoin {
1818
static constexpr const PhysicalOperatorType TYPE = PhysicalOperatorType::NESTED_LOOP_JOIN;
1919

2020
public:
21+
PhysicalNestedLoopJoin(LogicalOperator &op, PhysicalOperator &left, PhysicalOperator &right,
22+
vector<JoinCondition> cond, JoinType join_type, idx_t estimated_cardinality,
23+
unique_ptr<JoinFilterPushdownInfo> pushdown_info);
2124
PhysicalNestedLoopJoin(LogicalOperator &op, PhysicalOperator &left, PhysicalOperator &right,
2225
vector<JoinCondition> cond, JoinType join_type, idx_t estimated_cardinality);
2326

src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -170,8 +170,17 @@ void JoinFilterPushdownOptimizer::GenerateJoinFilters(LogicalComparisonJoin &joi
170170
vector<JoinFilterPushdownColumn> pushdown_columns;
171171
for (idx_t cond_idx = 0; cond_idx < join.conditions.size(); cond_idx++) {
172172
auto &cond = join.conditions[cond_idx];
173-
if (cond.comparison != ExpressionType::COMPARE_EQUAL) {
174-
// only equality supported for now
173+
switch (cond.comparison) {
174+
case ExpressionType::COMPARE_EQUAL:
175+
case ExpressionType::COMPARE_LESSTHAN:
176+
case ExpressionType::COMPARE_LESSTHANOREQUALTO:
177+
case ExpressionType::COMPARE_GREATERTHAN:
178+
case ExpressionType::COMPARE_GREATERTHANOREQUALTO:
179+
break;
180+
case ExpressionType::COMPARE_NOT_DISTINCT_FROM:
181+
// TODO: Need OR predicates
182+
continue;
183+
default:
175184
continue;
176185
}
177186
if (cond.left->GetExpressionType() != ExpressionType::BOUND_COLUMN_REF) {
@@ -215,8 +224,10 @@ void JoinFilterPushdownOptimizer::GenerateJoinFilters(LogicalComparisonJoin &joi
215224

216225
// Even if we cannot find any table sources in which we can push down filters,
217226
// we still initialize the aggregate states so that we have the possibility of doing a perfect hash join
227+
// TODO: Can ExpressionType::COMPARE_NOT_DISTINCT_FROM be used with perfect hash joins?
218228
const auto compute_aggregates_anyway = join.join_type == JoinType::INNER && join.conditions.size() == 1 &&
219229
pushdown_info->join_condition.size() == 1 &&
230+
join.conditions[0].comparison == ExpressionType::COMPARE_EQUAL &&
220231
TypeIsIntegral(join.conditions[0].right->return_type.InternalType());
221232
if (pushdown_info->probe_info.empty() && !compute_aggregates_anyway) {
222233
// no table sources found in which we can push down filters

0 commit comments

Comments
 (0)