From bbda1910eec982120b336a6268c878f3ae0bce71 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Thu, 29 Jan 2026 17:13:29 +0000 Subject: [PATCH 1/2] Reduce number of traversals per node in physical expr simplification --- .../src/simplifier/const_evaluator.rs | 19 +++++++++++++++++-- .../physical-expr/src/simplifier/mod.rs | 5 ++--- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs b/datafusion/physical-expr/src/simplifier/const_evaluator.rs index 65111b2911654..8a2368c4040a4 100644 --- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs +++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs @@ -25,7 +25,6 @@ use arrow::record_batch::RecordBatch; use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr_common::columnar_value::ColumnarValue; -use datafusion_physical_expr_common::physical_expr::is_volatile; use crate::PhysicalExpr; use crate::expressions::{Column, Literal}; @@ -43,7 +42,7 @@ use crate::expressions::{Column, Literal}; pub fn simplify_const_expr( expr: &Arc, ) -> Result>> { - if is_volatile(expr) || has_column_references(expr) { + if !can_evaluate_as_constant(expr) { return Ok(Transformed::no(Arc::clone(expr))); } @@ -73,6 +72,22 @@ pub fn simplify_const_expr( } } +fn can_evaluate_as_constant(expr: &Arc) -> bool { + let mut can_evaluate = true; + + expr.apply(|e| { + if e.as_any().is::() || e.is_volatile_node() { + can_evaluate = false; + Ok(TreeNodeRecursion::Stop) + } else { + Ok(TreeNodeRecursion::Continue) + } + }) + .expect("apply should not fail"); + + can_evaluate +} + /// Create a 1-row dummy RecordBatch for evaluating constant expressions. /// /// The batch is never actually accessed for data - it's just needed because diff --git a/datafusion/physical-expr/src/simplifier/mod.rs b/datafusion/physical-expr/src/simplifier/mod.rs index 97395f4fe8a27..f24afc30e6efd 100644 --- a/datafusion/physical-expr/src/simplifier/mod.rs +++ b/datafusion/physical-expr/src/simplifier/mod.rs @@ -53,7 +53,7 @@ impl<'a> PhysicalExprSimplifier<'a> { while count < MAX_LOOP_COUNT { count += 1; let result = current_expr.transform(|node| { - #[cfg(test)] + #[cfg(debug_assertions)] let original_type = node.data_type(schema).unwrap(); // Apply NOT expression simplification first, then unwrap cast optimization, @@ -64,8 +64,7 @@ impl<'a> PhysicalExprSimplifier<'a> { })? .transform_data(|node| const_evaluator::simplify_const_expr(&node))?; - #[cfg(test)] - assert_eq!( + debug_assert_eq!( rewritten.data.data_type(schema).unwrap(), original_type, "Simplified expression should have the same data type as the original" From 328b36ba669e64039169a6c4b1845f32d11a418d Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Fri, 30 Jan 2026 19:03:37 +0000 Subject: [PATCH 2/2] TIL how is implemented --- datafusion/physical-expr/src/simplifier/mod.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-expr/src/simplifier/mod.rs b/datafusion/physical-expr/src/simplifier/mod.rs index f24afc30e6efd..3bd4683c167c0 100644 --- a/datafusion/physical-expr/src/simplifier/mod.rs +++ b/datafusion/physical-expr/src/simplifier/mod.rs @@ -64,7 +64,8 @@ impl<'a> PhysicalExprSimplifier<'a> { })? .transform_data(|node| const_evaluator::simplify_const_expr(&node))?; - debug_assert_eq!( + #[cfg(debug_assertions)] + assert_eq!( rewritten.data.data_type(schema).unwrap(), original_type, "Simplified expression should have the same data type as the original"