From 7c49527fd9298ab0f8fdbc405f4577f8987d7634 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Wed, 28 Jan 2026 13:50:28 -0500 Subject: [PATCH 1/4] Enable inlist support for preimage --- .../simplify_expressions/expr_simplifier.rs | 46 +++++++++++++++++++ .../src/simplify_expressions/udf_preimage.rs | 35 +++++++++++++- 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c47316bccc7c1..2fcaaa292412b 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -2038,6 +2038,52 @@ impl TreeNodeRewriter for Simplifier<'_> { Transformed::no(Expr::BinaryExpr(BinaryExpr { left, op, right })) } } + // For case: + // date_part('YEAR', expr) IN (literal1, literal2, ...) + Expr::InList(InList { expr, list, negated }) => { + if list.len() > THRESHOLD_INLINE_INLIST || list.iter().any(is_null) { + return Ok(Transformed::no(Expr::InList(InList { + expr, + list, + negated, + }))); + } + + let mut rewritten: Option = None; + for item in &list { + let PreimageResult::Range { interval, expr } = + get_preimage(expr.as_ref(), item, info)? + else { + return Ok(Transformed::no(Expr::InList(InList { + expr, + list, + negated, + }))); + }; + + let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) = if negated { + (NotEq, and) + } else { + (Eq, or) + }; + + let range_expr = rewrite_with_preimage(*interval, op, expr)?.data; + rewritten = Some(match rewritten { + None => range_expr, + Some(acc) => combiner(acc, range_expr), + }); + } + + if let Some(rewritten) = rewritten { + Transformed::yes(rewritten) + } else { + Transformed::no(Expr::InList(InList { + expr, + list, + negated, + })) + } + } // no additional rewrites possible expr => Transformed::no(expr), diff --git a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs index e0837196ca990..4e5de99ac6f3a 100644 --- a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs +++ b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs @@ -77,7 +77,7 @@ mod test { use datafusion_expr::{ ColumnarValue, Expr, Operator, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, Signature, Volatility, and, binary_expr, col, lit, preimage::PreimageResult, - simplify::SimplifyContext, + or, simplify::SimplifyContext, }; use super::Interval; @@ -165,6 +165,15 @@ mod test { )?), }) } + Expr::Literal(ScalarValue::Int32(Some(600)), _) => { + Ok(PreimageResult::Range { + expr, + interval: Box::new(Interval::try_new( + ScalarValue::Int32(Some(300)), + ScalarValue::Int32(Some(400)), + )?), + }) + } _ => Ok(PreimageResult::None), } } @@ -312,6 +321,30 @@ mod test { assert_eq!(optimize_test(expr, &schema), expected); } + #[test] + fn test_preimage_in_list_rewrite() { + let schema = test_schema(); + let expr = preimage_udf_expr().in_list(vec![lit(500), lit(600)], false); + let expected = or( + and(col("x").gt_eq(lit(100)), col("x").lt(lit(200))), + and(col("x").gt_eq(lit(300)), col("x").lt(lit(400))), + ); + + assert_eq!(optimize_test(expr, &schema), expected); + } + + #[test] + fn test_preimage_not_in_list_rewrite() { + let schema = test_schema(); + let expr = preimage_udf_expr().in_list(vec![lit(500), lit(600)], true); + let expected = and( + or(col("x").lt(lit(100)), col("x").gt_eq(lit(200))), + or(col("x").lt(lit(300)), col("x").gt_eq(lit(400))), + ); + + assert_eq!(optimize_test(expr, &schema), expected); + } + #[test] fn test_preimage_non_literal_rhs_no_rewrite() { // Non-literal RHS should not be rewritten. From a9c28213caa5d3d7f3a9b72b7a68319c3bbb8f93 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Wed, 28 Jan 2026 14:56:29 -0500 Subject: [PATCH 2/4] cargo fmt --- .../src/simplify_expressions/expr_simplifier.rs | 13 +++++++------ .../src/simplify_expressions/udf_preimage.rs | 4 ++-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index 2fcaaa292412b..c64062ec5912c 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -2040,7 +2040,11 @@ impl TreeNodeRewriter for Simplifier<'_> { } // For case: // date_part('YEAR', expr) IN (literal1, literal2, ...) - Expr::InList(InList { expr, list, negated }) => { + Expr::InList(InList { + expr, + list, + negated, + }) => { if list.len() > THRESHOLD_INLINE_INLIST || list.iter().any(is_null) { return Ok(Transformed::no(Expr::InList(InList { expr, @@ -2061,11 +2065,8 @@ impl TreeNodeRewriter for Simplifier<'_> { }))); }; - let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) = if negated { - (NotEq, and) - } else { - (Eq, or) - }; + let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) = + if negated { (NotEq, and) } else { (Eq, or) }; let range_expr = rewrite_with_preimage(*interval, op, expr)?.data; rewritten = Some(match rewritten { diff --git a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs index 4e5de99ac6f3a..540b34c9e1ee1 100644 --- a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs +++ b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs @@ -76,8 +76,8 @@ mod test { use datafusion_common::{DFSchema, DFSchemaRef, Result, ScalarValue}; use datafusion_expr::{ ColumnarValue, Expr, Operator, ScalarFunctionArgs, ScalarUDF, ScalarUDFImpl, - Signature, Volatility, and, binary_expr, col, lit, preimage::PreimageResult, - or, simplify::SimplifyContext, + Signature, Volatility, and, binary_expr, col, lit, or, preimage::PreimageResult, + simplify::SimplifyContext, }; use super::Interval; From 0f2e8ecf4630e73958f48440ca124fcc09ae52d1 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Thu, 29 Jan 2026 18:16:20 -0500 Subject: [PATCH 3/4] Move op, combiner out of the for loop --- .../optimizer/src/simplify_expressions/expr_simplifier.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs index c64062ec5912c..86c5aac3d0192 100644 --- a/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs +++ b/datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs @@ -2053,6 +2053,9 @@ impl TreeNodeRewriter for Simplifier<'_> { }))); } + let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) = + if negated { (NotEq, and) } else { (Eq, or) }; + let mut rewritten: Option = None; for item in &list { let PreimageResult::Range { interval, expr } = @@ -2065,9 +2068,6 @@ impl TreeNodeRewriter for Simplifier<'_> { }))); }; - let (op, combiner): (Operator, fn(Expr, Expr) -> Expr) = - if negated { (NotEq, and) } else { (Eq, or) }; - let range_expr = rewrite_with_preimage(*interval, op, expr)?.data; rewritten = Some(match rewritten { None => range_expr, From 18ed08fa961c8720ea950f2fa81a4bb24919e149 Mon Sep 17 00:00:00 2001 From: sdf-jkl Date: Tue, 3 Feb 2026 11:18:28 -0500 Subject: [PATCH 4/4] Add long list inlist test --- .../optimizer/src/simplify_expressions/udf_preimage.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs index 540b34c9e1ee1..11c80c62e1b3f 100644 --- a/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs +++ b/datafusion/optimizer/src/simplify_expressions/udf_preimage.rs @@ -345,6 +345,14 @@ mod test { assert_eq!(optimize_test(expr, &schema), expected); } + #[test] + fn test_preimage_in_list_long_list_no_rewrite() { + let schema = test_schema(); + let expr = preimage_udf_expr().in_list((1..100).map(lit).collect(), false); + + assert_eq!(optimize_test(expr.clone(), &schema), expr); + } + #[test] fn test_preimage_non_literal_rhs_no_rewrite() { // Non-literal RHS should not be rewritten.