From 0610595f1a8d71efb75843a4fe40f2c7add3fccf Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Mon, 9 Feb 2026 11:17:37 +0000 Subject: [PATCH 1/2] Remove recursive const check in `simplify_const_expr` --- .../src/simplifier/const_evaluator.rs | 77 ++++++++----------- .../physical-expr/src/simplifier/mod.rs | 12 ++- .../physical-expr/src/simplifier/not.rs | 2 +- 3 files changed, 37 insertions(+), 54 deletions(-) diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs b/datafusion/physical-expr/src/simplifier/const_evaluator.rs index 1e62e47ce2066..7e120be9306c8 100644 --- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs +++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs @@ -22,35 +22,51 @@ use std::sync::Arc; use arrow::array::new_null_array; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; +use datafusion_common::tree_node::Transformed; use datafusion_common::{Result, ScalarValue}; use datafusion_expr_common::columnar_value::ColumnarValue; use crate::PhysicalExpr; use crate::expressions::{Column, Literal}; -/// Simplify expressions that consist only of literals by evaluating them. +/// Simplify expressions whose immediate children are all literals. /// -/// This function checks if all children of the given expression are literals. -/// If so, it evaluates the expression against a dummy RecordBatch and returns -/// the result as a new Literal. +/// This function only checks the direct children of the expression, +/// not the entire subtree. It is designed to be used with bottom-up tree +/// traversal, where children are simplified before parents. /// /// # Example transformations /// - `1 + 2` -> `3` -/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal) +/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal, inner expr simplified first) /// - `'hello' || ' world'` -> `'hello world'` -pub fn simplify_const_expr( - expr: Arc, -) -> Result>> { - simplify_const_expr_with_dummy(expr, &create_dummy_batch()?) -} - -pub(crate) fn simplify_const_expr_with_dummy( +pub(crate) fn simplify_const_expr( expr: Arc, batch: &RecordBatch, ) -> Result>> { - // If expr is already a const literal or can't be evaluated into one. - if expr.as_any().is::() || (!can_evaluate_as_constant(&expr)) { + // Already a literal - nothing to do + if expr.as_any().is::() { + return Ok(Transformed::no(expr)); + } + + // Column references cannot be evaluated at plan time + if expr.as_any().is::() { + return Ok(Transformed::no(expr)); + } + + // Volatile nodes cannot be evaluated at plan time + if expr.is_volatile_node() { + return Ok(Transformed::no(expr)); + } + + // Since transform visits bottom-up, children have already been simplified. + // If all children are now Literals, this node can be const-evaluated. + // This is O(k) where k = number of children, instead of O(subtree). + let all_children_literal = expr + .children() + .iter() + .all(|child| child.as_any().is::()); + + if !all_children_literal { return Ok(Transformed::no(expr)); } @@ -77,22 +93,6 @@ pub(crate) fn simplify_const_expr_with_dummy( } } -fn can_evaluate_as_constant(expr: &Arc) -> bool { - let mut can_evaluate = true; - - expr.apply(|e| { - if e.as_any().is::() || e.is_volatile_node() { - can_evaluate = false; - Ok(TreeNodeRecursion::Stop) - } else { - Ok(TreeNodeRecursion::Continue) - } - }) - .expect("apply should not fail"); - - can_evaluate -} - /// Create a 1-row dummy RecordBatch for evaluating constant expressions. /// /// The batch is never actually accessed for data - it's just needed because @@ -106,18 +106,3 @@ pub(crate) fn create_dummy_batch() -> Result { let col = new_null_array(&DataType::Null, 1); Ok(RecordBatch::try_new(dummy_schema, vec![col])?) } - -/// Check if this expression has any column references. -pub fn has_column_references(expr: &Arc) -> bool { - let mut has_columns = false; - expr.apply(|expr| { - if expr.as_any().downcast_ref::().is_some() { - has_columns = true; - Ok(TreeNodeRecursion::Stop) - } else { - Ok(TreeNodeRecursion::Continue) - } - }) - .expect("apply should not fail"); - has_columns -} diff --git a/datafusion/physical-expr/src/simplifier/mod.rs b/datafusion/physical-expr/src/simplifier/mod.rs index 45ead82a0a93d..04f9e0973aa91 100644 --- a/datafusion/physical-expr/src/simplifier/mod.rs +++ b/datafusion/physical-expr/src/simplifier/mod.rs @@ -24,15 +24,15 @@ use std::sync::Arc; use crate::{ PhysicalExpr, simplifier::{ - const_evaluator::{create_dummy_batch, simplify_const_expr_with_dummy}, + const_evaluator::{create_dummy_batch, simplify_const_expr}, not::simplify_not_expr, unwrap_cast::unwrap_cast_in_comparison, }, }; -pub mod const_evaluator; -pub mod not; -pub mod unwrap_cast; +mod const_evaluator; +mod not; +mod unwrap_cast; const MAX_LOOP_COUNT: usize = 5; @@ -69,9 +69,7 @@ impl<'a> PhysicalExprSimplifier<'a> { // then constant expression evaluation let rewritten = simplify_not_expr(node, schema)? .transform_data(|node| unwrap_cast_in_comparison(node, schema))? - .transform_data(|node| { - simplify_const_expr_with_dummy(node, &batch) - })?; + .transform_data(|node| simplify_const_expr(node, &batch))?; #[cfg(debug_assertions)] assert_eq!( diff --git a/datafusion/physical-expr/src/simplifier/not.rs b/datafusion/physical-expr/src/simplifier/not.rs index ea5467d0a4b42..47e9059d092de 100644 --- a/datafusion/physical-expr/src/simplifier/not.rs +++ b/datafusion/physical-expr/src/simplifier/not.rs @@ -43,7 +43,7 @@ use crate::expressions::{BinaryExpr, InListExpr, Literal, NotExpr, in_list, lit} /// This function applies a single simplification rule and returns. When used with /// TreeNodeRewriter, multiple passes will automatically be applied until no more /// transformations are possible. -pub fn simplify_not_expr( +pub(crate) fn simplify_not_expr( expr: Arc, schema: &Schema, ) -> Result>> { From 3b4dc057d17165fb4c0e700bd6b767f2a25ce515 Mon Sep 17 00:00:00 2001 From: Adam Gutglick Date: Tue, 10 Feb 2026 11:28:42 +0000 Subject: [PATCH 2/2] Restore public functions and mark them as deprecated --- .../src/simplifier/const_evaluator.rs | 85 ++++++++++++++++++- .../physical-expr/src/simplifier/mod.rs | 17 ++-- .../physical-expr/src/simplifier/not.rs | 6 +- 3 files changed, 97 insertions(+), 11 deletions(-) diff --git a/datafusion/physical-expr/src/simplifier/const_evaluator.rs b/datafusion/physical-expr/src/simplifier/const_evaluator.rs index 7e120be9306c8..1f3781c537dd5 100644 --- a/datafusion/physical-expr/src/simplifier/const_evaluator.rs +++ b/datafusion/physical-expr/src/simplifier/const_evaluator.rs @@ -22,13 +22,59 @@ use std::sync::Arc; use arrow::array::new_null_array; use arrow::datatypes::{DataType, Field, Schema}; use arrow::record_batch::RecordBatch; -use datafusion_common::tree_node::Transformed; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; use datafusion_common::{Result, ScalarValue}; use datafusion_expr_common::columnar_value::ColumnarValue; use crate::PhysicalExpr; use crate::expressions::{Column, Literal}; +/// Simplify expressions that consist only of literals by evaluating them. +/// +/// This function checks if all children of the given expression are literals. +/// If so, it evaluates the expression against a dummy RecordBatch and returns +/// the result as a new Literal. +/// +/// # Example transformations +/// - `1 + 2` -> `3` +/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal) +/// - `'hello' || ' world'` -> `'hello world'` +#[deprecated( + since = "53.0.0", + note = "This function will be removed in a future release in favor of a private implementation that depends on other implementation details. Please open an issue if you have a use case for keeping it." +)] +pub fn simplify_const_expr( + expr: Arc, +) -> Result>> { + let batch = create_dummy_batch()?; + // If expr is already a const literal or can't be evaluated into one. + if expr.as_any().is::() || (!can_evaluate_as_constant(&expr)) { + return Ok(Transformed::no(expr)); + } + + // Evaluate the expression + match expr.evaluate(&batch) { + Ok(ColumnarValue::Scalar(scalar)) => { + Ok(Transformed::yes(Arc::new(Literal::new(scalar)))) + } + Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => { + // Some operations return an array even for scalar inputs + let scalar = ScalarValue::try_from_array(&arr, 0)?; + Ok(Transformed::yes(Arc::new(Literal::new(scalar)))) + } + Ok(_) => { + // Unexpected result - keep original expression + Ok(Transformed::no(expr)) + } + Err(_) => { + // On error, keep original expression + // The expression might succeed at runtime due to short-circuit evaluation + // or other runtime conditions + Ok(Transformed::no(expr)) + } + } +} + /// Simplify expressions whose immediate children are all literals. /// /// This function only checks the direct children of the expression, @@ -39,7 +85,7 @@ use crate::expressions::{Column, Literal}; /// - `1 + 2` -> `3` /// - `(1 + 2) * 3` -> `9` (with bottom-up traversal, inner expr simplified first) /// - `'hello' || ' world'` -> `'hello world'` -pub(crate) fn simplify_const_expr( +pub(crate) fn simplify_const_expr_immediate( expr: Arc, batch: &RecordBatch, ) -> Result>> { @@ -106,3 +152,38 @@ pub(crate) fn create_dummy_batch() -> Result { let col = new_null_array(&DataType::Null, 1); Ok(RecordBatch::try_new(dummy_schema, vec![col])?) } + +fn can_evaluate_as_constant(expr: &Arc) -> bool { + let mut can_evaluate = true; + + expr.apply(|e| { + if e.as_any().is::() || e.is_volatile_node() { + can_evaluate = false; + Ok(TreeNodeRecursion::Stop) + } else { + Ok(TreeNodeRecursion::Continue) + } + }) + .expect("apply should not fail"); + + can_evaluate +} + +/// Check if this expression has any column references. +#[deprecated( + since = "53.0.0", + note = "This function isn't used internally and is trivial to implement, therefore it will be removed in a future release." +)] +pub fn has_column_references(expr: &Arc) -> bool { + let mut has_columns = false; + expr.apply(|expr| { + if expr.as_any().downcast_ref::().is_some() { + has_columns = true; + Ok(TreeNodeRecursion::Stop) + } else { + Ok(TreeNodeRecursion::Continue) + } + }) + .expect("apply should not fail"); + has_columns +} diff --git a/datafusion/physical-expr/src/simplifier/mod.rs b/datafusion/physical-expr/src/simplifier/mod.rs index 04f9e0973aa91..3f3f8573449eb 100644 --- a/datafusion/physical-expr/src/simplifier/mod.rs +++ b/datafusion/physical-expr/src/simplifier/mod.rs @@ -24,15 +24,13 @@ use std::sync::Arc; use crate::{ PhysicalExpr, simplifier::{ - const_evaluator::{create_dummy_batch, simplify_const_expr}, - not::simplify_not_expr, - unwrap_cast::unwrap_cast_in_comparison, + const_evaluator::create_dummy_batch, unwrap_cast::unwrap_cast_in_comparison, }, }; -mod const_evaluator; -mod not; -mod unwrap_cast; +pub mod const_evaluator; +pub mod not; +pub mod unwrap_cast; const MAX_LOOP_COUNT: usize = 5; @@ -67,9 +65,12 @@ impl<'a> PhysicalExprSimplifier<'a> { // Apply NOT expression simplification first, then unwrap cast optimization, // then constant expression evaluation - let rewritten = simplify_not_expr(node, schema)? + #[expect(deprecated, reason = "`simplify_not_expr` is marked as deprecated until it's made private.")] + let rewritten = not::simplify_not_expr(node, schema)? .transform_data(|node| unwrap_cast_in_comparison(node, schema))? - .transform_data(|node| simplify_const_expr(node, &batch))?; + .transform_data(|node| { + const_evaluator::simplify_const_expr_immediate(node, &batch) + })?; #[cfg(debug_assertions)] assert_eq!( diff --git a/datafusion/physical-expr/src/simplifier/not.rs b/datafusion/physical-expr/src/simplifier/not.rs index 47e9059d092de..709260aa48791 100644 --- a/datafusion/physical-expr/src/simplifier/not.rs +++ b/datafusion/physical-expr/src/simplifier/not.rs @@ -43,7 +43,11 @@ use crate::expressions::{BinaryExpr, InListExpr, Literal, NotExpr, in_list, lit} /// This function applies a single simplification rule and returns. When used with /// TreeNodeRewriter, multiple passes will automatically be applied until no more /// transformations are possible. -pub(crate) fn simplify_not_expr( +#[deprecated( + since = "53.0.0", + note = "This function will be made private in a future release, please file an issue if you have a reason for keeping it public." +)] +pub fn simplify_not_expr( expr: Arc, schema: &Schema, ) -> Result>> {