Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 84 additions & 18 deletions datafusion/physical-expr/src/simplifier/const_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,18 +39,80 @@ use crate::expressions::{Column, Literal};
/// - `1 + 2` -> `3`
/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal)
/// - `'hello' || ' world'` -> `'hello world'`
#[deprecated(
since = "53.0.0",
note = "This function will be removed in a future release in favor of a private implementation that depends on other implementation details. Please open an issue if you have a use case for keeping it."
)]
pub fn simplify_const_expr(
expr: Arc<dyn PhysicalExpr>,
) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
simplify_const_expr_with_dummy(expr, &create_dummy_batch()?)
let batch = create_dummy_batch()?;
// If expr is already a const literal or can't be evaluated into one.
if expr.as_any().is::<Literal>() || (!can_evaluate_as_constant(&expr)) {
return Ok(Transformed::no(expr));
}

// Evaluate the expression
match expr.evaluate(&batch) {
Ok(ColumnarValue::Scalar(scalar)) => {
Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
}
Ok(ColumnarValue::Array(arr)) if arr.len() == 1 => {
// Some operations return an array even for scalar inputs
let scalar = ScalarValue::try_from_array(&arr, 0)?;
Ok(Transformed::yes(Arc::new(Literal::new(scalar))))
}
Ok(_) => {
// Unexpected result - keep original expression
Ok(Transformed::no(expr))
}
Err(_) => {
// On error, keep original expression
// The expression might succeed at runtime due to short-circuit evaluation
// or other runtime conditions
Ok(Transformed::no(expr))
}
}
}

pub(crate) fn simplify_const_expr_with_dummy(
/// Simplify expressions whose immediate children are all literals.
///
/// This function only checks the direct children of the expression,
/// not the entire subtree. It is designed to be used with bottom-up tree
/// traversal, where children are simplified before parents.
///
/// # Example transformations
/// - `1 + 2` -> `3`
/// - `(1 + 2) * 3` -> `9` (with bottom-up traversal, inner expr simplified first)
/// - `'hello' || ' world'` -> `'hello world'`
pub(crate) fn simplify_const_expr_immediate(
expr: Arc<dyn PhysicalExpr>,
batch: &RecordBatch,
) -> Result<Transformed<Arc<dyn PhysicalExpr>>> {
// If expr is already a const literal or can't be evaluated into one.
if expr.as_any().is::<Literal>() || (!can_evaluate_as_constant(&expr)) {
// Already a literal - nothing to do
if expr.as_any().is::<Literal>() {
return Ok(Transformed::no(expr));
}

// Column references cannot be evaluated at plan time
if expr.as_any().is::<Column>() {
return Ok(Transformed::no(expr));
}

// Volatile nodes cannot be evaluated at plan time
if expr.is_volatile_node() {
return Ok(Transformed::no(expr));
}

// Since transform visits bottom-up, children have already been simplified.
// If all children are now Literals, this node can be const-evaluated.
// This is O(k) where k = number of children, instead of O(subtree).
let all_children_literal = expr
.children()
.iter()
.all(|child| child.as_any().is::<Literal>());

if !all_children_literal {
return Ok(Transformed::no(expr));
}

Expand All @@ -77,6 +139,20 @@ pub(crate) fn simplify_const_expr_with_dummy(
}
}

/// Create a 1-row dummy RecordBatch for evaluating constant expressions.
///
/// The batch is never actually accessed for data - it's just needed because
/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions
/// that only contain literals, the batch content is irrelevant.
///
/// This is the same approach used in the logical expression `ConstEvaluator`.
pub(crate) fn create_dummy_batch() -> Result<RecordBatch> {
// RecordBatch requires at least one column
let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
let col = new_null_array(&DataType::Null, 1);
Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
}

fn can_evaluate_as_constant(expr: &Arc<dyn PhysicalExpr>) -> bool {
let mut can_evaluate = true;

Expand All @@ -93,21 +169,11 @@ fn can_evaluate_as_constant(expr: &Arc<dyn PhysicalExpr>) -> bool {
can_evaluate
}

/// Create a 1-row dummy RecordBatch for evaluating constant expressions.
///
/// The batch is never actually accessed for data - it's just needed because
/// the PhysicalExpr::evaluate API requires a RecordBatch. For expressions
/// that only contain literals, the batch content is irrelevant.
///
/// This is the same approach used in the logical expression `ConstEvaluator`.
pub(crate) fn create_dummy_batch() -> Result<RecordBatch> {
// RecordBatch requires at least one column
let dummy_schema = Arc::new(Schema::new(vec![Field::new("_", DataType::Null, true)]));
let col = new_null_array(&DataType::Null, 1);
Ok(RecordBatch::try_new(dummy_schema, vec![col])?)
}

/// Check if this expression has any column references.
#[deprecated(
since = "53.0.0",
note = "This function isn't used internally and is trivial to implement, therefore it will be removed in a future release."
)]
pub fn has_column_references(expr: &Arc<dyn PhysicalExpr>) -> bool {
let mut has_columns = false;
expr.apply(|expr| {
Expand Down
9 changes: 4 additions & 5 deletions datafusion/physical-expr/src/simplifier/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ use std::sync::Arc;
use crate::{
PhysicalExpr,
simplifier::{
const_evaluator::{create_dummy_batch, simplify_const_expr_with_dummy},
not::simplify_not_expr,
unwrap_cast::unwrap_cast_in_comparison,
const_evaluator::create_dummy_batch, unwrap_cast::unwrap_cast_in_comparison,
},
};

Expand Down Expand Up @@ -67,10 +65,11 @@ impl<'a> PhysicalExprSimplifier<'a> {

// Apply NOT expression simplification first, then unwrap cast optimization,
// then constant expression evaluation
let rewritten = simplify_not_expr(node, schema)?
#[expect(deprecated, reason = "`simplify_not_expr` is marked as deprecated until it's made private.")]
let rewritten = not::simplify_not_expr(node, schema)?
.transform_data(|node| unwrap_cast_in_comparison(node, schema))?
.transform_data(|node| {
simplify_const_expr_with_dummy(node, &batch)
const_evaluator::simplify_const_expr_immediate(node, &batch)
})?;

#[cfg(debug_assertions)]
Expand Down
4 changes: 4 additions & 0 deletions datafusion/physical-expr/src/simplifier/not.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ use crate::expressions::{BinaryExpr, InListExpr, Literal, NotExpr, in_list, lit}
/// This function applies a single simplification rule and returns. When used with
/// TreeNodeRewriter, multiple passes will automatically be applied until no more
/// transformations are possible.
#[deprecated(
since = "53.0.0",
note = "This function will be made private in a future release, please file an issue if you have a reason for keeping it public."
)]
pub fn simplify_not_expr(
expr: Arc<dyn PhysicalExpr>,
schema: &Schema,
Expand Down