Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,21 @@ impl TreeNodeRewriter for Simplifier<'_> {
}))
}

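// CASE [expr] WHEN ... THEN A WHEN ... THEN A ... ELSE A END --> A
// Applies only when every THEN expression is equal (`==`) to the ELSE expression.
// The ELSE clause must be present: without it, unmatched rows evaluate to NULL, not A.
// Note: the CASE operand and all WHEN conditions are discarded without being evaluated.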
Expr::Case(Case {
expr: _,
when_then_expr,
else_expr: Some(else_expr),
}) if !when_then_expr.is_empty()
&& when_then_expr
.iter()
.all(|(_, then_expr)| then_expr.as_ref() == else_expr.as_ref()) =>
{
Transformed::yes(*else_expr)
}

// CASE
// WHEN X THEN A
// WHEN Y THEN B
Expand Down
12 changes: 6 additions & 6 deletions datafusion/sqllogictest/test_files/cse.slt
Original file line number Diff line number Diff line change
Expand Up @@ -188,12 +188,12 @@ EXPLAIN SELECT
FROM t1
----
logical_plan
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, CASE WHEN __common_expr_4 = Float64(0) THEN Int64(0) WHEN CAST(__common_expr_4 AS Boolean) THEN Int64(0) ELSE Int64(0) END AS c4, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_5 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_6 = Float64(0) THEN Float64(0) ELSE __common_expr_6 END AS c6
02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(4) AS __common_expr_4, t1.a + Float64(5) AS __common_expr_5, t1.a + Float64(6) AS __common_expr_6
01)Projection: (__common_expr_1 OR random() = Float64(0)) AND __common_expr_2 AS c1, __common_expr_2 AND random() = Float64(0) OR __common_expr_1 AS c2, CASE WHEN __common_expr_3 = Float64(0) THEN __common_expr_3 ELSE Float64(0) END AS c3, Int64(0) AS c4, CASE WHEN __common_expr_4 = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN __common_expr_4 ELSE Float64(0) END AS c5, CASE WHEN __common_expr_5 = Float64(0) THEN Float64(0) ELSE __common_expr_5 END AS c6
02)--Projection: t1.a = Float64(1) AS __common_expr_1, t1.a = Float64(2) AS __common_expr_2, t1.a + Float64(3) AS __common_expr_3, t1.a + Float64(5) AS __common_expr_4, t1.a + Float64(6) AS __common_expr_5
03)----TableScan: t1 projection=[a]
physical_plan
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN CAST(__common_expr_4@3 AS Boolean) THEN 0 ELSE 0 END as c4, CASE WHEN __common_expr_5@4 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_5@4 ELSE 0 END as c5, CASE WHEN __common_expr_6@5 = 0 THEN 0 ELSE __common_expr_6@5 END as c6]
02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 4 as __common_expr_4, a@0 + 5 as __common_expr_5, a@0 + 6 as __common_expr_6]
01)ProjectionExec: expr=[(__common_expr_1@0 OR random() = 0) AND __common_expr_2@1 as c1, __common_expr_2@1 AND random() = 0 OR __common_expr_1@0 as c2, CASE WHEN __common_expr_3@2 = 0 THEN __common_expr_3@2 ELSE 0 END as c3, 0 as c4, CASE WHEN __common_expr_4@3 = 0 THEN 0 WHEN random() = 0 THEN __common_expr_4@3 ELSE 0 END as c5, CASE WHEN __common_expr_5@4 = 0 THEN 0 ELSE __common_expr_5@4 END as c6]
02)--ProjectionExec: expr=[a@0 = 1 as __common_expr_1, a@0 = 2 as __common_expr_2, a@0 + 3 as __common_expr_3, a@0 + 5 as __common_expr_4, a@0 + 6 as __common_expr_5]
03)----DataSourceExec: partitions=1, partition_sizes=[0]

# Surely only once but also conditionally evaluated subexpressions
Expand Down Expand Up @@ -226,8 +226,8 @@ EXPLAIN SELECT
FROM t1
----
logical_plan
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, CASE WHEN random() = Float64(0) THEN t1.a + Float64(3) ELSE t1.a + Float64(3) END AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
01)Projection: (random() = Float64(0) OR t1.a = Float64(1)) AND t1.a = Float64(2) AS c1, random() = Float64(0) AND t1.a = Float64(2) OR t1.a = Float64(1) AS c2, t1.a + Float64(3) AS c3, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(4) = Float64(0) THEN t1.a + Float64(4) ELSE Float64(0) END AS c4, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN t1.a + Float64(5) = Float64(0) THEN Float64(0) ELSE t1.a + Float64(5) END AS c5, CASE WHEN random() = Float64(0) THEN Float64(0) WHEN random() = Float64(0) THEN t1.a + Float64(6) ELSE t1.a + Float64(6) END AS c6
02)--TableScan: t1 projection=[a]
physical_plan
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, CASE WHEN random() = 0 THEN a@0 + 3 ELSE a@0 + 3 END as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
01)ProjectionExec: expr=[(random() = 0 OR a@0 = 1) AND a@0 = 2 as c1, random() = 0 AND a@0 = 2 OR a@0 = 1 as c2, a@0 + 3 as c3, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 4 = 0 THEN a@0 + 4 ELSE 0 END as c4, CASE WHEN random() = 0 THEN 0 WHEN a@0 + 5 = 0 THEN 0 ELSE a@0 + 5 END as c5, CASE WHEN random() = 0 THEN 0 WHEN random() = 0 THEN a@0 + 6 ELSE a@0 + 6 END as c6]
02)--DataSourceExec: partitions=1, partition_sizes=[0]
18 changes: 18 additions & 0 deletions datafusion/sqllogictest/test_files/simplify_expr.slt
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,21 @@ logical_plan
physical_plan
01)ProjectionExec: expr=[[{x:100}] as a]
02)--PlaceholderRowExec

# Simplify a CASE whose THEN and ELSE expressions are all identical down to that single expression
query TT
EXPLAIN SELECT
CASE v when 100 then 1 else 1 end as opt1,
CASE v when 200 then 2 when 201 then 2 else 2 end as opt2,
CASE v when 300 then 3 when 301 then 3 else 4 end as noopt1,
CASE v when 400 then 4 when 401 then 4 end as noopt2
FROM (VALUES (0), (1), (2)) t(v)
----
logical_plan
01)Projection: Int64(1) AS opt1, Int64(2) AS opt2, CASE t.v WHEN Int64(300) THEN Int64(3) WHEN Int64(301) THEN Int64(3) ELSE Int64(4) END AS noopt1, CASE t.v WHEN Int64(400) THEN Int64(4) WHEN Int64(401) THEN Int64(4) END AS noopt2
02)--SubqueryAlias: t
03)----Projection: column1 AS v
04)------Values: (Int64(0)), (Int64(1)), (Int64(2))
physical_plan
01)ProjectionExec: expr=[1 as opt1, 2 as opt2, CASE column1@0 WHEN 300 THEN 3 WHEN 301 THEN 3 ELSE 4 END as noopt1, CASE column1@0 WHEN 400 THEN 4 WHEN 401 THEN 4 END as noopt2]
02)--DataSourceExec: partitions=1, partition_sizes=[1]
Loading