chore: remove squash_selections from sqlglot_ir (#1833)

chelsea-lin · web-flow · commit 0fffc497d426 · 2025-06-18T22:23:09.000Z
diff --git a/bigframes/core/compile/googlesql/query.py b/bigframes/core/compile/googlesql/query.py
@@ -83,7 +83,13 @@ def _select_field(self, field) -> SelectExpression:
             return SelectExpression(expression=expr.ColumnExpression(name=field))
 
         else:
-            alias = field[1] if (field[0] != field[1]) else None
+            alias = (
+                expr.AliasExpression(field[1])
+                if isinstance(field[1], str)
+                else field[1]
+                if (field[0] != field[1])
+                else None
+            )
             return SelectExpression(
                 expression=expr.ColumnExpression(name=field[0]), alias=alias
             )
diff --git a/bigframes/core/compile/sqlglot/compiler.py b/bigframes/core/compile/sqlglot/compiler.py
@@ -125,10 +125,7 @@ def _compile_result_node(self, root: nodes.ResultNode) -> str:
             (name, scalar_compiler.compile_scalar_expression(ref))
             for ref, name in root.output_cols
         )
-        # Skip squashing selections to ensure the right ordering and limit keys
-        sqlglot_ir = self.compile_node(root.child).select(
-            selected_cols, squash_selections=False
-        )
+        sqlglot_ir = self.compile_node(root.child).select(selected_cols)
 
         if root.order_by is not None:
             ordering_cols = tuple(
diff --git a/bigframes/core/compile/sqlglot/sqlglot_ir.py b/bigframes/core/compile/sqlglot/sqlglot_ir.py
@@ -203,7 +203,6 @@ def from_union(
     def select(
         self,
         selected_cols: tuple[tuple[str, sge.Expression], ...],
-        squash_selections: bool = True,
     ) -> SQLGlotIR:
         selections = [
             sge.Alias(
@@ -213,15 +212,6 @@ def select(
             for id, expr in selected_cols
         ]
 
-        # If squashing is enabled, we try to simplify the selections
-        # by checking if the new selections are simply aliases of the
-        # original columns.
-        if squash_selections:
-            new_selections = _squash_selections(self.expr.expressions, selections)
-            if new_selections != []:
-                new_expr = self.expr.select(*new_selections, append=False)
-                return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
-
         new_expr = self._encapsulate_as_cte().select(*selections, append=False)
         return SQLGlotIR(expr=new_expr, uid_gen=self.uid_gen)
 
@@ -361,63 +351,3 @@ def _table(table: bigquery.TableReference) -> sge.Table:
         db=sg.to_identifier(table.dataset_id, quoted=True),
         catalog=sg.to_identifier(table.project, quoted=True),
     )
-
-
-def _squash_selections(
-    old_expr: list[sge.Expression], new_expr: list[sge.Alias]
-) -> list[sge.Alias]:
-    """
-    TODO: Reanble this function to optimize the SQL.
-    Simplifies the select column expressions if existing (old_expr) and
-    new (new_expr) selected columns are both simple aliases of column definitions.
-
-    Example:
-    old_expr: [A AS X, B AS Y]
-    new_expr: [X AS P, Y AS Q]
-    Result:   [A AS P, B AS Q]
-    """
-    old_alias_map: typing.Dict[str, str] = {}
-    for selected in old_expr:
-        column_alias_pair = _get_column_alias_pair(selected)
-        if column_alias_pair is None:
-            return []
-        else:
-            old_alias_map[column_alias_pair[1]] = column_alias_pair[0]
-
-    new_selected_cols: typing.List[sge.Alias] = []
-    for selected in new_expr:
-        column_alias_pair = _get_column_alias_pair(selected)
-        if column_alias_pair is None or column_alias_pair[0] not in old_alias_map:
-            return []
-        else:
-            new_alias_expr = sge.Alias(
-                this=sge.ColumnDef(
-                    this=sge.to_identifier(
-                        old_alias_map[column_alias_pair[0]], quoted=True
-                    )
-                ),
-                alias=sg.to_identifier(column_alias_pair[1], quoted=True),
-            )
-            new_selected_cols.append(new_alias_expr)
-    return new_selected_cols
-
-
-def _get_column_alias_pair(
-    expr: sge.Expression,
-) -> typing.Optional[typing.Tuple[str, str]]:
-    """Checks if an expression is a simple alias of a column definition
-    (e.g., "column_name AS alias_name").
-    If it is, returns a tuple containing the alias name and original column name.
-    Returns `None` otherwise.
-    """
-    if not isinstance(expr, sge.Alias):
-        return None
-    if not isinstance(expr.this, sge.ColumnDef):
-        return None
-
-    column_def_expr: sge.ColumnDef = expr.this
-    if not isinstance(column_def_expr.this, sge.Identifier):
-        return None
-
-    original_identifier: sge.Identifier = column_def_expr.this
-    return (original_identifier.this, expr.alias)
diff --git a/bigframes/core/rewrite/pruning.py b/bigframes/core/rewrite/pruning.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 import dataclasses
 import functools
-from typing import AbstractSet
+import typing
 
 from bigframes.core import identifiers, nodes
 
@@ -143,7 +143,7 @@ def prune_selection_child(
 
 def prune_node(
     node: nodes.BigFrameNode,
-    ids: AbstractSet[identifiers.ColumnId],
+    ids: typing.AbstractSet[identifiers.ColumnId],
 ):
     # This clause is important, ensures idempotency, so can reach fixed point
     if not (set(node.ids) - ids):
@@ -157,7 +157,7 @@ def prune_node(
 
 def prune_aggregate(
     node: nodes.AggregateNode,
-    used_cols: AbstractSet[identifiers.ColumnId],
+    used_cols: typing.AbstractSet[identifiers.ColumnId],
 ) -> nodes.AggregateNode:
     pruned_aggs = (
         tuple(agg for agg in node.aggregations if agg[1] in used_cols)
@@ -169,15 +169,15 @@ def prune_aggregate(
 @functools.singledispatch
 def prune_leaf(
     node: nodes.BigFrameNode,
-    used_cols: AbstractSet[identifiers.ColumnId],
+    used_cols: typing.AbstractSet[identifiers.ColumnId],
 ):
     ...
 
 
 @prune_leaf.register
 def prune_readlocal(
     node: nodes.ReadLocalNode,
-    selection: AbstractSet[identifiers.ColumnId],
+    selection: typing.AbstractSet[identifiers.ColumnId],
 ) -> nodes.ReadLocalNode:
     new_scan_list = node.scan_list.filter_cols(selection)
     return dataclasses.replace(
@@ -190,7 +190,7 @@ def prune_readlocal(
 @prune_leaf.register
 def prune_readtable(
     node: nodes.ReadTableNode,
-    selection: AbstractSet[identifiers.ColumnId],
+    selection: typing.AbstractSet[identifiers.ColumnId],
 ) -> nodes.ReadTableNode:
     new_scan_list = node.scan_list.filter_cols(selection)
     return dataclasses.replace(node, scan_list=new_scan_list)