fix(optimizer): qualify (UN)PIVOT on CTE sources (#7560)

georgesittas · web-flow · commit cf0ffd99ef27 · 2026-04-24T20:40:57.000+03:00
* fix(optimizer): qualify (UN)PIVOT on CTE sources

  Fixes `SELECT * FROM cte UNPIVOT(...)` and `SELECT alias.col FROM cte
  UNPIVOT(...) AS alias` in BigQuery and other dialects. Both cases used
  to error or silently skip star expansion because the columns of a
  pivoted CTE were looked up in the schema instead of in the CTE's scope.

  Background: when a CTE is referenced with a pivot, `scope.py` stores the
  pivoted `exp.Table` under the pivot alias (not the CTE's `Scope`) so the
  pivot is treated as a new logical source. But the Table has no schema
  entry for the CTE name, so column lookups returned `[]`.

  Changes:

  - parser: UNPIVOT's pre-FOR value column(s) and the FOR field are now
    parsed as `Identifier` rather than `Column`. They're new output names,
    not references to existing columns. IN-list items stay as `Column`
    since those do reference source-table columns. PIVOT is unchanged.

  - optimizer/resolver: for pivoted-CTE sources (`Table` with pivots, no
    db qualifier, name matches a known CTE), fall back to the CTE's
    `Scope` to read pre-pivot columns. Enables star expansion.

  - optimizer/qualify_columns: when validating a column against a pivoted
    source, validate against the post-pivot column set rather than the
    pre-pivot source columns. Direct references like `u.val` pass, while
    typos like `u.nonexistent` still raise an error.

  - optimizer/qualify_columns: `_pivot_output_columns` helper factored out
    of `_expand_stars` and reused by the validator. Collapses ~18 lines
    of inline logic that computed excluded-vs-output column sets
    separately then combined them at the use site.

  - optimizer/qualify_columns: the `_unpivot_columns` filter in
    `validate_qualify_columns` is removed. With the parser change, unpivot
    output names are Identifiers and never enter `scope.unqualified_columns`,
    so the filter was dead code.

* Add test for UNPIVOT with multiple value columns and a name column
diff --git a/sqlglot/optimizer/qualify_columns.py b/sqlglot/optimizer/qualify_columns.py
@@ -15,7 +15,7 @@
 
 if t.TYPE_CHECKING:
     from sqlglot._typing import E
-    from collections.abc import Iterator, Iterable
+    from collections.abc import Iterator, Iterable, Sequence
 
 
 def qualify_columns(
@@ -181,6 +181,33 @@ def _separate_pseudocolumns(scope: Scope, pseudocolumns: set[str]) -> None:
         scope.clear_cache()
 
 
+def _pivot_output_columns(pivot: exp.Pivot, pre_pivot_columns: Sequence[str]) -> list[str]:
+    """Compute the columns exposed after a (UN)PIVOT, given its pre-pivot source columns.
+
+    Returns an empty list for degenerate pivots (no IN-list or no output names) so callers
+    can fall through to their non-pivot handling.
+    """
+    if pivot.unpivot:
+        excluded = {
+            c.output_name
+            for field in pivot.fields
+            if isinstance(field, exp.In)
+            for e in field.expressions
+            for c in e.find_all(exp.Column)
+        }
+        outputs = [i.name for i in _unpivot_columns(pivot)]
+    else:
+        excluded = {c.output_name for c in pivot.find_all(exp.Column)}
+        outputs = [c.output_name for c in pivot.args.get("columns") or []]
+        if not outputs:
+            outputs = [c.alias_or_name for c in pivot.expressions]
+
+    if not excluded or not outputs:
+        return []
+
+    return [c for c in pre_pivot_columns if c not in excluded] + outputs
+
+
 def _unpivot_columns(unpivot: exp.Pivot) -> Iterator[exp.Identifier]:
     name_columns = [
         field.this
@@ -605,7 +632,13 @@ def _qualify_columns(
         column_name = column.name
 
         if column_table and column_table in scope.sources:
+            column_source = scope.sources[column_table]
             source_columns = resolver.get_source_columns(column_table)
+            # For pivoted sources, source_columns are pre-pivot; validate against the post-pivot set.
+            if isinstance(column_source, exp.Table) and (
+                pivots := column_source.args.get("pivots")
+            ):
+                source_columns = _pivot_output_columns(pivots[0], source_columns)
             if (
                 not allow_partial_qualification
                 and source_columns
@@ -782,26 +815,7 @@ def _expand_stars(
     coalesced_columns = set()
     dialect = resolver.dialect
 
-    pivot_output_columns = None
-    pivot_exclude_columns: set[str] = set()
-
     pivot = t.cast(t.Optional[exp.Pivot], seq_get(scope.pivots, 0))
-    if isinstance(pivot, exp.Pivot) and not pivot.alias_column_names:
-        if pivot.unpivot:
-            pivot_output_columns = [c.output_name for c in _unpivot_columns(pivot)]
-
-            for field in pivot.fields:
-                if isinstance(field, exp.In):
-                    pivot_exclude_columns.update(
-                        c.output_name for e in field.expressions for c in e.find_all(exp.Column)
-                    )
-
-        else:
-            pivot_exclude_columns = set(c.output_name for c in pivot.find_all(exp.Column))
-
-            pivot_output_columns = [c.output_name for c in pivot.args.get("columns", [])]
-            if not pivot_output_columns:
-                pivot_output_columns = [c.alias_or_name for c in pivot.expressions]
 
     if dialect.SUPPORTS_STRUCT_STAR_EXPANSION and any(
         isinstance(col, exp.Dot) for col in scope.stars
@@ -865,11 +879,7 @@ def _expand_stars(
             replaced_columns = replace_columns.get(table_id, {})
 
             if pivot:
-                if pivot_output_columns and pivot_exclude_columns:
-                    pivot_columns = [c for c in columns if c not in pivot_exclude_columns]
-                    pivot_columns.extend(pivot_output_columns)
-                else:
-                    pivot_columns = pivot.alias_column_names
+                pivot_columns = pivot.alias_column_names or _pivot_output_columns(pivot, columns)
 
                 if pivot_columns:
                     new_selections.extend(
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
@@ -647,6 +647,35 @@ def test_qualify_columns(self, logger):
             "UNPIVOT(`sales` FOR `quarter` IN (`produce`.`q1`, `produce`.`q2`)) AS `produce`",
         )
 
+        self.assertEqual(
+            optimizer.qualify.qualify(
+                parse_one(
+                    "WITH cte AS (SELECT 1 AS a, 2 AS b, 3 AS c) "
+                    "SELECT u.val, u.name FROM cte UNPIVOT(val FOR name IN (a, b, c)) AS u"
+                ),
+            ).sql(),
+            'WITH "cte" AS (SELECT 1 AS "a", 2 AS "b", 3 AS "c") '
+            'SELECT "u"."val" AS "val", "u"."name" AS "name" FROM "cte" AS "cte" '
+            'UNPIVOT("val" FOR "name" IN ("cte"."a", "cte"."b", "cte"."c")) AS "u"',
+        )
+
+        self.assertEqual(
+            optimizer.qualify.qualify(
+                parse_one(
+                    "WITH produce AS (SELECT 'Kale' AS product, 51 AS q1, 23 AS q2, 45 AS q3, 3 AS q4) "
+                    "SELECT * FROM produce UNPIVOT((first_half, second_half) FOR semesters "
+                    "IN ((q1, q2) AS 'h1', (q3, q4) AS 'h2'))",
+                    dialect="bigquery",
+                ),
+                dialect="bigquery",
+            ).sql(dialect="bigquery"),
+            "WITH `produce` AS (SELECT 'Kale' AS `product`, 51 AS `q1`, 23 AS `q2`, 45 AS `q3`, 3 AS `q4`) "
+            "SELECT `produce`.`product` AS `product`, `produce`.`semesters` AS `semesters`, "
+            "`produce`.`first_half` AS `first_half`, `produce`.`second_half` AS `second_half` "
+            "FROM `produce` AS `produce` UNPIVOT((`first_half`, `second_half`) FOR `semesters` "
+            "IN ((`produce`.`q1`, `produce`.`q2`) AS 'h1', (`produce`.`q3`, `produce`.`q4`) AS 'h2')) AS `produce`",
+        )
+
     def test_validate_columns(self):
         with self.assertRaisesRegex(
             OptimizeError, "Column 'foo' could not be resolved. Line: 1, Col: 10"