Skip to content

Commit 8f572f8

Browse files
authored
fix(optimizer)!: qualify UNPIVOT on CTE sources (#7550)
Two fixes that together let `SELECT * FROM cte UNPIVOT(...)` qualify correctly in BigQuery and other dialects.

- parser: `UNPIVOT`'s pre-`FOR` value column(s) and the `FOR` field are now parsed as `Identifier` (was `Column`). Those names don't reference existing columns — they're new output names. The `IN`-list items stay as `Column` since they do reference source-table columns. `PIVOT` is unchanged.
- optimizer/resolver: when a CTE is pivoted, the scope stores an `exp.Table` under the pivot alias rather than the CTE's `Scope`, so column resolution couldn't see the CTE's columns. Fall back to `scope.cte_sources` in that case. Guarded on `not source.db` and `source.args.get("pivots")` so a real `db.x` that happens to share a name with a CTE doesn't misroute through the CTE scope.
- optimizer/qualify_columns: removes the post-hoc filter in `validate_qualify_columns` that excluded unpivot output names from `scope.unqualified_columns` — they're `Identifier`s now and never land there. `_unpivot_columns` yields `Identifier`s; `output_name` still works.

Test: new assertion in `test_qualify_columns` covering the CTE + `UNPIVOT` shape.
1 parent e3fee4d commit 8f572f8

4 files changed

Lines changed: 49 additions & 10 deletions

File tree

sqlglot/optimizer/qualify_columns.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -138,13 +138,6 @@ def validate_qualify_columns(expression: E, sql: str | None = None) -> E:
138138

139139
raise OptimizeError(error_msg)
140140

141-
if unqualified_columns and scope.pivots and scope.pivots[0].unpivot:
142-
# New columns produced by the UNPIVOT can't be qualified, but there may be columns
143-
# under the UNPIVOT's IN clause that can and should be qualified. We recompute
144-
# this list here to ensure those in the former category will be excluded.
145-
unpivot_columns = set(_unpivot_columns(scope.pivots[0]))
146-
unqualified_columns = [c for c in unqualified_columns if c not in unpivot_columns]
147-
148141
all_unqualified_columns.extend(unqualified_columns)
149142

150143
if all_unqualified_columns:
@@ -188,13 +181,18 @@ def _separate_pseudocolumns(scope: Scope, pseudocolumns: set[str]) -> None:
188181
scope.clear_cache()
189182

190183

191-
def _unpivot_columns(unpivot: exp.Pivot) -> Iterator[exp.Column]:
184+
def _unpivot_columns(unpivot: exp.Pivot) -> Iterator[exp.Identifier]:
192185
name_columns = [
193186
field.this
194187
for field in unpivot.fields
195-
if isinstance(field, exp.In) and isinstance(field.this, exp.Column)
188+
if isinstance(field, exp.In) and isinstance(field.this, exp.Identifier)
196189
]
197-
value_columns = (c for e in unpivot.expressions for c in e.find_all(exp.Column))
190+
value_columns = (
191+
ident
192+
for e in unpivot.expressions
193+
for ident in (e.expressions if isinstance(e, exp.Tuple) else [e])
194+
if isinstance(ident, exp.Identifier)
195+
)
198196

199197
return itertools.chain(name_columns, value_columns)
200198

sqlglot/optimizer/resolver.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,17 @@ def get_source_columns(self, name: str, only_visible: bool = False) -> Sequence[
139139

140140
source = self.scope.sources[name]
141141

142+
# A pivoted CTE reference is stored as an exp.Table in the scope sources (see
143+
# _traverse_tables in scope.py), but the underlying CTE Scope still holds the
144+
# column information we need to resolve pre-pivot columns.
145+
if (
146+
isinstance(source, exp.Table)
147+
and not source.db
148+
and source.args.get("pivots")
149+
and source.name in self.scope.cte_sources
150+
):
151+
source = self.scope.cte_sources[source.name]
152+
142153
if isinstance(source, exp.Table):
143154
columns = self.schema.column_names(source, only_visible)
144155
elif isinstance(source, Scope) and isinstance(

sqlglot/parser.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,15 @@ def _resolve_dialect(dialect: DialectType) -> Dialect:
275275
return Dialect.get_or_raise(dialect)
276276

277277

278+
def _unpivot_target(expr: exp.Expr) -> exp.Expr:
279+
# UNPIVOT's pre-FOR values and FOR field are new output names, not column references.
280+
if isinstance(expr, exp.Column) and not expr.table:
281+
return expr.this
282+
if isinstance(expr, exp.Tuple):
283+
expr.set("expressions", [_unpivot_target(e) for e in expr.expressions])
284+
return expr
285+
286+
278287
SENTINEL_NONE: Token = Token(TokenType.SENTINEL, "SENTINEL")
279288

280289

@@ -5142,6 +5151,12 @@ def _parse_pivot(self) -> exp.Pivot | None:
51425151
)
51435152
)
51445153

5154+
if unpivot:
5155+
pivot.set("expressions", [_unpivot_target(e) for e in pivot.expressions])
5156+
for pivot_field in pivot.fields:
5157+
if isinstance(pivot_field, exp.In):
5158+
pivot_field.set("this", _unpivot_target(pivot_field.this))
5159+
51455160
if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
51465161
pivot.set("alias", self._parse_table_alias())
51475162

tests/test_optimizer.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -632,6 +632,21 @@ def test_qualify_columns(self, logger):
632632
"SELECT t.end AS end FROM t AS t",
633633
)
634634

635+
self.assertEqual(
636+
optimizer.qualify.qualify(
637+
parse_one(
638+
"WITH produce AS (SELECT 'Kale' AS product, 51 AS q1, 23 AS q2) "
639+
"SELECT * FROM produce UNPIVOT(sales FOR quarter IN (q1, q2))",
640+
dialect="bigquery",
641+
),
642+
dialect="bigquery",
643+
).sql(dialect="bigquery"),
644+
"WITH `produce` AS (SELECT 'Kale' AS `product`, 51 AS `q1`, 23 AS `q2`) "
645+
"SELECT `produce`.`product` AS `product`, `produce`.`quarter` AS `quarter`, "
646+
"`produce`.`sales` AS `sales` FROM `produce` AS `produce` "
647+
"UNPIVOT(`sales` FOR `quarter` IN (`produce`.`q1`, `produce`.`q2`)) AS `produce`",
648+
)
649+
635650
def test_validate_columns(self):
636651
with self.assertRaisesRegex(
637652
OptimizeError, "Column 'foo' could not be resolved. Line: 1, Col: 10"

0 commit comments

Comments
 (0)