ClickHouse: structured parsing and generation for EXPLAIN and SHOW statements.

Tokenizer: EXPLAIN maps to TokenType.DESCRIBE; SHOW is removed from COMMANDS.
Generator: adds describe_sql and show_sql.
Parser: adds SHOW_PARSERS with _parse_show_clickhouse, and an EXPLAIN-aware _parse_describe.
Tests: adds test_explain and test_show.

diff --git a/sqlglot/dialects/clickhouse.py b/sqlglot/dialects/clickhouse.py
index 22211905f6..b9d18886d8 100644
--- a/sqlglot/dialects/clickhouse.py
+++ b/sqlglot/dialects/clickhouse.py
@@ -90,6 +90,7 @@ class Tokenizer(tokens.Tokenizer):
             "ENUM8": TokenType.ENUM8,
             "ENUM16": TokenType.ENUM16,
             "EXCHANGE": TokenType.COMMAND,
+            "EXPLAIN": TokenType.DESCRIBE,
             "FINAL": TokenType.FINAL,
             "FIXEDSTRING": TokenType.FIXEDSTRING,
             "FLOAT32": TokenType.FLOAT,
@@ -121,6 +122,8 @@ class Tokenizer(tokens.Tokenizer):
 
         KEYWORDS.pop("/*+")
 
+        COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
+
         SINGLE_TOKENS = {
             **tokens.Tokenizer.SINGLE_TOKENS,
             "$": TokenType.HEREDOC_STRING,
diff --git a/sqlglot/generators/clickhouse.py b/sqlglot/generators/clickhouse.py
index dda2129383..83d5dbf334 100644
--- a/sqlglot/generators/clickhouse.py
+++ b/sqlglot/generators/clickhouse.py
@@ -479,6 +479,36 @@ def trycast_sql(self, expression: exp.TryCast) -> str:
 
         return super().cast_sql(expression)
 
+    def describe_sql(self, expression: exp.Describe) -> str:
+        keyword = "EXPLAIN" if expression.text("kind").upper() == "EXPLAIN" else "DESCRIBE"
+        style = expression.args.get("style")
+        style = f" {style}" if style else ""
+        expressions = self.expressions(expression, key="expressions", flat=True)
+        expressions = f" {expressions}" if keyword == "EXPLAIN" and expressions else ""
+        format = self.sql(expression, "format")
+        format = f" {format}" if format else ""
+        partition = self.sql(expression, "partition")
+        partition = f" {partition}" if partition else ""
+        as_json = " AS JSON" if expression.args.get("as_json") else ""
+
+        return (
+            f"{keyword}{style}{expressions}{format} {self.sql(expression, 'this')}"
+            f"{partition}{as_json}"
+        )
+
+    def show_sql(self, expression: exp.Show) -> str:
+        query = expression.args.get("query")
+        if isinstance(query, str):
+            query = f" {query.strip()}" if query.strip() else ""
+            return f"SHOW {expression.name}{query}"
+
+        target = self.sql(expression, "target")
+        target = f" {target}" if target else ""
+        from_ = self.sql(expression, "from_")
+        from_ = f" FROM {from_}" if from_ else ""
+
+        return f"SHOW {expression.name}{target}{from_}"
+
     def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str:
         this = self.json_path_part(expression.this)
         return str(int(this) + 1) if is_int(this) else this
diff --git a/sqlglot/parsers/clickhouse.py b/sqlglot/parsers/clickhouse.py
index a65b16c35c..20eee982fa 100644
--- a/sqlglot/parsers/clickhouse.py
+++ b/sqlglot/parsers/clickhouse.py
@@ -13,6 +13,7 @@
 )
 from sqlglot.helper import seq_get
 from sqlglot.tokens import Token, TokenType
+from sqlglot.trie import new_trie
 from builtins import type as Type
 
 if t.TYPE_CHECKING:
@@ -72,6 +73,15 @@ def _build_split(exp_class: Type[E]) -> t.Callable[[list], E]:
     )
 
+
+def _show_parser(
+    *args: t.Any, **kwargs: t.Any
+) -> t.Callable[[ClickHouseParser], exp.Show | exp.Command]:
+    def _parse(self: ClickHouseParser) -> exp.Show | exp.Command:
+        return self._parse_show_clickhouse(*args, **kwargs)
+
+    return _parse
+
+
 # Skip the 'week' unit since ClickHouse's toStartOfWeek
 # uses an extra mode argument to specify the first day of the week
 TIMESTAMP_TRUNC_UNITS = {
@@ -456,11 +466,282 @@ def _resolve_clickhouse_agg(cls, name: str) -> tuple[str, Sequence[str]] | None:
         TokenType.L_BRACE: lambda self: self._parse_query_parameter(),
     }
 
+    EXPLAIN_STYLES = (
+        "AST",
+        "ESTIMATE",
+        "PIPELINE",
+        "PLAN",
+        "SYNTAX",
+    )
+
+    SHOW_PARSERS = {
+        "ACCESS": _show_parser("ACCESS"),
+        "CHANGED SETTINGS": _show_parser("CHANGED SETTINGS"),
+        "CLUSTER": _show_parser("CLUSTER"),
+        "CLUSTERS": _show_parser("CLUSTERS"),
+        "COLUMNS": _show_parser("COLUMNS"),
+        "CREATE DATABASE": _show_parser("CREATE DATABASE"),
+        "CREATE DICTIONARY": _show_parser("CREATE DICTIONARY"),
+        "CREATE MASKING POLICY": _show_parser("CREATE MASKING POLICY"),
+        "CREATE POLICY": _show_parser("CREATE POLICY"),
+        "CREATE PROFILE": _show_parser("CREATE PROFILE"),
+        "CREATE QUOTA": _show_parser("CREATE QUOTA"),
+        "CREATE ROLE": _show_parser("CREATE ROLE"),
+        "CREATE ROW POLICY": _show_parser("CREATE ROW POLICY"),
+        "CREATE SETTINGS PROFILE": _show_parser("CREATE SETTINGS PROFILE"),
+        "CREATE TABLE": _show_parser("CREATE TABLE"),
+        "CREATE TEMPORARY TABLE": _show_parser("CREATE TEMPORARY TABLE"),
+        "CREATE USER": _show_parser("CREATE USER"),
+        "CREATE VIEW": _show_parser("CREATE VIEW"),
+        "CURRENT QUOTA": _show_parser("CURRENT QUOTA"),
+        "CURRENT ROLES": _show_parser("CURRENT ROLES"),
+        "DATABASE": _show_parser("DATABASE"),
+        "DATABASES": _show_parser("DATABASES"),
+        "DICTIONARIES": _show_parser("DICTIONARIES"),
+        "DICTIONARY": _show_parser("DICTIONARY"),
+        "ENABLED ROLES": _show_parser("ENABLED ROLES"),
+        "ENGINES": _show_parser("ENGINES"),
+        "EXTENDED COLUMNS": _show_parser("EXTENDED COLUMNS"),
+        "EXTENDED FULL COLUMNS": _show_parser("EXTENDED FULL COLUMNS"),
+        "EXTENDED INDEX": _show_parser("EXTENDED INDEX"),
+        "EXTENDED INDEXES": _show_parser("EXTENDED INDEXES"),
+        "EXTENDED INDICES": _show_parser("EXTENDED INDICES"),
+        "EXTENDED KEYS": _show_parser("EXTENDED KEYS"),
+        "FILESYSTEM CACHES": _show_parser("FILESYSTEM CACHES"),
+        "FULL COLUMNS": _show_parser("FULL COLUMNS"),
+        "FULL EXTENDED COLUMNS": _show_parser("FULL EXTENDED COLUMNS"),
+        "FULL TABLES": _show_parser("FULL TABLES"),
+        "FULL TEMPORARY TABLES": _show_parser("FULL TEMPORARY TABLES"),
+        "FUNCTIONS": _show_parser("FUNCTIONS"),
+        "GRANTS": _show_parser("GRANTS"),
+        "INDEX": _show_parser("INDEX"),
+        "INDEXES": _show_parser("INDEXES"),
+        "INDICES": _show_parser("INDICES"),
+        "KEYS": _show_parser("KEYS"),
+        "MERGES": _show_parser("MERGES"),
+        "POLICIES": _show_parser("POLICIES"),
+        "PROCESSLIST": _show_parser("PROCESSLIST"),
+        "PROFILES": _show_parser("PROFILES"),
+        "QUOTA": _show_parser("QUOTA"),
+        "QUOTAS": _show_parser("QUOTAS"),
+        "ROLES": _show_parser("ROLES"),
+        "ROW POLICIES": _show_parser("ROW POLICIES"),
+        "SETTING": _show_parser("SETTING"),
+        "SETTINGS": _show_parser("SETTINGS"),
+        "SETTINGS PROFILES": _show_parser("SETTINGS PROFILES"),
+        "TABLE": _show_parser("TABLE"),
+        "TABLES": _show_parser("TABLES"),
+        "TEMPORARY FULL TABLES": _show_parser("TEMPORARY FULL TABLES"),
+        "TEMPORARY TABLE": _show_parser("TEMPORARY TABLE"),
+        "TEMPORARY TABLES": _show_parser("TEMPORARY TABLES"),
+        "USERS": _show_parser("USERS"),
+        "VIEW": _show_parser("VIEW"),
+    }
+
     STATEMENT_PARSERS = {
         **parser.Parser.STATEMENT_PARSERS,
         TokenType.DETACH: lambda self: self._parse_detach(),
+        TokenType.SHOW: lambda self: self._parse_show(),
+    }
+
+    SHOW_TRIE = new_trie(key.split(" ") for key in SHOW_PARSERS)
+
+    SHOW_NO_QUERY = {
+        "ACCESS",
+        "ENABLED ROLES",
+        "FILESYSTEM CACHES",
+        "SETTINGS PROFILES",
+        "USERS",
+    }
+
+    SHOW_QUERY_STARTERS = {
+        "CHANGED SETTINGS": {"LIKE", "ILIKE"},
+        "CLUSTERS": {"LIKE", "ILIKE", "NOT", "LIMIT"},
+        "DATABASES": {"LIKE", "ILIKE", "NOT", "LIMIT", "INTO", "FORMAT"},
+        "DICTIONARIES": {"FROM", "LIKE", "LIMIT", "INTO", "FORMAT"},
+        "FUNCTIONS": {"LIKE", "ILIKE"},
+        "MERGES": {"LIKE", "ILIKE", "NOT", "LIMIT"},
+        "PROCESSLIST": {"INTO", "FORMAT"},
+        "SETTINGS": {"LIKE", "ILIKE", "PROFILES"},
+        "TABLES": {"FROM", "IN", "LIKE", "ILIKE", "NOT", "LIMIT", "INTO", "FORMAT"},
+    }
+
+    SHOW_IDENTIFIER_REQUIRED = {
+        "CLUSTER",
+        "CREATE DATABASE",
+        "CREATE DICTIONARY",
+        "CREATE MASKING POLICY",
+        "CREATE QUOTA",
+        "CREATE ROLE",
+        "CREATE ROW POLICY",
+        "CREATE SETTINGS PROFILE",
+        "CREATE TABLE",
+        "CREATE TEMPORARY TABLE",
+        "CREATE USER",
+        "CREATE VIEW",
+        "SETTING",
     }
 
+    SHOW_IDENTIFIER_OR_STARTERS = {
+        "COLUMNS": {"FROM", "IN"},
+        "EXTENDED COLUMNS": {"FROM", "IN"},
+        "EXTENDED FULL COLUMNS": {"FROM", "IN"},
+        "EXTENDED INDEX": {"FROM", "IN"},
+        "EXTENDED INDEXES": {"FROM", "IN"},
+        "EXTENDED INDICES": {"FROM", "IN"},
+        "EXTENDED KEYS": {"FROM", "IN"},
+        "FULL COLUMNS": {"FROM", "IN"},
+        "FULL EXTENDED COLUMNS": {"FROM", "IN"},
+        "FULL TABLES": {"FROM", "IN", "LIKE", "ILIKE", "NOT", "LIMIT", "INTO", "FORMAT"},
+        "FULL TEMPORARY TABLES": {
+            "FROM",
+            "IN",
+            "LIKE",
+            "ILIKE",
+            "NOT",
+            "LIMIT",
+            "INTO",
+            "FORMAT",
+        },
+        "GRANTS": {"FOR", "WITH", "FINAL"},
+        "INDEX": {"FROM", "IN"},
+        "INDEXES": {"FROM", "IN"},
+        "INDICES": {"FROM", "IN"},
+        "KEYS": {"FROM", "IN"},
+        "ROW POLICIES": {"ON"},
+        "TABLE": {"FROM", "IN", "LIKE", "ILIKE", "NOT", "LIMIT", "INTO", "FORMAT"},
+        "TEMPORARY FULL TABLES": {
+            "FROM",
+            "IN",
+            "LIKE",
+            "ILIKE",
+            "NOT",
+            "LIMIT",
+            "INTO",
+            "FORMAT",
+        },
+        "TEMPORARY TABLE": {"FROM", "IN", "LIKE", "ILIKE", "NOT", "LIMIT", "INTO", "FORMAT"},
+        "TEMPORARY TABLES": {
+            "FROM",
+            "IN",
+            "LIKE",
+            "ILIKE",
+            "NOT",
+            "LIMIT",
+            "INTO",
+            "FORMAT",
+        },
+    }
+
+    @staticmethod
+    def _first_show_token(query: str) -> str:
+        stripped = query.strip()
+        return stripped.split(None, 1)[0].upper() if stripped else ""
+
+    @staticmethod
+    def _is_identifier_like(query: str) -> bool:
+        stripped = query.lstrip()
+        if not stripped:
+            return False
+        return bool(stripped[0].isalnum() or stripped[0] in ("_", "`", '"', "'", "("))
+
+    def _is_supported_show_query(self, this: str, query: str | None) -> bool:
+        if not query or not query.strip():
+            return this not in self.SHOW_IDENTIFIER_REQUIRED
+
+        if this in self.SHOW_NO_QUERY:
+            return False
+
+        token = self._first_show_token(query)
+
+        if this in self.SHOW_QUERY_STARTERS:
+            return token in self.SHOW_QUERY_STARTERS[this]
+
+        if this in self.SHOW_IDENTIFIER_REQUIRED:
+            return self._is_identifier_like(query)
+
+        if this in self.SHOW_IDENTIFIER_OR_STARTERS:
+            return token in self.SHOW_IDENTIFIER_OR_STARTERS[this] or self._is_identifier_like(
+                query
+            )
+
+        return True
+
+    def _parse_show_clickhouse(self, this: str) -> exp.Show | exp.Command:
+        query = None
+        if self._curr:
+            query_start = self._curr
+            while self._curr:
+                self._advance()
+            query = self._find_sql(query_start, self._prev)
+
+        if not self._is_supported_show_query(this, query):
+            expression = f" {this}{f' {query.strip()}' if query and query.strip() else ''}"
+            self._warn_unsupported()
+            return exp.Command(this="SHOW", expression=expression)
+
+        return self.expression(exp.Show(this=this, query=query))
+
+    def _parse_explain_settings(self) -> list[exp.EQ] | None:
+        expressions = []
+
+        while True:
+            index = self._index
+            setting = self._parse_id_var()
+
+            if not setting or not self._match(TokenType.EQ):
+                self._retreat(index)
+                break
+
+            expressions.append(
+                self.expression(exp.EQ(this=setting, expression=self._parse_assignment()))
+            )
+
+            if not self._match(TokenType.COMMA):
+                break
+
+        return expressions or None
+
+    def _parse_describe(self) -> exp.Describe | exp.Command:  # type: ignore[override]
+        if self._prev.text.upper() != "EXPLAIN":
+            return super()._parse_describe()
+
+        start = self._prev
+        style = None
+
+        if self._match_text_seq("QUERY", "TREE"):
+            style = "QUERY TREE"
+        elif self._match_text_seq("TABLE", "OVERRIDE"):
+            start = self._tokens[self._index - 2]
+            while self._curr:
+                self._advance()
+            text = self._find_sql(start, self._prev)
+            size = len(start.text)
+            return self.expression(
+                exp.Describe(
+                    this=exp.Command(this=text[:size], expression=text[size:]),
+                    kind="EXPLAIN",
+                )
+            )
+        elif self._match_texts(self.EXPLAIN_STYLES):
+            style = self._prev.text.upper()
+
+        expressions = self._parse_explain_settings()
+        if self._match_set(self.STATEMENT_PARSERS, advance=False):
+            this = self._parse_statement()
+        else:
+            this = self._parse_select()
+
+        if not this:
+            return self._parse_as_command(start)
+
+        if self._curr and self._curr.token_type != TokenType.SEMICOLON:
+            return self._parse_as_command(start)
+
+        return self.expression(
+            exp.Describe(this=this, kind="EXPLAIN", style=style, expressions=expressions)
+        )
+
     def _parse_wrapped_select_or_assignment(self) -> exp.Expr | None:
         return self._parse_wrapped(
             lambda: self._parse_select() or self._parse_assignment(), optional=True
diff --git a/tests/dialects/test_clickhouse.py b/tests/dialects/test_clickhouse.py
index dfcc0c2363..8326dc136a 100644
--- a/tests/dialects/test_clickhouse.py
+++ b/tests/dialects/test_clickhouse.py
@@ -734,6 +734,91 @@ def test_clickhouse(self):
 
         self.validate_identity("SELECT []")
 
+    def test_explain(self):
+        for sql, style in (
+            ("EXPLAIN SELECT 1", None),
+            ("EXPLAIN ESTIMATE SELECT 1", "ESTIMATE"),
+            ("EXPLAIN AST SELECT 1", "AST"),
+            ("EXPLAIN SYNTAX SELECT 1", "SYNTAX"),
+            ("EXPLAIN QUERY TREE SELECT 1", "QUERY TREE"),
+            ("EXPLAIN PLAN SELECT 1", "PLAN"),
+            ("EXPLAIN PIPELINE SELECT 1", "PIPELINE"),
+            ("EXPLAIN json = 1, description = 0 SELECT 1", None),
+            ("EXPLAIN PLAN json = 1, description = 0 SELECT 1", "PLAN"),
+            (
+                "EXPLAIN TABLE OVERRIDE mysql('127.0.0.1:3306', 'db', 'tbl', 'root', 'clickhouse') PARTITION BY toYYYYMM(assumeNotNull(created))",
+                None,
+            ),
+        ):
+            with self.subTest(sql=sql):
+                expression = self.validate_identity(sql)
+                self.assertIsInstance(expression, exp.Describe)
+                self.assertEqual(expression.text("kind"), "EXPLAIN")
+                self.assertEqual(expression.args.get("style"), style)
+
+        for sql in ("EXPLAIN FOO SELECT 1",):
+            with self.subTest(sql=sql):
+                self.validate_identity(sql, check_command_warning=True).assert_is(exp.Command)
+
+    def test_show(self):
+        for sql, write_sql in (
+            ("SHOW TABLES", None),
+            ("SHOW TABLES FROM system", None),
+            ("SHOW TABLES IN system", None),
+            ("SHOW CREATE TABLE t", None),
+            ("SHOW CREATE TABLE db.t", None),
+        ):
+            with self.subTest(sql=sql):
+                self.validate_identity(sql, write_sql=write_sql).assert_is(exp.Show)
+
+        for sql in (
+            "SHOW FULL TABLES FROM system LIKE '%user%'",
+            "SHOW TABLES FROM system LIMIT 2",
+            "SHOW DATABASES ILIKE '%de%'",
+            "SHOW DATABASES NOT LIKE '%de%'",
+            "SHOW COLUMNS FROM t",
+            "SHOW EXTENDED FULL COLUMNS FROM tbl FROM db ILIKE '%x%' LIMIT 10 INTO OUTFILE 'f' FORMAT TSVRaw",
+            "SHOW DICTIONARIES",
+            "SHOW INDEX FROM tbl",
+            "SHOW EXTENDED KEYS FROM tbl FROM db WHERE a > 1 INTO OUTFILE 'x' FORMAT TSVRaw",
+            "SHOW PROCESSLIST INTO OUTFILE 'x' FORMAT TSVRaw",
+            "SHOW GRANTS FOR user1 WITH IMPLICIT FINAL",
+            "SHOW CREATE USER CURRENT_USER",
+            "SHOW CREATE ROLE role1",
+            "SHOW CREATE ROW POLICY p ON db.t",
+            "SHOW CREATE QUOTA CURRENT",
+            "SHOW CREATE SETTINGS PROFILE p",
+            "SHOW USERS",
+            "SHOW CURRENT ROLES",
+            "SHOW ENABLED ROLES",
+            "SHOW SETTINGS PROFILES",
+            "SHOW ROW POLICIES ON db.t",
+            "SHOW SETTINGS ILIKE '%timeout%'",
+            "SHOW CHANGED SETTINGS ILIKE '%memory%'",
+            "SHOW SETTING max_threads",
+            "SHOW FILESYSTEM CACHES",
+            "SHOW ENGINES",
+            "SHOW FUNCTIONS ILIKE 'to%'",
+            "SHOW MERGES LIKE 'your_t%' LIMIT 1",
+            "SHOW CLUSTERS LIKE 'test%' LIMIT 1",
+            "SHOW CLUSTER 'test_shard_localhost'",
+            "SHOW CREATE MASKING POLICY policy_name ON db.t",
+            "SHOW CREATE TABLE t FORMAT TSVRaw",
+            "SHOW CREATE TABLE t INTO OUTFILE 'x'",
+        ):
+            with self.subTest(sql=sql):
+                self.validate_identity(sql).assert_is(exp.Show)
+
+        for sql in (
+            "SHOW FOO BAR",
+            "SHOW TABLES FOO BAR",
+            "SHOW SETTINGS max_threads",
+            "SHOW ACCESS FOR user1",
+            "SHOW FUNCTIONS NOT LIKE 'to%'",
+        ):
+            with self.subTest(sql=sql):
+                self.validate_identity(sql, check_command_warning=True).assert_is(exp.Command)
+
     def test_clickhouse_values(self):
         ast = self.parse_one("SELECT * FROM VALUES (1, 2, 3)")
         self.assertEqual(len(list(ast.find_all(exp.Tuple))), 4)