Skip to content

Commit e3fee4d

Browse files
feat(snowflake): transpile DuckDB list() and list_distinct() [CLAUDE] (#7551)
Map DuckDB's list(col) to Snowflake's ARRAY_AGG(col) via a rename_func entry in the Snowflake generator's TRANSFORMS. Parse DuckDB's list_distinct(col) into exp.ArrayDistinct by adding the missing FUNCTIONS entry in the DuckDB parser. On the Snowflake side, add an arraydistinct_sql method that accounts for NULL semantics: DuckDB's LIST_DISTINCT strips NULLs while Snowflake's ARRAY_DISTINCT preserves them, so when the source expression does not preserve NULLs (check_null is unset or false) the generated SQL wraps the input array in ARRAY_COMPACT before deduplicating, i.e. ARRAY_DISTINCT(ARRAY_COMPACT(col)).
1 parent 63f8dc6 commit e3fee4d

4 files changed

Lines changed: 35 additions & 0 deletions

File tree

sqlglot/generators/snowflake.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,7 @@ class SnowflakeGenerator(generator.Generator):
511511
exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")(
512512
rename_func("EDITDISTANCE")
513513
),
514+
exp.List: rename_func("ARRAY_AGG"),
514515
exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}",
515516
exp.LogicalAnd: rename_func("BOOLAND_AGG"),
516517
exp.LogicalOr: rename_func("BOOLOR_AGG"),
@@ -1048,6 +1049,11 @@ def arrayagg_sql(self, expression: exp.ArrayAgg) -> str:
10481049

10491050
return expr_sql
10501051

1052+
def arraydistinct_sql(self, expression: exp.ArrayDistinct) -> str:
    """Render an ``ArrayDistinct`` node as a Snowflake ``ARRAY_DISTINCT`` call.

    When the node's ``check_null`` arg is truthy, the operand is passed to
    ``ARRAY_DISTINCT`` unchanged. Otherwise the operand is first wrapped in
    ``ARRAY_COMPACT``, producing ``ARRAY_DISTINCT(ARRAY_COMPACT(<operand>))``.

    Per the accompanying commit message, the wrap exists because DuckDB's
    LIST_DISTINCT strips NULLs while Snowflake's ARRAY_DISTINCT keeps them.
    """
    keeps_nulls = expression.args.get("check_null")
    # Only build the ARRAY_COMPACT wrapper when the source did not ask for
    # NULLs to be preserved (flag unset or falsy).
    operand = expression.this if keeps_nulls else exp.ArrayCompact(this=expression.this)
    return self.func("ARRAY_DISTINCT", operand)
1056+
10511057
def arraytostring_sql(self, expression: exp.ArrayToString) -> str:
10521058
return self.func("ARRAY_TO_STRING", expression.this, expression.expression)
10531059

sqlglot/parsers/duckdb.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ class DuckDBParser(parser.Parser):
133133
"JSON_ARRAY": lambda args: exp.JSONArray(expressions=args),
134134
"JSON_EXTRACT_PATH": parser.build_extract_json_with_path(exp.JSONExtract),
135135
"JSON_EXTRACT_STRING": parser.build_extract_json_with_path(exp.JSONExtractScalar),
136+
"LIST_DISTINCT": exp.ArrayDistinct.from_arg_list,
136137
"LIST_APPEND": exp.ArrayAppend.from_arg_list,
137138
"LIST_CONCAT": parser.build_array_concat,
138139
"LIST_CONTAINS": exp.ArrayContains.from_arg_list,

tests/dialects/test_duckdb.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,20 @@ def test_duckdb(self):
690690
"SELECT LIST(DISTINCT sample_col) FILTER(WHERE NOT sample_col IS NULL) FROM sample_table",
691691
read={"spark": "SELECT COLLECT_SET(sample_col) FROM sample_table"},
692692
)
693+
self.validate_all(
694+
"SELECT LIST(col) FROM t",
695+
write={
696+
"duckdb": "SELECT LIST(col) FROM t",
697+
"snowflake": "SELECT ARRAY_AGG(col) FROM t",
698+
},
699+
)
700+
self.validate_all(
701+
"SELECT LIST_DISTINCT(col)",
702+
write={
703+
"duckdb": "SELECT LIST_DISTINCT(col)",
704+
"snowflake": "SELECT ARRAY_DISTINCT(ARRAY_COMPACT(col))",
705+
},
706+
)
693707
self.validate_all(
694708
"SELECT LIST_TRANSFORM(STR_SPLIT_REGEX('abc , dfg ', ','), x -> TRIM(x))",
695709
write={

tests/dialects/test_snowflake.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -675,6 +675,20 @@ def test_snowflake(self):
675675
"duckdb": "SELECT LIST(DISTINCT col) FILTER(WHERE NOT col IS NULL) OVER (PARTITION BY grp) FROM t"
676676
},
677677
)
678+
self.validate_all(
679+
"SELECT ARRAY_AGG(col) FROM t",
680+
write={
681+
"snowflake": "SELECT ARRAY_AGG(col) FROM t",
682+
"duckdb": "SELECT ARRAY_AGG(col) FILTER(WHERE col IS NOT NULL) FROM t",
683+
},
684+
)
685+
self.validate_all(
686+
"SELECT ARRAY_DISTINCT(col)",
687+
write={
688+
"snowflake": "SELECT ARRAY_DISTINCT(col)",
689+
"duckdb": "SELECT CASE WHEN ARRAY_LENGTH(col) <> LIST_COUNT(col) THEN LIST_APPEND(LIST_DISTINCT(LIST_FILTER(col, _u -> NOT _u IS NULL)), NULL) ELSE LIST_DISTINCT(col) END",
690+
},
691+
)
678692
self.validate_identity("SELECT ARRAY_APPEND([1, 2, 3], 4)")
679693
self.validate_identity("SELECT ARRAY_CAT([1, 2], [3, 4])")
680694
self.validate_identity("SELECT ARRAY_PREPEND([2, 3, 4], 1)")

0 commit comments

Comments
 (0)