Skip to content

Commit 78d2b74

Browse files
Merge uuid branch: Add UUID_STRING transpilation support
feat(snowflake): add UUID_IS_STRING_TYPE flag and UUID_STRING parser. Changes: (1) add support for transpiling Snowflake UUID_STRING() to DuckDB UUID(); (2) introduce the UUID_IS_STRING_TYPE flag for dialect-specific behavior; (3) add UuidV5 expression with template-based generation; (4) update integration tests. All integration tests pass with no regressions. Related to fivetran/sqlglot-integration-tests#377.
2 parents e883b0b + fc99ddc commit 78d2b74

7 files changed

Lines changed: 47 additions & 8 deletions

File tree

sqlglot/dialects/duckdb.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class DuckDB(Dialect):
2424
SUPPORTS_FIXED_SIZE_ARRAYS = True
2525
STRICT_JSON_PATH_SYNTAX = False
2626
NUMBERS_CAN_BE_UNDERSCORE_SEPARATED = True
27+
UUID_IS_STRING_TYPE = False
2728

2829
# https://duckdb.org/docs/sql/introduction.html#creating-a-new-table
2930
NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_INSENSITIVE

sqlglot/dialects/presto.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ class Presto(Dialect):
2424
LOG_BASE_FIRST: bool | None = None
2525
SUPPORTS_VALUES_DEFAULT = False
2626
LEAST_GREATEST_IGNORES_NULLS = False
27+
UUID_IS_STRING_TYPE = False
2728

2829
TIME_MAPPING = MySQL.TIME_MAPPING
2930

sqlglot/dialects/snowflake.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class Snowflake(Dialect):
2828
TRY_CAST_REQUIRES_STRING = True
2929
SUPPORTS_ALIAS_REFS_IN_JOIN_CONDITIONS = True
3030
LEAST_GREATEST_IGNORES_NULLS = False
31+
UUID_IS_STRING_TYPE = True
3132

3233
EXPRESSION_METADATA = EXPRESSION_METADATA.copy()
3334

sqlglot/generators/duckdb.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2047,6 +2047,22 @@ class DuckDBGenerator(generator.Generator):
20472047
""",
20482048
)
20492049

2050+
UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse(
2051+
"""
2052+
(SELECT
2053+
LOWER(
2054+
SUBSTR(h, 1, 8) || '-' ||
2055+
SUBSTR(h, 9, 4) || '-' ||
2056+
'5' || SUBSTR(h, 14, 3) || '-' ||
2057+
FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' ||
2058+
SUBSTR(h, 21, 12)
2059+
)
2060+
FROM (
2061+
SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h
2062+
))
2063+
"""
2064+
)
2065+
20502066
# Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
20512067
# Each element is paired with its 1-based position via LIST_ZIP, then filtered
20522068
# by a comparison operator (supplied via :cond) that determines the operation:
@@ -4428,3 +4444,18 @@ def _corr_sql(
44284444
corr_expr = expression # make mypy happy
44294445

44304446
return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
4447+
4448+
def uuid_sql(self, expression: exp.Uuid) -> str:
4449+
namespace = expression.this
4450+
name = expression.args.get("name")
4451+
4452+
# UUID v5 (namespace + name) - Emulate using SHA1
4453+
if namespace and name:
4454+
result = exp.replace_placeholders(
4455+
self.UUID_V5_TEMPLATE.copy(),
4456+
namespace=namespace,
4457+
name=name,
4458+
)
4459+
return self.sql(result)
4460+
4461+
return super().uuid_sql(expression)

sqlglot/parser.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,11 @@ class Parser:
421421
"UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
422422
"UPPER": build_upper,
423423
"UUID": lambda args, dialect: exp.Uuid(is_string=dialect.UUID_IS_STRING_TYPE or None),
424+
"UUID_STRING": lambda args, dialect: exp.Uuid(
425+
this=seq_get(args, 0),
426+
name=seq_get(args, 1),
427+
is_string=dialect.UUID_IS_STRING_TYPE or None,
428+
),
424429
"VAR_MAP": build_var_map,
425430
}
426431

tests/dialects/test_dialect.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3881,7 +3881,6 @@ def test_uuid(self):
38813881
"trino": "UUID()",
38823882
"mysql": "UUID()",
38833883
"postgres": "GEN_RANDOM_UUID()",
3884-
"snowflake": "UUID_STRING()",
38853884
"tsql": "NEWID()",
38863885
},
38873886
write={

tests/dialects/test_snowflake.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2210,19 +2210,20 @@ def test_snowflake(self):
22102210
"SELECT UUID_STRING(), UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')"
22112211
)
22122212

2213+
# Note: Snowflake's UUID_STRING returns VARCHAR; the DuckDB emulation also yields VARCHAR because it is assembled from string operations.
22132214
self.validate_all(
22142215
"UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')",
22152216
read={
22162217
"snowflake": "UUID_STRING('fe971b24-9572-4005-b22f-351e9c09274d', 'foo')",
22172218
},
22182219
write={
2219-
"hive": "UUID()",
2220-
"spark2": "UUID()",
2221-
"spark": "UUID()",
2222-
"databricks": "UUID()",
2223-
"duckdb": "UUID()",
2224-
"presto": "UUID()",
2225-
"trino": "UUID()",
2220+
"hive": "CAST(UUID() AS STRING)",
2221+
"spark2": "CAST(UUID() AS STRING)",
2222+
"spark": "CAST(UUID() AS STRING)",
2223+
"databricks": "CAST(UUID() AS STRING)",
2224+
"duckdb": "(SELECT LOWER(SUBSTRING(h, 1, 8) || '-' || SUBSTRING(h, 9, 4) || '-' || '5' || SUBSTRING(h, 14, 3) || '-' || FORMAT('{:02x}', CAST('0x' || SUBSTRING(h, 17, 2) AS INT) & 63 | 128) || SUBSTRING(h, 19, 2) || '-' || SUBSTRING(h, 21, 12)) FROM (SELECT SUBSTRING(SHA1(UNHEX(REPLACE('fe971b24-9572-4005-b22f-351e9c09274d', '-', '')) || ENCODE('foo')), 1, 32) AS h))",
2225+
"presto": "CAST(UUID() AS VARCHAR)",
2226+
"trino": "CAST(UUID() AS VARCHAR)",
22262227
"postgres": "GEN_RANDOM_UUID()",
22272228
"bigquery": "GENERATE_UUID()",
22282229
},

0 commit comments

Comments
 (0)