Skip to content

Commit b4dea8c

Browse files
fivetran-ashashankar and georgesittas
authored and committed
feat(snowflake)!: Transpilation support for UUID_STRING
1 parent 38b8986 commit b4dea8c

2 files changed

Lines changed: 103 additions & 2 deletions

File tree

sqlglot/generators/duckdb.py

Lines changed: 102 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4429,6 +4429,107 @@ def _corr_sql(
44294429

44304430
return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
44314431

4432-
@unsupported_args("this", "name")
44334432
def uuid_sql(self, expression: exp.Uuid) -> str:
    """Render a UUID expression as SQL.

    When the node carries both a namespace (``this``) and a ``name`` argument,
    a name-based (version 5) UUID is emulated: the dashes are removed from the
    namespace, the result is passed through UNHEX, concatenated with the
    encoded name and hashed with SHA1. The first 32 hex digits of the digest
    are then reassembled into the 8-4-4-4-12 layout with the version digit
    forced to ``5`` and the variant bits set. Any other argument combination
    is delegated to the parent implementation.

    Args:
        expression: The UUID node to render.

    Returns:
        The generated SQL; the emulated form is a parenthesized query.
    """
    namespace = expression.this
    name = expression.args.get("name")

    # Without both arguments there is nothing to hash (UUID v4 or BigQuery
    # compatibility), so defer to the parent implementation.
    if not (namespace and name):
        return super().uuid_sql(expression)

    def num(value):
        # Shorthand for a numeric literal node.
        return exp.Literal.number(value)

    digest_col = exp.column("h")

    def hex_slice(start, length):
        # SUBSTR(h, start, length) over the digest column exposed by the inner query.
        return exp.func("SUBSTR", digest_col, num(start), num(length))

    # Inner query:
    #   SELECT SUBSTR(SHA1(UNHEX(REPLACE(namespace, '-', '')) || ENCODE(name)), 1, 32) AS h
    namespace_hex = exp.func(
        "REPLACE", namespace, exp.Literal.string("-"), exp.Literal.string("")
    )
    hash_input = exp.DPipe(
        this=exp.func("UNHEX", namespace_hex),
        expression=exp.Encode(this=name, charset=exp.Literal.string("utf8")),
    )
    digest = exp.func("SUBSTR", exp.func("SHA1", hash_input), num(1), num(32))
    digest_query = exp.Select(expressions=[exp.alias_(digest, "h")])

    # Variant group: FORMAT('{:02x}', (CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63) | 128)
    # followed by SUBSTR(h, 19, 2). Masking with 63 keeps the low six bits and
    # OR-ing with 128 sets the top two bits to binary 10.
    variant_source = exp.cast(
        exp.DPipe(this=exp.Literal.string("0x"), expression=hex_slice(17, 2)),
        exp.DataType.Type.INT,
    )
    variant_byte = exp.BitwiseOr(
        this=exp.BitwiseAnd(this=variant_source, expression=num(63)),
        expression=num(128),
    )
    variant_group = exp.DPipe(
        this=exp.func("FORMAT", exp.Literal.string("{:02x}"), variant_byte),
        expression=hex_slice(19, 2),
    )

    # Pieces of the textual UUID in output order. The third group replaces the
    # digest's 13th hex digit with the literal version digit '5'.
    hyphen = exp.Literal.string("-")
    segments = [
        hex_slice(1, 8),
        hyphen,
        hex_slice(9, 4),
        hyphen,
        exp.DPipe(this=exp.Literal.string("5"), expression=hex_slice(14, 3)),
        hyphen,
        variant_group,
        hyphen,
        hex_slice(21, 12),
    ]

    # Left-fold the pieces into a single chain of || operators.
    uuid_text = segments[0]
    for segment in segments[1:]:
        uuid_text = exp.DPipe(this=uuid_text, expression=segment)

    # Outer query: SELECT CAST(LOWER(<uuid_text>) AS UUID) FROM (<digest_query>)
    uuid_value = exp.cast(exp.func("LOWER", uuid_text), exp.DataType.Type.UUID)
    formatted_query = exp.Select(expressions=[uuid_value]).from_(
        exp.Subquery(this=digest_query)
    )

    # Parenthesize so the query can be embedded wherever an expression is expected.
    return f"({self.sql(formatted_query)})"

tests/dialects/test_snowflake.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2220,7 +2220,7 @@ def test_snowflake(self):
22202220
"spark2": "UUID()",
22212221
"spark": "UUID()",
22222222
"databricks": "UUID()",
2223-
"duckdb": UnsupportedError,
2223+
"duckdb": "(SELECT CAST(LOWER(SUBSTRING(h, 1, 8) || '-' || SUBSTRING(h, 9, 4) || '-' || '5' || SUBSTRING(h, 14, 3) || '-' || FORMAT('{:02x}', CAST('0x' || SUBSTRING(h, 17, 2) AS INT) & 63 | 128) || SUBSTRING(h, 19, 2) || '-' || SUBSTRING(h, 21, 12)) AS UUID) FROM (SELECT SUBSTRING(SHA1(UNHEX(REPLACE('fe971b24-9572-4005-b22f-351e9c09274d', '-', '')) || ENCODE('foo')), 1, 32) AS h))",
22242224
"presto": "UUID()",
22252225
"trino": "UUID()",
22262226
"postgres": "GEN_RANDOM_UUID()",

0 commit comments

Comments
 (0)