Skip to content

Commit 49787a5

Browse files
fivetran-ashashankar and claude
authored and committed
refactor: remove redundant CAST from UUID_V5_TEMPLATE
Review comments: implement uuid_string. The CAST to VARCHAR is unnecessary, as LOWER() and string concatenation already return VARCHAR type in DuckDB. 🤖 Generated with [Claude Code](https://claude.com/claude-code). Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 9bc9ef2 commit 49787a5

1 file changed

Lines changed: 25 additions & 99 deletions

File tree

sqlglot/generators/duckdb.py

Lines changed: 25 additions & 99 deletions
Original file line number | Diff line number | Diff line change
@@ -2047,6 +2047,22 @@ class DuckDBGenerator(generator.Generator):
20472047
""",
20482048
)
20492049

2050+
UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse(
2051+
"""
2052+
(SELECT
2053+
LOWER(
2054+
SUBSTR(h, 1, 8) || '-' ||
2055+
SUBSTR(h, 9, 4) || '-' ||
2056+
'5' || SUBSTR(h, 14, 3) || '-' ||
2057+
FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' ||
2058+
SUBSTR(h, 21, 12)
2059+
)
2060+
FROM (
2061+
SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h
2062+
))
2063+
"""
2064+
)
2065+
20502066
# Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
20512067
# Each element is paired with its 1-based position via LIST_ZIP, then filtered
20522068
# by a comparison operator (supplied via :cond) that determines the operation:
@@ -4430,106 +4446,16 @@ def _corr_sql(
44304446
return self.sql(exp.case().when(exp.IsNan(this=corr_expr), exp.null()).else_(corr_expr))
44314447

44324448
def uuid_sql(self, expression: exp.Uuid) -> str:
4433-
# UUID v5 (namespace + name) - Emulate using SHA1
4434-
if expression.this and expression.args.get("name"):
4435-
namespace = expression.this
4436-
name = expression.args.get("name")
4437-
4438-
# Inner query: SELECT substr(sha1(unhex(replace(namespace, '-', '')) || encode(name)), 1, 32) AS h
4439-
# Step 1: replace(namespace, '-', '')
4440-
namespace_stripped = exp.func(
4441-
"REPLACE", namespace, exp.Literal.string("-"), exp.Literal.string("")
4442-
)
4443-
4444-
# Step 2: unhex(...)
4445-
namespace_bytes = exp.func("UNHEX", namespace_stripped)
4446-
4447-
# Step 3: encode(name) - DuckDB defaults to UTF-8
4448-
name_bytes = exp.Encode(this=name, charset=exp.Literal.string("utf8"))
4449-
4450-
# Step 4: namespace_bytes || name_bytes
4451-
concat_bytes = exp.DPipe(this=namespace_bytes, expression=name_bytes)
4452-
4453-
# Step 5: sha1(...)
4454-
sha1_hash = exp.func("SHA1", concat_bytes)
4455-
4456-
# Step 6: substr(..., 1, 32)
4457-
hash_32_chars = exp.func(
4458-
"SUBSTR", sha1_hash, exp.Literal.number(1), exp.Literal.number(32)
4459-
)
4460-
4461-
# Inner SELECT
4462-
inner_select = exp.Select(expressions=[exp.alias_(hash_32_chars, "h")])
4463-
4464-
# Outer query: Format as UUID v5
4465-
# Reference to 'h' column from inner query
4466-
h = exp.column("h")
4449+
namespace = expression.this
4450+
name = expression.args.get("name")
44674451

4468-
# Build the UUID string parts
4469-
# substr(h, 1, 8) || '-' || substr(h, 9, 4) || '-' || '5' || substr(h, 14, 3) || '-' || ...
4470-
4471-
# Part 1: substr(h, 1, 8)
4472-
part1 = exp.func("SUBSTR", h, exp.Literal.number(1), exp.Literal.number(8))
4473-
4474-
# Part 2: substr(h, 9, 4)
4475-
part2 = exp.func("SUBSTR", h, exp.Literal.number(9), exp.Literal.number(4))
4476-
4477-
# Part 3: '5' || substr(h, 14, 3) (version 5)
4478-
part3 = exp.DPipe(
4479-
this=exp.Literal.string("5"),
4480-
expression=exp.func("SUBSTR", h, exp.Literal.number(14), exp.Literal.number(3)),
4481-
)
4482-
4483-
# Part 4: format('{:02x}', (('0x' || substr(h, 17, 2))::INTEGER & 63) | 128) || substr(h, 19, 2)
4484-
# This sets the variant bits
4485-
h_17_2 = exp.func("SUBSTR", h, exp.Literal.number(17), exp.Literal.number(2))
4486-
hex_str = exp.DPipe(this=exp.Literal.string("0x"), expression=h_17_2)
4487-
as_int = exp.cast(hex_str, exp.DataType.Type.INT)
4488-
masked = exp.BitwiseAnd(this=as_int, expression=exp.Literal.number(63))
4489-
variant_byte = exp.BitwiseOr(this=masked, expression=exp.Literal.number(128))
4490-
formatted = exp.func("FORMAT", exp.Literal.string("{:02x}"), variant_byte)
4491-
h_19_2 = exp.func("SUBSTR", h, exp.Literal.number(19), exp.Literal.number(2))
4492-
part4 = exp.DPipe(this=formatted, expression=h_19_2)
4493-
4494-
# Part 5: substr(h, 21, 12)
4495-
part5 = exp.func("SUBSTR", h, exp.Literal.number(21), exp.Literal.number(12))
4496-
4497-
# Concatenate all parts: part1 || '-' || part2 || '-' || part3 || '-' || part4 || '-' || part5
4498-
dash = exp.Literal.string("-")
4499-
concat = exp.DPipe(
4500-
this=exp.DPipe(
4501-
this=exp.DPipe(
4502-
this=exp.DPipe(
4503-
this=exp.DPipe(
4504-
this=exp.DPipe(
4505-
this=exp.DPipe(
4506-
this=exp.DPipe(this=part1, expression=dash),
4507-
expression=part2,
4508-
),
4509-
expression=dash,
4510-
),
4511-
expression=part3,
4512-
),
4513-
expression=dash,
4514-
),
4515-
expression=part4,
4516-
),
4517-
expression=dash,
4518-
),
4519-
expression=part5,
4520-
)
4521-
4522-
# Wrap in lower() and cast to UUID
4523-
lowered = exp.func("LOWER", concat)
4524-
uuid_value = exp.cast(lowered, exp.DataType.Type.UUID)
4525-
4526-
# Outer SELECT: SELECT uuid_value FROM (inner_select)
4527-
outer_select = exp.Select(expressions=[uuid_value]).from_(
4528-
exp.Subquery(this=inner_select)
4452+
# UUID v5 (namespace + name) - Emulate using SHA1
4453+
if namespace and name:
4454+
result = exp.replace_placeholders(
4455+
self.UUID_V5_TEMPLATE.copy(),
4456+
namespace=namespace,
4457+
name=name,
45294458
)
4459+
return self.sql(result)
45304460

4531-
# Wrap the entire thing in a subquery/parentheses
4532-
return f"({self.sql(outer_select)})"
4533-
4534-
# UUID v4 or BigQuery compatibility
45354461
return super().uuid_sql(expression)

0 commit comments

Comments
 (0)