@@ -2047,6 +2047,22 @@ class DuckDBGenerator(generator.Generator):
20472047 """ ,
20482048 )
20492049
# SQL template used to emulate a name-based (version 5) UUID with SHA1.
#
# Placeholders:
#   :namespace - the namespace UUID; its dashes are removed and the remaining
#                hex digits are decoded with UNHEX before hashing.
#   :name      - the name; it is passed through ENCODE(..., 'utf8') and
#                concatenated to the namespace value before hashing.
#
# The inner SELECT keeps the first 32 characters of the SHA1 result as `h`.
# The outer SELECT re-assembles `h` into the dashed 8-4-4-4-12 layout:
#   * the third group is a literal '5' followed by SUBSTR(h, 14, 3), so the
#     13th character of the hash is dropped in favour of the version digit;
#   * the fourth group starts with the value at positions 17-18, read as a
#     hex integer, AND-ed with 63 and OR-ed with 128 (the variant bits), and
#     rendered back as two hex digits, followed by SUBSTR(h, 19, 2);
#   * the whole string is lower-cased.
#
# NOTE(review): the hand-built expression this template replaces wrapped the
# LOWER(...) result in a cast to UUID; the template does not -- confirm that
# producing text instead of a UUID-typed value is intended.
UUID_V5_TEMPLATE: exp.Expr = exp.maybe_parse(
    """
    (SELECT
        LOWER(
            SUBSTR(h, 1, 8) || '-' ||
            SUBSTR(h, 9, 4) || '-' ||
            '5' || SUBSTR(h, 14, 3) || '-' ||
            FORMAT('{:02x}', CAST('0x' || SUBSTR(h, 17, 2) AS INT) & 63 | 128) || SUBSTR(h, 19, 2) || '-' ||
            SUBSTR(h, 21, 12)
        )
    FROM (
        SELECT SUBSTR(SHA1(UNHEX(REPLACE(:namespace, '-', '')) || ENCODE(:name, 'utf8')), 1, 32) AS h
    ))
    """
)
2065+
20502066 # Shared bag semantics outer frame for ARRAY_EXCEPT and ARRAY_INTERSECTION.
20512067 # Each element is paired with its 1-based position via LIST_ZIP, then filtered
20522068 # by a comparison operator (supplied via :cond) that determines the operation:
@@ -4430,106 +4446,16 @@ def _corr_sql(
44304446 return self .sql (exp .case ().when (exp .IsNan (this = corr_expr ), exp .null ()).else_ (corr_expr ))
44314447
def uuid_sql(self, expression: exp.Uuid) -> str:
    """Generate SQL for a UUID expression.

    When the expression carries both a namespace (``expression.this``) and a
    ``name`` argument, a name-based UUID (v5) is emulated with SHA1 by filling
    the ``:namespace`` and ``:name`` placeholders of ``UUID_V5_TEMPLATE``.
    In every other case generation is delegated to the parent class.

    Args:
        expression: The UUID expression to render.

    Returns:
        The generated SQL string.
    """
    ns_arg = expression.this
    name_arg = expression.args.get("name")

    # Without both arguments there is nothing to hash: use the parent's output.
    if not (ns_arg and name_arg):
        return super().uuid_sql(expression)

    # Work on a copy so the shared class-level template is never mutated.
    template = self.UUID_V5_TEMPLATE.copy()
    filled = exp.replace_placeholders(template, namespace=ns_arg, name=name_arg)
    return self.sql(filled)
0 commit comments