|
-- Canonicalize a family name. Called by FAMILY_INNER once trailing style
-- tokens have been removed.
--
-- * A name containing "font" followed by "awesome" (case-insensitive, with at
--   most one whitespace character in between) collapses to 'Font Awesome'.
-- * Any other name is returned trimmed, provided the trimmed value has at
--   least three characters.
-- * Shorter values, the empty string, and NULL all yield NULL.
CREATE TEMPORARY FUNCTION FAMILY_INNER_INNER(name STRING) AS (
  CASE
    WHEN REGEXP_CONTAINS(name, r'(?i)font\s?awesome') THEN 'Font Awesome'
    -- A NULL name makes this comparison NULL, so it falls through to ELSE.
    WHEN LENGTH(TRIM(name)) >= 3 THEN TRIM(name)
    ELSE NULL
  END
);
| 8 | + |
-- Remove trailing style tokens from a family name, then canonicalize the
-- remainder with FAMILY_INNER_INNER. Called by FAMILY.
--
-- The pattern is case-insensitive and anchored at the end of the name. It
-- strips one or more consecutive tokens (weight, width, slant, a digit followed
-- by "00", or a point size such as "12pt"), each optionally preceded by a
-- single whitespace character or hyphen. Because that separator is optional, a
-- token glued directly onto the end of a name is stripped as well.
CREATE TEMPORARY FUNCTION FAMILY_INNER(name STRING) AS (
  FAMILY_INNER_INNER(
    REGEXP_REPLACE(
      name,
      r'(?i)([\s-]?(black|bold|book|cond(ensed)?|demi|ex(tra)?|heavy|italic|light|medium|narrow|regular|semi|thin|ultra|wide|\d00|\d+pt))+$',
      ''
    )
  )
);
| 19 | + |
-- Extract the normalized family name from a payload.
--
-- Reads `_font_details.names[16]` and falls back to `_font_details.names[1]`
-- (presumably the OpenType name IDs for the typographic family and the legacy
-- family -- confirm against the payload producer). The chosen value is passed
-- through FAMILY_INNER.
--
-- COALESCE alone only skips NULL, so an empty string at index 16 would shadow
-- a usable name at index 1. Empty strings are therefore treated as missing.
--
-- STRING() raises an error when the JSON value is not a string; that behavior
-- is kept so that malformed payloads are not silently hidden.
CREATE TEMPORARY FUNCTION FAMILY(payload JSON) AS (
  FAMILY_INNER(
    COALESCE(
      NULLIF(STRING(payload._font_details.names[16]), ''),
      NULLIF(STRING(payload._font_details.names[1]), '')
    )
  )
);
| 29 | + |
-- Derive a lowercase file format from a MIME type, falling back to the file
-- extension.
--
-- Arguments:
--   extension: file extension, used when `type` yields nothing usable.
--   type:      MIME type, for example 'font/woff2' or 'application/x-font-ttf'.
--
-- The subtype is taken after the first '/', with an optional 'x-' and an
-- optional 'font-' prefix removed (matched case-insensitively). The capture
-- stops at the first ';' or whitespace so that parameters such as
-- '; charset=utf-8' do not leak into the format. An empty capture (for example
-- from 'font/') is treated as missing so that the extension is used instead.
-- Returns NULL when neither argument provides a value.
CREATE TEMPORARY FUNCTION FILE_FORMAT(extension STRING, type STRING) AS (
  LOWER(
    COALESCE(
      NULLIF(REGEXP_EXTRACT(type, r'(?i)/(?:x-)?(?:font-)?([^;\s]*)'), ''),
      extension
    )
  )
);
| 34 | + |
-- Normalize a foundry (vendor) identifier. Called by FOUNDRY.
--
-- Two identifiers are remapped, compared case-insensitively against the raw
-- input: 'ADBO' becomes 'ADBE' and 'PFED' becomes 'AWSM'. Every other value has
-- its control characters removed and is trimmed, keeping its original letter
-- case. An empty result and a NULL input both yield NULL.
CREATE TEMPORARY FUNCTION FOUNDRY_INNER(name STRING) AS (
  CASE
    WHEN UPPER(name) = 'ADBO' THEN 'ADBE'
    WHEN UPPER(name) = 'PFED' THEN 'AWSM'
    ELSE NULLIF(TRIM(REGEXP_REPLACE(name, r'[[:cntrl:]]+', '')), '')
  END
);
| 43 | + |
-- Extract the normalized foundry identifier from a payload.
--
-- Reads `_font_details.OS2.achVendID` as a string and normalizes it with
-- FOUNDRY_INNER. A missing field yields NULL.
CREATE TEMPORARY FUNCTION FOUNDRY(payload JSON) AS (
  FOUNDRY_INNER(
    STRING(payload._font_details.OS2.achVendID)
  )
);
| 48 | + |
-- Infer writing scripts from a JSON array of codepoints. Called by SCRIPTS.
--
-- Arguments:
--   codepoints: JSON array whose entries are parsed as base-10 integers.
--
-- Returns the result of `detectWritingScript` from the text-utils library, or
-- an empty array when `codepoints` is not a non-empty array.
CREATE TEMPORARY FUNCTION SCRIPTS_INNER(codepoints JSON)
RETURNS ARRAY<STRING>
LANGUAGE js
OPTIONS (library = ["gs://httparchive/lib/text-utils.js"])
AS r"""
// Checking only for a truthy `length` would let a JSON string through and
// then fail on `.map`; require a real array instead.
if (!Array.isArray(codepoints) || codepoints.length === 0) {
  return [];
}
// parseInt accepts both numbers and numeric strings.
const values = codepoints.map((codepoint) => parseInt(codepoint, 10));
// The second argument is passed through unchanged; presumably a minimum-share
// threshold -- see text-utils.js to confirm.
return detectWritingScript(values, 0.05);
""";
| 61 | + |
-- Infer writing scripts from a payload.
--
-- Passes `_font_details.cmap.codepoints` to SCRIPTS_INNER.
CREATE TEMPORARY FUNCTION SCRIPTS(payload JSON) AS (
  SCRIPTS_INNER(
    payload._font_details.cmap.codepoints
  )
);
| 66 | + |
-- Infer the font service from a URL.
--
-- Patterns are tested in order and the first match wins. A URL that matches
-- none of them, including NULL, is reported as 'self-hosted'.
--
-- Every literal dot in a host name is escaped so that it cannot match an
-- arbitrary character.
CREATE TEMPORARY FUNCTION SERVICE(url STRING) AS (
  CASE
    WHEN REGEXP_CONTAINS(url, r'(fonts|use)\.typekit\.(net|com)') THEN 'Adobe'
    WHEN REGEXP_CONTAINS(url, r'cloud\.typenetwork\.com') THEN 'typenetwork.com'
    WHEN REGEXP_CONTAINS(url, r'cloud\.typography\.com') THEN 'typography.com'
    WHEN REGEXP_CONTAINS(url, r'cloud\.webtype\.com') THEN 'webtype.com'
    WHEN REGEXP_CONTAINS(url, r'f\.fontdeck\.com') THEN 'fontdeck.com'
    WHEN REGEXP_CONTAINS(url, r'fast\.fonts\.(com|net)\/(jsapi|cssapi)') THEN 'fonts.com'
    WHEN REGEXP_CONTAINS(url, r'fnt\.webink\.com') THEN 'webink.com'
    WHEN REGEXP_CONTAINS(url, r'fontawesome\.com') THEN 'fontawesome.com'
    WHEN REGEXP_CONTAINS(url, r'fonts\.(gstatic|googleapis)\.com|themes\.googleusercontent\.com/static/fonts|ssl\.gstatic\.com/fonts') THEN 'Google'
    WHEN REGEXP_CONTAINS(url, r'fonts\.typonine\.com') THEN 'typonine.com'
    WHEN REGEXP_CONTAINS(url, r'fonts\.typotheque\.com') THEN 'typotheque.com'
    WHEN REGEXP_CONTAINS(url, r'kernest\.com') THEN 'kernest.com'
    WHEN REGEXP_CONTAINS(url, r'typefront\.com') THEN 'typefront.com'
    WHEN REGEXP_CONTAINS(url, r'typesquare\.com') THEN 'typesquare.com'
    WHEN REGEXP_CONTAINS(url, r'use\.edgefonts\.net|webfonts\.creativecloud\.com') THEN 'edgefonts.net'
    WHEN REGEXP_CONTAINS(url, r'webfont\.fontplus\.jp') THEN 'fontplus.jp'
    WHEN REGEXP_CONTAINS(url, r'webfonts\.fontslive\.com') THEN 'fontslive.com'
    WHEN REGEXP_CONTAINS(url, r'webfonts\.fontstand\.com') THEN 'fontstand.com'
    WHEN REGEXP_CONTAINS(url, r'webfonts\.justanotherfoundry\.com') THEN 'justanotherfoundry.com'
    ELSE 'self-hosted'
  END
);
| 92 | + |
-- Keep only those color formats whose font table is large enough to plausibly
-- hold data, using a table-sizes payload to drop spurious entries.
--
-- Arguments:
--   formats:     JSON array of color format names.
--   table_sizes: JSON object mapping table tags to sizes.
--
-- A nonempty table is expected to exceed its fixed header size:
--
-- * `CBDT`: 2 + 2 bytes,
-- * `COLR`: 2 + 2 + 4 + 4 + 2 (+ 4 + 4 + 4 + 4 + 4) bytes,
-- * `SVG `: 2 + 4 + 4 + 2 bytes, and
-- * `sbix`: 2 + 2 + 4 + 4 bytes.
--
-- For simplicity, a single threshold of 50 bytes is applied to every format.
-- Any failure while filtering (for example a NULL argument) yields an empty
-- array.
CREATE TEMPORARY FUNCTION COLOR_FORMATS_INNER(formats JSON, table_sizes JSON)
RETURNS ARRAY<STRING>
LANGUAGE js AS '''
const MIN_TABLE_BYTES = 50;

// The lookup key is the format name plus one trailing space, cut to four
// characters, so that `SVG` resolves to the `SVG ` entry.
const isLargeEnough = (format) => {
  const tag = String(format).concat(" ").slice(0, 4);
  return table_sizes[tag] > MIN_TABLE_BYTES;
};

try {
  return formats.filter(isLargeEnough);
} catch (error) {
  return [];
}
''';
| 116 | + |
-- Extract the color formats from a payload.
--
-- Passes `_font_details.color.formats` and `_font_details.table_sizes` to
-- COLOR_FORMATS_INNER.
CREATE TEMPORARY FUNCTION COLOR_FORMATS(payload JSON) AS (
  COLOR_FORMATS_INNER(payload._font_details.color.formats, payload._font_details.table_sizes)
);
| 124 | + |
-- Check if the font is a color font given its payload.
--
-- True when COLOR_FORMATS returns at least one format for the payload.
CREATE TEMPORARY FUNCTION IS_COLOR(payload JSON) AS (
  ARRAY_LENGTH(COLOR_FORMATS(payload)) >= 1
);
| 129 | + |
-- Check if the font was successfully parsed given its payload.
--
-- True when `_font_details.table_sizes` is present in the payload.
-- NOTE(review): a JSON `null` stored at that path is presumably still treated
-- as present, since only SQL NULL is tested -- confirm if that case can occur.
CREATE TEMPORARY FUNCTION IS_PARSED(payload JSON) AS (
  NOT (payload._font_details.table_sizes IS NULL)
);
| 134 | + |
-- Check if the font is a variable font given its payload.
--
-- Serializes `_font_details.table_sizes` to a JSON string and reports whether
-- `gvar` or `CFF2` occurs anywhere in it, ignoring letter case.
CREATE TEMPORARY FUNCTION IS_VARIABLE(payload JSON) AS (
  REGEXP_CONTAINS(TO_JSON_STRING(payload._font_details.table_sizes), '(?i)gvar|CFF2')
);
| 142 | + |
-- Extract the variable formats from a payload.
--
-- Serializes `_font_details.table_sizes` to a JSON string and returns every
-- occurrence of `glyf` or `CFF2` found in it, ignoring letter case.
-- NOTE(review): this matches `glyf`, not `gvar`, so a non-variable font with a
-- `glyf` table also produces a result; presumably callers filter with
-- IS_VARIABLE first -- confirm.
CREATE TEMPORARY FUNCTION VARIABLE_FORMATS(payload JSON) AS (
  REGEXP_EXTRACT_ALL(TO_JSON_STRING(payload._font_details.table_sizes), '(?i)glyf|CFF2')
);
0 commit comments