44
55private import codeql.regex.RegexTreeView
66private import codeql.util.Numbers
7+ private import codeql.util.Strings
78
89/**
910 * Classes and predicates that create an NFA and various algorithms for working with it.
@@ -15,34 +16,7 @@ module Make<RegexTreeViewSig TreeImpl> {
1516 * Gets the char after `c` (from a simplified ASCII table).
1617 */
1718 private string nextChar ( string c ) {
18- exists ( int code | code = ascii ( c ) | code + 1 = ascii ( result ) )
19- }
20-
21- /**
22- * Gets the `i`th codepoint in `s`.
23- */
24- bindingset [ s]
25- private string getCodepointAt ( string s , int i ) { result = s .regexpFind ( "(.|\\s)" , i , _) }
26-
27- /**
28- * Gets the length of `s` in codepoints.
29- */
30- bindingset [ str]
31- private int getCodepointLength ( string str ) {
32- result = str .regexpReplaceAll ( "(.|\\s)" , "x" ) .length ( )
33- }
34-
35- /**
36- * Gets an approximation for the ASCII code for `char`.
37- * Only the easily printable chars are included (so no newline, tab, null, etc).
38- */
39- private int ascii ( string char ) {
40- char =
41- rank [ result ] ( string c |
42- c =
43- "! \"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
44- .charAt ( _)
45- )
19+ exists ( int code | code = asciiPrintable ( c ) | code + 1 = asciiPrintable ( result ) )
4620 }
4721
4822 /**
@@ -394,7 +368,7 @@ module Make<RegexTreeViewSig TreeImpl> {
394368 * Includes all printable ascii chars, all constants mentioned in a regexp, and all chars matched by the regexp `/\s|\d|\w/`.
395369 */
396370 string getARelevantChar ( ) {
397- exists ( ascii ( result ) )
371+ exists ( asciiPrintable ( result ) )
398372 or
399373 exists ( RegexpCharacterConstant c | result = getCodepointAt ( c .getValue ( ) , _) )
400374 or
@@ -1191,7 +1165,7 @@ module Make<RegexTreeViewSig TreeImpl> {
11911165 private string relevant ( RegExpRoot root ) {
11921166 root = relevantRoot ( ) and
11931167 (
1194- exists ( ascii ( result ) ) and exists ( root )
1168+ exists ( asciiPrintable ( result ) ) and exists ( root )
11951169 or
11961170 exists ( InputSymbol s | belongsTo ( s , root ) | result = intersect ( s , _) )
11971171 or
@@ -1322,49 +1296,6 @@ module Make<RegexTreeViewSig TreeImpl> {
13221296 )
13231297 }
13241298
1325- /**
1326- * Gets the result of backslash-escaping newlines, carriage-returns and
1327- * backslashes in `s`.
1328- */
1329- bindingset [ s]
1330- private string escape ( string s ) {
1331- result =
1332- escapeUnicodeString ( s .replaceAll ( "\\" , "\\\\" )
1333- .replaceAll ( "\n" , "\\n" )
1334- .replaceAll ( "\r" , "\\r" )
1335- .replaceAll ( "\t" , "\\t" ) )
1336- }
1337-
1338- /**
1339- * Gets a string where the unicode characters in `s` have been escaped.
1340- */
1341- bindingset [ s]
1342- private string escapeUnicodeString ( string s ) {
1343- result =
1344- concat ( int i , string char | char = escapeUnicodeChar ( getCodepointAt ( s , i ) ) | char order by i )
1345- }
1346-
1347- /**
1348- * Gets a unicode escaped string for `char`.
1349- * If `char` is a printable char, then `char` is returned.
1350- */
1351- bindingset [ char]
1352- private string escapeUnicodeChar ( string char ) {
1353- if isPrintable ( char )
1354- then result = char
1355- else
1356- if exists ( to4digitHex ( any ( int i | i .toUnicode ( ) = char ) ) )
1357- then result = "\\u" + to4digitHex ( any ( int i | i .toUnicode ( ) = char ) )
1358- else result = "\\u{" + toHex ( any ( int i | i .toUnicode ( ) = char ) ) + "}"
1359- }
1360-
1361- /** Holds if `char` is easily printable char, or whitespace. */
1362- private predicate isPrintable ( string char ) {
1363- exists ( ascii ( char ) )
1364- or
1365- char = "\n\r\t" .charAt ( _)
1366- }
1367-
13681299 /**
13691300 * Gets `str` with the last `i` characters moved to the front.
13701301 *
0 commit comments