@@ -3,43 +3,23 @@ private import AST
33private import Constant
44private import TreeSitter
55private import codeql.ruby.controlflow.CfgNodes
6+ private import codeql.NumberUtils
67
/**
 * Gets the integer value denoted by the Ruby integer literal `i`.
 *
 * The literal text is lower-cased and stripped of `_` digit separators before
 * it is interpreted. The supported forms are:
 *
 * ```rb
 * 123     # decimal
 * 0d123   # decimal, explicit prefix
 * 0b1010  # binary
 * 0x1f    # hexadecimal
 * 0o17    # octal, explicit prefix
 * 017     # octal, implicit prefix
 * ```
 *
 * The prefixed binary, hexadecimal and octal forms are delegated to
 * `parseBinaryInt`, `parseHexInt` and `parseOctalInt`, which are not defined in
 * this file (presumably they come from `codeql.NumberUtils`).
 */
int parseInteger(Ruby::Integer i) {
  exists(string s | s = i.getValue().toLowerCase().replaceAll("_", "") |
    // Plain decimal literal: no leading zero.
    s.charAt(0) != "0" and
    result = s.toInt()
    or
    // Decimal literal with an explicit `0d` prefix. Without this case the
    // literal would be treated as implicit octal below.
    s.matches("0d%") and
    result = s.suffix(2).toInt()
    or
    s.matches("0b%") and
    result = parseBinaryInt(s.suffix(2))
    or
    s.matches("0x%") and
    result = parseHexInt(s.suffix(2))
    or
    // Octal literal, with either an explicit `0o` prefix or a bare leading `0`.
    exists(string octalDigits |
      s.matches("0o%") and
      octalDigits = s.suffix(2)
      or
      s.charAt(0) = "0" and
      not s.charAt(1) = ["b", "d", "o", "x"] and
      octalDigits = s.suffix(1)
    |
      result = parseOctalInt(octalDigits)
    )
  )
}
4525
@@ -148,16 +128,85 @@ private class RequiredFileLiteralConstantValue extends RequiredConstantValue {
148128
/**
 * Requires the unescaped text of every token for which a
 * `TStringTextComponentNonRegexp` branch exists.
 */
private class RequiredStringTextComponentConstantValue extends RequiredConstantValue {
  override predicate requiredString(string s) {
    exists(Ruby::Token t |
      exists(TStringTextComponentNonRegexp(t)) and
      s = unescapeTextComponent(t.getValue())
    )
  }
}
154135
/**
 * Requires the unescaped value of every token for which a
 * `TStringEscapeSequenceComponentNonRegexp` branch exists.
 */
private class RequiredStringEscapeSequenceComponentConstantValue extends RequiredConstantValue {
  override predicate requiredString(string s) {
    exists(Ruby::Token t |
      exists(TStringEscapeSequenceComponentNonRegexp(t)) and
      s = unescapeEscapeSequence(t.getValue())
    )
  }
}
160143
/**
 * Gets the string that the escape sequence `escaped` stands for. `escaped` is
 * the full escape sequence, including its leading backslash. For example:
 *
 * ```
 * \\     => \
 * \141   => a
 * \u0078 => x
 * ```
 *
 * When `escaped` is not one of the sequences handled by
 * `unescapeKnownEscapeSequence`, the result is `escaped` with its first
 * character (the backslash) removed.
 */
bindingset[escaped]
string unescapeEscapeSequence(string escaped) {
  if exists(unescapeKnownEscapeSequence(escaped))
  then result = unescapeKnownEscapeSequence(escaped)
  else
    // Unrecognised escape: keep whatever follows the backslash unchanged.
    result = escaped.suffix(1)
}
161+
/**
 * Gets the string denoted by `escaped`, if `escaped` is one of the escape
 * sequences listed here: a fixed single-character escape, an octal escape
 * (`\nnn`), a hex escape (`\xnn`), or a Unicode escape (`\unnnn` or
 * `\u{n...}`). Has no result for any other input.
 */
bindingset[escaped]
private string unescapeKnownEscapeSequence(string escaped) {
  // Escapes whose replacement is written directly as a string literal.
  escaped = "\\\\" and result = "\\"
  or
  escaped = "\\'" and result = "'"
  or
  escaped = "\\\"" and result = "\""
  or
  escaped = "\\t" and result = "\t"
  or
  escaped = "\\n" and result = "\n"
  or
  escaped = "\\r" and result = "\r"
  or
  escaped = "\\s" and result = " "
  or
  // Escapes whose replacement is given by its code point.
  exists(int codePoint |
    escaped = "\\a" and codePoint = 7
    or
    escaped = "\\b" and codePoint = 8
    or
    escaped = "\\v" and codePoint = 11
    or
    escaped = "\\f" and codePoint = 12
    or
    escaped = "\\e" and codePoint = 27
    or
    escaped = ["\\c?", "\\C-?"] and codePoint = 127
  |
    result = codePoint.toUnicode()
  )
  or
  // Octal escape: one to three octal digits.
  exists(string octalDigits | octalDigits = escaped.regexpCapture("\\\\([0-7]{1,3})", 1) |
    result = parseOctalInt(octalDigits).toUnicode()
  )
  or
  // Hex-based escapes: `\xh[h]`, `\uhhhh` and `\u{h...}`.
  exists(string hexDigits |
    hexDigits =
      escaped
          .regexpCapture([
              "\\\\x([0-9a-fA-F]{1,2})", "\\\\u([0-9a-fA-F]{4})",
              "\\\\u\\{([0-9a-fA-F]{1,6})\\}"
            ], 1)
  |
    result = parseHexInt(hexDigits).toUnicode()
  )
}
198+
/**
 * Gets `text` with every `\\` replaced by `\` and every `\'` replaced by `'`.
 * All other characters are kept as they are.
 *
 * ```rb
 * 'foo\\bar \'baz\'' # foo\bar 'baz'
 * ```
 */
bindingset[text]
string unescapeTextComponent(string text) {
  exists(string escapedCharPattern, string capturedChar |
    // A backslash followed by a captured quote or backslash.
    escapedCharPattern = "\\\\(['\\\\])" and
    capturedChar = "$1"
  |
    result = text.regexpReplaceAll(escapedCharPattern, capturedChar)
  )
}
209+
/**
 * The union of the three `...Regexp` component types: text components,
 * escape-sequence components and interpolation components.
 */
class TRegExpComponent =
  TStringTextComponentRegexp or
      TStringEscapeSequenceComponentRegexp or
      TStringInterpolationComponentRegexp;
0 commit comments