@@ -36,28 +36,191 @@ abstract class RegexCreation extends DataFlow::Node {
3636 * created from.
3737 */
3838 abstract DataFlow:: Node getStringInput ( ) ;
39+
/**
 * Gets a data-flow node for the options input that might contain parse mode
 * flags (if any).
 *
 * This default implementation has no result (`none()`); a regex creation
 * whose call takes an options argument can override it.
 */
DataFlow::Node getOptionsInput() { none() }
3945}
4046
/**
 * A data-flow node for a call that constructs a `Regex` object, i.e. a call
 * whose static target is `Regex.init(_:)` or `Regex.init(_:as:)`.
 */
private class RegexRegexCreation extends RegexCreation {
  /** The data-flow node for the first argument of the initializer call. */
  DataFlow::Node pattern;

  RegexRegexCreation() {
    exists(CallExpr initCall |
      // this node is the initializer call itself ...
      this.asExpr() = initCall and
      // ... its first argument is the regular expression string ...
      pattern.asExpr() = initCall.getArgument(0).getExpr() and
      // ... and the call resolves to one of the `Regex` initializers.
      initCall.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"])
    )
  }

  override DataFlow::Node getStringInput() { result = pattern }
}
63+
/**
 * A data-flow node for a call that constructs an `NSRegularExpression`
 * object, i.e. a call whose static target is
 * `NSRegularExpression.init(pattern:options:)`.
 */
private class NSRegularExpressionRegexCreation extends RegexCreation {
  /** The data-flow node for the first argument of the initializer call. */
  DataFlow::Node pattern;

  NSRegularExpressionRegexCreation() {
    exists(CallExpr initCall |
      // this node is the initializer call itself ...
      this.asExpr() = initCall and
      // ... its first argument is the regular expression string ...
      pattern.asExpr() = initCall.getArgument(0).getExpr() and
      // ... and the call resolves to the `NSRegularExpression` initializer.
      initCall
          .getStaticTarget()
          .(Method)
          .hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
    )
  }

  override DataFlow::Node getStringInput() { result = pattern }

  /** Gets the node for the second argument of the initializer call. */
  override DataFlow::Node getOptionsInput() {
    exists(CallExpr initCall |
      initCall = this.asExpr() and
      result.asExpr() = initCall.getArgument(1).getExpr()
    )
  }
}
86+
/**
 * The underlying algebraic datatype for regular expression parse mode flags,
 * with one branch per flag.
 */
private newtype TRegexParseMode =
  MkIgnoreCase() or // case insensitive
  MkVerbose() or // ignores whitespace and `#` comments within patterns
  MkDotAll() or // dot matches all characters, including line terminators
  MkMultiLine() or // `^` and `$` also match beginning and end of lines
  MkUnicode() // Unicode UAX 29 word boundary mode
93+
/**
 * A flag describing one parse mode of a regular expression.
 */
class RegexParseMode extends TRegexParseMode {
  /**
   * Gets the upper-case name identifying this parse mode flag, one of
   * `IGNORECASE`, `VERBOSE`, `DOTALL`, `MULTILINE` or `UNICODE`.
   */
  string getName() {
    result = "IGNORECASE" and this = MkIgnoreCase()
    or
    result = "VERBOSE" and this = MkVerbose()
    or
    result = "DOTALL" and this = MkDotAll()
    or
    result = "MULTILINE" and this = MkMultiLine()
    or
    result = "UNICODE" and this = MkUnicode()
  }

  /**
   * Gets a textual representation of this `RegexParseMode` (its name).
   */
  string toString() { result = this.getName() }
}
118+
/**
 * A unit class for adding additional flow steps for regular expressions.
 *
 * Both member predicates are abstract, so each extension of this class
 * provides its own flow steps and parse mode information.
 */
class RegexAdditionalFlowStep extends Unit {
  /**
   * Holds if the step from `nodeFrom` to `nodeTo` should be considered a flow
   * step for regular expressions.
   */
  abstract predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo);

  /**
   * Holds if the regular expression parse mode `mode` is either set
   * (`isSet` = true) or unset (`isSet` = false) at `node`. Parse modes
   * propagate through array construction and regex construction.
   */
  abstract predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet);
}
136+
/**
 * Additional flow steps and parse mode information for `Regex`, derived from
 * calls to the `Regex` methods that change the parse mode of an existing
 * `Regex` object.
 */
class RegexRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
  /** Holds if a mode-modifying call takes the regex at `nodeFrom` and yields `nodeTo`. */
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
    this.setsParseModeEdge(nodeFrom, nodeTo, _, _)
  }

  /** Holds if `node` is a mode-modifying call that sets or unsets `mode`. */
  override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
    this.setsParseModeEdge(_, node, mode, isSet)
  }

  /**
   * Holds if `nodeTo` is a call to a `Regex` mode-modifying method on the
   * qualifier `nodeFrom`, where the call concerns `mode` and `isSet` records
   * whether the mode is switched on or off.
   */
  private predicate setsParseModeEdge(
    DataFlow::Node nodeFrom, DataFlow::Node nodeTo, RegexParseMode mode, boolean isSet
  ) {
    exists(CallExpr modifierCall, string methodName |
      // which `Regex` method is called determines the parse mode involved
      modifierCall.getStaticTarget().(Method).hasQualifiedName("Regex", methodName) and
      (
        methodName = "ignoresCase(_:)" and mode = MkIgnoreCase()
        or
        methodName = "dotMatchesNewlines(_:)" and mode = MkDotAll()
        or
        methodName = "anchorsMatchLineEndings(_:)" and mode = MkMultiLine()
      ) and
      // the edge runs from the qualifier (the existing regex) to the call result
      nodeFrom.asExpr() = modifierCall.getQualifier() and
      nodeTo.asExpr() = modifierCall and
      // only a literal `false` argument unsets the mode; a literal `true`, an
      // omitted (default) argument, or an unknown value all count as set
      (
        if modifierCall.getArgument(0).getExpr().(BooleanLiteralExpr).getValue() = false
        then isSet = false
        else isSet = true
      )
    )
  }
}
174+
/**
 * Parse mode information for `NSRegularExpression`. This class contributes no
 * additional flow steps; it only recognizes references to
 * `NSRegularExpression.Options` members as places where a parse mode is set.
 */
class NSRegularExpressionRegexAdditionalFlowStep extends RegexAdditionalFlowStep {
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { none() }

  /**
   * Holds if `node` is a reference to an `NSRegularExpression.Options` member
   * corresponding to `mode`. Such values are typically combined, then passed
   * into the `NSRegularExpression` initializer, so `isSet` is always `true`.
   */
  override predicate setsParseMode(DataFlow::Node node, RegexParseMode mode, boolean isSet) {
    exists(string optionName |
      node.asExpr()
          .(MemberRefExpr)
          .getMember()
          .(FieldDecl)
          .hasQualifiedName("NSRegularExpression.Options", optionName) and
      isSet = true
    |
      optionName = "caseInsensitive" and mode = MkIgnoreCase()
      or
      optionName = "allowCommentsAndWhitespace" and mode = MkVerbose()
      or
      optionName = "dotMatchesLineSeparators" and mode = MkDotAll()
      or
      optionName = "anchorsMatchLines" and mode = MkMultiLine()
      or
      optionName = "useUnicodeWordBoundaries" and mode = MkUnicode()
    )
  }
}
62225
63226/**
@@ -91,6 +254,19 @@ abstract class RegexEval extends CallExpr {
91254 RegexUseFlow:: flow ( regexCreation , DataFlow:: exprNode ( this .getRegexInput ( ) ) )
92255 )
93256 }
257+
/**
 * Gets a parse mode that is set at this evaluation, meaning that on at least
 * one path a node where the flag is set flows to the regex input of this
 * evaluation.
 */
RegexParseMode getAParseMode() {
  exists(RegexAdditionalFlowStep flagStep, DataFlow::Node flagNode |
    // the flag node reaches the regex input of this eval
    RegexParseModeFlow::flow(flagNode, DataFlow::exprNode(this.getRegexInput())) and
    // and the parse mode flag is set (not unset) at that node
    flagStep.setsParseMode(flagNode, result, true)
  )
}
94270}
95271
96272/**
0 commit comments