Skip to content

Commit 2ccbdbd

Browse files
committed
Swift: Identify strings that are used in regular expressions properly.
1 parent 712c3cc commit 2ccbdbd

4 files changed

Lines changed: 94 additions & 92 deletions

File tree

swift/ql/lib/codeql/swift/regex/Regex.qll

Lines changed: 53 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -4,65 +4,53 @@
44

55
import swift
66
import codeql.swift.dataflow.DataFlow
7-
87
import codeql.swift.regex.RegexTreeView // re-export
98
private import internal.ParseRegex
10-
//private import codeql.regex.internal.RegExpTracking as RegExpTracking
119

1210
/**
13-
* A node whose value may flow to a position where it is interpreted
14-
* as a part of a regular expression.
11+
* A data flow configuration for tracking string literals that are used as
12+
* regular expressions.
1513
*/
16-
abstract class RegExpPatternSource extends DataFlow::Node {
17-
/**
18-
* Gets a node where the pattern of this node is parsed as a part of
19-
* a regular expression.
20-
*/
21-
abstract DataFlow::Node getAParse();
22-
23-
/**
24-
* Gets the root term of the regular expression parsed from this pattern.
25-
*/
26-
abstract RegExpTerm getRegExpTerm();
27-
}
28-
29-
/* *
30-
* A node whose string value may flow to a position where it is interpreted
31-
* as a part of a regular expression.
32-
*
33-
private class StringRegExpPatternSource extends RegExpPatternSource {
34-
private DataFlow::Node parse;
35-
36-
StringRegExpPatternSource() {
37-
this = regExpSource(parse) and
38-
// `regExpSource()` tracks both strings and regex literals, narrow it down to strings.
39-
this.asExpr().getConstantValue().isString(_)
14+
private module RegexUseConfig implements DataFlow::ConfigSig {
15+
predicate isSource(DataFlow::Node node) { node.asExpr() instanceof StringLiteralExpr }
16+
17+
predicate isSink(DataFlow::Node node) { node.asExpr() = any(RegexEval eval).getRegexInput() }
18+
19+
predicate isAdditionalFlowStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
20+
// flow through the `Regex` and `NSRegularExpression` initializers, i.e. from a pattern string to the regular expression object created from it.
21+
exists(CallExpr call |
22+
(
23+
call.getStaticTarget().(Method).hasQualifiedName("Regex", ["init(_:)", "init(_:as:)"]) or
24+
call.getStaticTarget()
25+
.(Method)
26+
.hasQualifiedName("NSRegularExpression", "init(pattern:options:)")
27+
) and
28+
nodeFrom.asExpr() = call.getArgument(0).getExpr() and
29+
nodeTo.asExpr() = call
30+
)
4031
}
32+
}
4133

42-
override DataFlow::Node getAParse() { result = parse }
43-
44-
override RegExpTerm getRegExpTerm() { result.getRegExp() = this.asExpr().getExpr() }
45-
}*/
34+
private module RegexUseFlow = DataFlow::Global<RegexUseConfig>;
4635

4736
/**
48-
* TODO
49-
* "(a|b).*"
37+
* A string literal that is used as a regular expression in a regular
38+
* expression evaluation. For example, the string literal `"(a|b).*"` in:
39+
* ```
40+
* try Regex("(a|b).*").firstMatch(in: myString)
41+
* ```
5042
*/
51-
private class ParsedStringRegExp extends RegExp, StringLiteralExpr {
52-
private DataFlow::Node parse;
43+
private class ParsedStringRegex extends RegExp, StringLiteralExpr {
44+
RegexEval eval;
5345

54-
ParsedStringRegExp() {
55-
//this = regExpSource(parse).asExpr().getExpr()
56-
parse.asExpr() = this
46+
ParsedStringRegex() {
47+
RegexUseFlow::flow(DataFlow::exprNode(this), DataFlow::exprNode(eval.getRegexInput()))
5748
}
5849

59-
DataFlow::Node getAParse() { result = parse }
60-
/*
61-
override predicate isDotAll() { none() }
62-
63-
override predicate isIgnoreCase() { none() }
64-
65-
override string getFlags() { none() }*/
50+
/**
51+
* Gets a call that evaluates this regular expression.
52+
*/
53+
RegexEval getEval() { result = eval }
6654
}
6755

6856
/**
@@ -72,18 +60,23 @@ private class ParsedStringRegExp extends RegExp, StringLiteralExpr {
7260
* ```
7361
*/
7462
abstract class RegexEval extends CallExpr {
75-
Expr regex;
76-
Expr input;
63+
Expr regexInput;
64+
Expr stringInput;
65+
66+
/**
67+
* Gets the input to this call that is the regular expression.
68+
*/
69+
Expr getRegexInput() { result = regexInput }
7770

7871
/**
79-
* Gets the regular expression that is evaluated.
72+
* Gets the input to this call that is the string the regular expression is evaluated on.
8073
*/
81-
Expr getRegex() { result = regex }
74+
Expr getStringInput() { result = stringInput }
8275

8376
/**
84-
* Gets the input string the regular expression is evaluated on.
77+
* Gets a regular expression value that is evaluated here (if any can be identified).
8578
*/
86-
Expr getInput() { result = input }
79+
RegExp getARegex() { exists(ParsedStringRegex regex | regex.getEval() = this and result = regex) }
8780
}
8881

8982
/**
@@ -94,8 +87,8 @@ private class AlwaysRegexEval extends RegexEval {
9487
this.getStaticTarget()
9588
.(Method)
9689
.hasQualifiedName("Regex", ["firstMatch(in:)", "prefixMatch(in:)", "wholeMatch(in:)"]) and
97-
regex = this.getQualifier() and
98-
input = this.getArgument(0).getExpr()
90+
regexInput = this.getQualifier() and
91+
stringInput = this.getArgument(0).getExpr()
9992
or
10093
this.getStaticTarget()
10194
.(Method)
@@ -107,8 +100,8 @@ private class AlwaysRegexEval extends RegexEval {
107100
"replaceMatches(in:options:range:withTemplate:)",
108101
"stringByReplacingMatches(in:options:range:withTemplate:)"
109102
]) and
110-
regex = this.getQualifier() and
111-
input = this.getArgument(0).getExpr()
103+
regexInput = this.getQualifier() and
104+
stringInput = this.getArgument(0).getExpr()
112105
or
113106
this.getStaticTarget()
114107
.(Method)
@@ -119,8 +112,8 @@ private class AlwaysRegexEval extends RegexEval {
119112
"split(separator:maxSplits:omittingEmptySubsequences:)", "starts(with:)",
120113
"trimmingPrefix(_:)", "wholeMatch(of:)"
121114
]) and
122-
regex = this.getArgument(0).getExpr() and
123-
input = this.getQualifier()
115+
regexInput = this.getArgument(0).getExpr() and
116+
stringInput = this.getQualifier()
124117
or
125118
this.getStaticTarget()
126119
.(Method)
@@ -131,7 +124,7 @@ private class AlwaysRegexEval extends RegexEval {
131124
"replacing(_:with:maxReplacements:)", "replacing(_:with:subrange:maxReplacements:)",
132125
"trimPrefix(_:)"
133126
]) and
134-
regex = this.getArgument(0).getExpr() and
135-
input = this.getQualifier()
127+
regexInput = this.getArgument(0).getExpr() and
128+
stringInput = this.getQualifier()
136129
}
137130
}

swift/ql/test/library-tests/regex/redos_variants.swift

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -35,20 +35,20 @@ func myRegexpVariantsTests(myUrl: URL) throws {
3535
let tainted = String(contentsOf: myUrl) // tainted
3636
let untainted = "abcdef"
3737

38-
_ = try Regex(".*").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
38+
_ = try Regex(".*").firstMatch(in: tainted) // $ regex=.* input=tainted
3939

40-
_ = try Regex("a*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
41-
_ = try Regex("(a*)b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
42-
_ = try Regex("(a)*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
43-
_ = try Regex("(a*)*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
44-
_ = try Regex("((a*)*b)").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
40+
_ = try Regex("a*b").firstMatch(in: tainted) // $ regex=a*b input=tainted
41+
_ = try Regex("(a*)b").firstMatch(in: tainted) // $ regex=(a*)b input=tainted
42+
_ = try Regex("(a)*b").firstMatch(in: tainted) // $ regex=(a)*b input=tainted
43+
_ = try Regex("(a*)*b").firstMatch(in: tainted) // $ regex=(a*)*b input=tainted redos-vulnerable=
44+
_ = try Regex("((a*)*b)").firstMatch(in: tainted) // $ regex=((a*)*b) input=tainted redos-vulnerable=
4545

46-
_ = try Regex("(a|aa?)b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
47-
_ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
46+
_ = try Regex("(a|aa?)b").firstMatch(in: tainted) // $ regex=(a|aa?)b input=tainted
47+
_ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ regex=(a|aa?)*b input=tainted redos-vulnerable=
4848

4949
// from the qhelp:
50-
_ = try Regex("^_(__|.)+_$").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted redos-vulnerable=
51-
_ = try Regex("^_(__|[^_])+_$").firstMatch(in: tainted) // $ regex="call to Regex<AnyRegexOutput>.init(_:)" input=tainted
50+
_ = try Regex("^_(__|.)+_$").firstMatch(in: tainted) // $ regex=^_(__|.)+_$ input=tainted redos-vulnerable=
51+
_ = try Regex("^_(__|[^_])+_$").firstMatch(in: tainted) // $ regex=^_(__|[^_])+_$ input=tainted
5252

5353
// TODO: test more variant expressions.
5454
}

swift/ql/test/library-tests/regex/regex.ql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,15 @@ module RegexTest implements TestSig {
1212

1313
predicate hasActualResult(Location location, string element, string tag, string value) {
1414
exists(RegexEval eval, Expr regex |
15-
eval.getRegex() = regex and
16-
location = regex.getLocation() and
17-
element = regex.toString() and
15+
eval.getARegex() = regex and
16+
location = eval.getLocation() and
17+
element = eval.toString() and
1818
tag = "regex" and
1919
value = quote(regex.toString())
2020
)
2121
or
2222
exists(RegexEval eval, Expr input |
23-
eval.getInput() = input and
23+
eval.getStringInput() = input and
2424
location = input.getLocation() and
2525
element = input.toString() and
2626
tag = "input" and

swift/ql/test/library-tests/regex/regex.swift

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -96,42 +96,51 @@ class NSRegularExpression : NSObject {
9696

9797
// --- tests ---
9898

99-
func myRegexpMethodsTests() throws {
99+
func myRegexpMethodsTests(b: Bool) throws {
100100
let input = "abcdef"
101101
let regex = try Regex(".*")
102102

103103
// --- Regex ---
104104

105-
_ = try regex.firstMatch(in: input) // $ regex=regex input=input
106-
_ = try regex.prefixMatch(in: input) // $ regex=regex input=input
107-
_ = try regex.wholeMatch(in: input) // $ regex=regex input=input
105+
_ = try regex.firstMatch(in: input) // $ regex=.* input=input
106+
_ = try regex.prefixMatch(in: input) // $ regex=.* input=input
107+
_ = try regex.wholeMatch(in: input) // $ regex=.* input=input
108108

109109
// --- RangeReplaceableCollection ---
110110

111111
var inputVar = input
112-
inputVar.replace(regex, with: "") // $ regex=regex input=&...
113-
_ = input.replacing(regex, with: "") // $ regex=regex input=input
114-
inputVar.trimPrefix(regex) // $ regex=regex input=&...
112+
inputVar.replace(regex, with: "") // $ regex=.* input=&...
113+
_ = input.replacing(regex, with: "") // $ regex=.* input=input
114+
inputVar.trimPrefix(regex) // $ regex=.* input=&...
115115

116116
// --- StringProtocol ---
117117

118-
_ = input.range(of: ".*", options: .regularExpression, range: nil, locale: nil) // $ MISSING: regex=regex input=input
119-
_ = input.replacingOccurrences(of: ".*", with: "", options: .regularExpression) // $ MISSING: regex=regex input=input
118+
_ = input.range(of: ".*", options: .regularExpression, range: nil, locale: nil) // $ MISSING: regex=.* input=input
119+
_ = input.replacingOccurrences(of: ".*", with: "", options: .regularExpression) // $ MISSING: regex=.* input=input
120120

121121
// --- NSRegularExpression ---
122122

123123
let nsregex = try NSRegularExpression(pattern: ".*")
124-
_ = nsregex.numberOfMatches(in: input, options: [], range: NSRange(location: 0, length: input.utf16.count)) // $ regex=nsregex input=input
125-
nsregex.enumerateMatches(in: input, range: NSMakeRange(0, input.utf16.count), using: {a, b, c in } ) // $ regex=nsregex input=input
126-
_ = nsregex.matches(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=nsregex input=input
127-
_ = nsregex.firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=nsregex input=input
128-
_ = nsregex.rangeOfFirstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=nsregex input=input
129-
_ = nsregex.replaceMatches(in: NSMutableString(string: input), range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=nsregex input="call to NSString.init(string:)"
130-
_ = nsregex.stringByReplacingMatches(in: input, range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=nsregex input=input
124+
_ = nsregex.numberOfMatches(in: input, options: [], range: NSRange(location: 0, length: input.utf16.count)) // $ regex=.* input=input
125+
nsregex.enumerateMatches(in: input, range: NSMakeRange(0, input.utf16.count), using: {a, b, c in } ) // $ regex=.* input=input
126+
_ = nsregex.matches(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input
127+
_ = nsregex.firstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input
128+
_ = nsregex.rangeOfFirstMatch(in: input, range: NSMakeRange(0, input.utf16.count)) // $ regex=.* input=input
129+
_ = nsregex.replaceMatches(in: NSMutableString(string: input), range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=.* input="call to NSString.init(string:)"
130+
_ = nsregex.stringByReplacingMatches(in: input, range: NSMakeRange(0, input.utf16.count), withTemplate: "") // $ regex=.* input=input
131131

132132
// --- NSString ---
133133

134134
let inputNS = NSString(string: "abcdef")
135-
_ = inputNS.range(of: "*", options: .regularExpression) // $ MISSING: regex=nsregex input=inputNS
136-
_ = inputNS.replacingOccurrences(of: ".*", with: "", options: .regularExpression, range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=nsregex input=inputNS
135+
_ = inputNS.range(of: "*", options: .regularExpression) // $ MISSING: regex=.* input=inputNS
136+
_ = inputNS.replacingOccurrences(of: ".*", with: "", options: .regularExpression, range: NSMakeRange(0, inputNS.length)) // $ MISSING: regex=.* input=inputNS
137+
138+
// --- flow ---
139+
140+
let either_regex = try Regex(b ? ".*" : ".+")
141+
_ = try either_regex.firstMatch(in: input) // $ regex=.* regex=.+ input=input
142+
143+
let base_str = "a"
144+
let append_regex = try Regex(base_str + "b")
145+
_ = try append_regex.firstMatch(in: input) // $ input=input MISSING: regex=ab
137146
}

0 commit comments

Comments
 (0)