11/** Provides classes and predicates related to regex injection in Java. */
22
33import java
4- import semmle.code.java.dataflow.FlowSources
5- import semmle.code.java.dataflow.TaintTracking
6- import semmle.code.java.regex.RegexFlowConfigs
4+ private import semmle.code.java.dataflow.DataFlow
5+ private import semmle.code.java.frameworks.Regex
6+ private import semmle.code.java.frameworks.apache.Lang
77
8- /**
9- * A data flow sink for untrusted user input used to construct regular expressions.
10- */
/**
 * A data flow sink for regex injection: an expression node where untrusted
 * user input is used to construct a regular expression.
 *
 * The class is abstract; subclasses contribute the concrete sinks.
 */
abstract class Sink extends DataFlow::ExprNode { }
1210
13- /**
14- * A sanitizer for untrusted user input used to construct regular expressions.
15- */
/**
 * A sanitizer for regex injection: an expression node at which untrusted
 * user input is considered safe to use in a regular expression.
 *
 * The class is abstract; subclasses contribute the concrete sanitizers.
 */
abstract class Sanitizer extends DataFlow::ExprNode { }
1713
18- // TODO: look into further: Pattern.matcher, .pattern() and .toString() as taint steps, .split and .splitAsStream
19- /**
20- * A data flow sink for untrusted user input used to construct regular expressions.
21- */
22- private class RegexSink extends Sink {
23- RegexSink ( ) {
/**
 * A regex injection sink: the argument of a method call that the callee
 * interprets as a regular expression.
 */
private class RegexInjectionSink extends Sink {
  RegexInjectionSink() {
    exists(MethodAccess call, Method callee, int regexArg |
      callee = call.getMethod() and
      call.getArgument(regexArg) = this.asExpr()
    |
      // `String` and `Pattern` regex methods: the regex is the first argument
      regexArg = 0 and
      (callee instanceof StringRegexMethod or callee instanceof PatternRegexMethod)
      or
      // Apache `RegExUtils` methods: the regex is the second argument
      regexArg = 1 and
      callee instanceof ApacheRegExUtilsMethod
    )
  }
}
4728
48- /**
49- * A call to a function whose name suggests that it escapes regular
50- * expression meta-characters.
51- */
52- class RegexInjectionSanitizer extends Sanitizer {
29+ /** A call to a function which escapes regular expression meta-characters. */
30+ private class RegexInjectionSanitizer extends Sanitizer {
5331 RegexInjectionSanitizer ( ) {
32+ // a function whose name suggests that it escapes regular expression meta-characters
5433 exists ( string calleeName , string sanitize , string regexp |
5534 calleeName = this .asExpr ( ) .( Call ) .getCallee ( ) .getName ( ) and
56- // TODO: add test case for sanitize? I think current tests only check escape
57- // TODO: should this be broader and only look for "escape|saniti[sz]e" and not "regexp?" as well? -- e.g. err on side of FNs?
5835 sanitize = "(?:escape|saniti[sz]e)" and
5936 regexp = "regexp?"
6037 |
@@ -63,31 +40,70 @@ class RegexInjectionSanitizer extends Sanitizer {
6340 ".*)" )
6441 )
6542 or
66- // adds Pattern.quote() as a sanitizer
67- // https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html#quote-java.lang.String-: "Metacharacters or escape sequences in the input sequence will be given no special meaning."
68- // see https://rules.sonarsource.com/java/RSPEC-2631 and https://sensei.securecodewarrior.com/recipes/scw:java:regex-injection
43+ // a call to the `Pattern.quote` method, which gives metacharacters or escape sequences no special meaning
6944 exists ( MethodAccess ma , Method m | m = ma .getMethod ( ) |
70- m .getDeclaringType ( ) instanceof RegexPattern and
71- (
72- ma .getArgument ( 0 ) = this .asExpr ( ) and
73- m .hasName ( "quote" )
74- )
45+ ma .getArgument ( 0 ) = this .asExpr ( ) and
46+ m instanceof PatternQuoteMethod
47+ )
48+ or
49+ // use of Pattern.LITERAL flag with `Pattern.compile` which gives metacharacters or escape sequences no special meaning
50+ exists ( MethodAccess ma , Method m , Field field | m = ma .getMethod ( ) |
51+ ma .getArgument ( 0 ) = this .asExpr ( ) and
52+ m instanceof PatternRegexMethod and
53+ m .hasName ( "compile" ) and
54+ //ma.getArgument(1).toString() = "Pattern.LITERAL" and
55+ field instanceof PatternLiteral and
56+ ma .getArgument ( 1 ) = field .getAnAccess ( )
7557 )
7658 }
7759}
7860
79- // ******** HELPER CLASSES/METHODS (MAYBE MOVE ELSEWHERE?) ********
80- // TODO: move below to Regex.qll??
81- /** The Java class `java.util.regex.Pattern`. */
82- private class RegexPattern extends RefType {
83- RegexPattern ( ) { this .hasQualifiedName ( "java.util.regex" , "Pattern" ) }
/**
 * A method declared on `java.lang.String` that takes a regular expression
 * as a parameter: `matches`, `split`, `replaceFirst`, or `replaceAll`.
 */
private class StringRegexMethod extends Method {
  StringRegexMethod() {
    this.hasName(["matches", "split", "replaceFirst", "replaceAll"]) and
    this.getDeclaringType() instanceof TypeString
  }
}
71+
/**
 * A method declared on `java.util.regex.Pattern` that takes a regular
 * expression as a parameter: `compile` or `matches`.
 */
private class PatternRegexMethod extends Method {
  PatternRegexMethod() {
    this.hasName(["compile", "matches"]) and
    this.getDeclaringType() instanceof TypeRegexPattern
  }
}
8582
86- // /** The Java class `java.util.regex.Matcher`. */
87- // private class RegexMatcher extends RefType {
88- // RegexMatcher() { this.hasQualifiedName("java.util.regex", "Matcher") }
89- // }
90- /** The Java class `org.apache.commons.lang3.RegExUtils`. */
91- private class ApacheRegExUtils extends RefType {
92- ApacheRegExUtils ( ) { this .hasQualifiedName ( "org.apache.commons.lang3" , "RegExUtils" ) }
/**
 * The `quote` method of the `java.util.regex.Pattern` class, which gives
 * metacharacters and escape sequences in its argument no special meaning.
 */
private class PatternQuoteMethod extends Method {
  PatternQuoteMethod() {
    // The declaring type must be checked as well as the name: otherwise any
    // method that happens to be called `quote` would be treated as
    // `Pattern.quote`, and therefore as a sanitizer.
    this.getDeclaringType() instanceof TypeRegexPattern and
    this.hasName("quote")
  }
}
87+
/** The field named `LITERAL` that is declared on `java.util.regex.Pattern`. */
private class PatternLiteral extends Field {
  PatternLiteral() {
    this.hasName("LITERAL") and
    this.getDeclaringType() instanceof TypeRegexPattern
  }
}
95+
/**
 * A method declared on `org.apache.commons.lang3.RegExUtils` whose second
 * parameter is a regular expression of type `String`.
 */
private class ApacheRegExUtilsMethod extends Method {
  ApacheRegExUtilsMethod() {
    this.hasName([
        "removeAll", "removeFirst", "removePattern", "replaceAll", "replaceFirst", "replacePattern"
      ]) and
    // Only the `String` overloads are matched: in the `Pattern` overloads the
    // regex has already been handled via `java.util.regex.Pattern`.
    this.getParameterType(1) instanceof TypeString and
    this.getDeclaringType() instanceof TypeApacheRegExUtils
  }
}
0 commit comments