22// --- stubs ---
33
44struct URL {
5- init ? ( string: String ) { }
5+ init ? ( string: String ) { }
66}
77
88struct AnyRegexOutput {
@@ -12,20 +12,20 @@ protocol RegexComponent {
1212}
1313
1414struct Regex < Output> : RegexComponent {
15- struct Match {
16- }
15+ struct Match {
16+ }
1717
18- init ( _ pattern: String ) throws where Output == AnyRegexOutput { }
18+ init ( _ pattern: String ) throws where Output == AnyRegexOutput { }
1919
20- func firstMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
21- func prefixMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
22- func wholeMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
20+ func firstMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
21+ func prefixMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
22+ func wholeMatch( in string: String ) throws -> Regex < Output > . Match ? { return nil }
2323
24- typealias RegexOutput = Output
24+ typealias RegexOutput = Output
2525}
2626
2727extension String {
28- init ( contentsOf: URL ) {
28+ init ( contentsOf: URL ) {
2929 let data = " "
3030 self . init ( data)
3131 }
@@ -36,88 +36,88 @@ extension String {
3636// the focus for these tests is different vulnerable and non-vulnerable regexp strings.
3737
3838func myRegexpVariantsTests( myUrl: URL ) throws {
39- let tainted = String ( contentsOf: myUrl) // tainted
39+ let tainted = String ( contentsOf: myUrl) // tainted
4040
4141 // basic cases:
4242 // attack string: "a" x lots + "!"
4343
44- _ = try Regex ( " .* " ) . firstMatch ( in: tainted) // $ regex=.* input=tainted
44+ _ = try Regex ( " .* " ) . firstMatch ( in: tainted) // $ regex=.* input=tainted
4545
46- _ = try Regex ( " a*b " ) . firstMatch ( in: tainted) // $ regex=a*b input=tainted
47- _ = try Regex ( " (a*)b " ) . firstMatch ( in: tainted) // $ regex=(a*)b input=tainted
48- _ = try Regex ( " (a)*b " ) . firstMatch ( in: tainted) // $ regex=(a)*b input=tainted
49- _ = try Regex ( " (a*)*b " ) . firstMatch ( in: tainted) // $ regex=(a*)*b input=tainted redos-vulnerable=
50- _ = try Regex ( " ((a*)*b) " ) . firstMatch ( in: tainted) // $ regex=((a*)*b) input=tainted redos-vulnerable=
46+ _ = try Regex ( " a*b " ) . firstMatch ( in: tainted) // $ regex=a*b input=tainted
47+ _ = try Regex ( " (a*)b " ) . firstMatch ( in: tainted) // $ regex=(a*)b input=tainted
48+ _ = try Regex ( " (a)*b " ) . firstMatch ( in: tainted) // $ regex=(a)*b input=tainted
49+ _ = try Regex ( " (a*)*b " ) . firstMatch ( in: tainted) // $ regex=(a*)*b input=tainted redos-vulnerable=
50+ _ = try Regex ( " ((a*)*b) " ) . firstMatch ( in: tainted) // $ regex=((a*)*b) input=tainted redos-vulnerable=
5151
52- _ = try Regex ( " (a|aa?)b " ) . firstMatch ( in: tainted) // $ regex=(a|aa?)b input=tainted
53- _ = try Regex ( " (a|aa?)*b " ) . firstMatch ( in: tainted) // $ regex=(a|aa?)*b input=tainted redos-vulnerable=
52+ _ = try Regex ( " (a|aa?)b " ) . firstMatch ( in: tainted) // $ regex=(a|aa?)b input=tainted
53+ _ = try Regex ( " (a|aa?)*b " ) . firstMatch ( in: tainted) // $ regex=(a|aa?)*b input=tainted redos-vulnerable=
5454
5555 // from the qhelp:
56- // attack string: "_" x lots + "!"
56+ // attack string: "_" x lots + "!"
5757
58- _ = try Regex ( " ^_(__|.)+_$ " ) . firstMatch ( in: tainted) // $ regex=^_(__|.)+_$ input=tainted redos-vulnerable=
59- _ = try Regex ( " ^_(__|[^_])+_$ " ) . firstMatch ( in: tainted) // $ regex=^_(__|[^_])+_$ input=tainted
58+ _ = try Regex ( " ^_(__|.)+_$ " ) . firstMatch ( in: tainted) // $ regex=^_(__|.)+_$ input=tainted redos-vulnerable=
59+ _ = try Regex ( " ^_(__|[^_])+_$ " ) . firstMatch ( in: tainted) // $ regex=^_(__|[^_])+_$ input=tainted
6060
6161 // real world cases:
6262
6363 // Adapted from marked (https://github.com/markedjs/marked), which is licensed
6464 // under the MIT license; see file licenses/marked-LICENSE.
6565 // GOOD
66- _ = try Regex ( #"^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)"# ) . firstMatch ( in: tainted) // $ SPURIOUS: redos-vulnerable=
67- // BAD
66+ _ = try Regex ( #"^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)"# ) . firstMatch ( in: tainted) // $ SPURIOUS: redos-vulnerable=
67+ // BAD
6868 // attack string: "_" + "__".repeat(100)
69- _ = try Regex ( #"^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)"# ) . wholeMatch ( in: tainted) // $ redos-vulnerable=
69+ _ = try Regex ( #"^\b_((?:__|[\s\S])+?)_\b|^\*((?:\*\*|[\s\S])+?)\*(?!\*)"# ) . wholeMatch ( in: tainted) // $ redos-vulnerable=
7070
7171 // GOOD
7272 // Adapted from marked (https://github.com/markedjs/marked), which is licensed
7373 // under the MIT license; see file licenses/marked-LICENSE.
74- _ = try Regex ( #"^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)"# ) . firstMatch ( in: tainted)
74+ _ = try Regex ( #"^\b_((?:__|[^_])+?)_\b|^\*((?:\*\*|[^*])+?)\*(?!\*)"# ) . firstMatch ( in: tainted)
7575
7676 // GOOD - there is no witness in the end that could cause the regexp to not match
7777 // Adapted from brace-expansion (https://github.com/juliangruber/brace-expansion),
7878 // which is licensed under the MIT license; see file licenses/brace-expansion-LICENSE.
79- _ = try Regex ( " (.*,)+.+ " ) . firstMatch ( in: tainted)
79+ _ = try Regex ( " (.*,)+.+ " ) . firstMatch ( in: tainted)
8080
8181 // BAD
8282 // attack string: " '" + "\\\\".repeat(100)
8383 // Adapted from CodeMirror (https://github.com/codemirror/codemirror),
8484 // which is licensed under the MIT license; see file licenses/CodeMirror-LICENSE.
85- _ = try Regex ( #"^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
85+ _ = try Regex ( #"^(?:\s+(?:"(?:[^"\\]|\\\\|\\.)+"|'(?:[^'\\]|\\\\|\\.)+'|\((?:[^)\\]|\\\\|\\.)+\)))?"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
8686
8787 // GOOD
8888 // Adapted from jest (https://github.com/facebook/jest), which is licensed
8989 // under the MIT license; see file licenses/jest-LICENSE.
90- _ = try Regex ( #"^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*"# ) . firstMatch ( in: tainted)
90+ _ = try Regex ( #"^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*"# ) . firstMatch ( in: tainted)
9191
9292 // BAD
9393 // attack string: "/" + "\\/a".repeat(100)
9494 // Adapted from ANodeBlog (https://github.com/gefangshuai/ANodeBlog),
9595 // which is licensed under the Apache License 2.0; see file licenses/ANodeBlog-LICENSE.
96- _ = try Regex ( #"\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
96+ _ = try Regex ( #"\/(?![ *])(\\\/|.)*?\/[gim]*(?=\W|$)"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
9797
9898 // BAD
9999 // attack string: "##".repeat(100) + "\na"
100100 // Adapted from CodeMirror (https://github.com/codemirror/codemirror),
101101 // which is licensed under the MIT license; see file licenses/CodeMirror-LICENSE.
102- _ = try Regex ( #"^([\s\[\{\(]|#.*)*$"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
102+ _ = try Regex ( #"^([\s\[\{\(]|#.*)*$"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
103103
104104 // BAD
105105 // attack string: "a" + "[]".repeat(100) + ".b\n"
106106 // Adapted from Knockout (https://github.com/knockout/knockout), which is
107107 // licensed under the MIT license; see file licenses/knockout-LICENSE
108- _ = try Regex ( #"^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
108+ _ = try Regex ( #"^[\_$a-z][\_$a-z0-9]*(\[.*?\])*(\.[\_$a-z][\_$a-z0-9]*(\[.*?\])*)*$"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
109109
110110 // BAD
111111 // attack string: "[" + "][".repeat(100) + "]!"
112112 // Adapted from Prototype.js (https://github.com/prototypejs/prototype), which
113113 // is licensed under the MIT license; see file licenses/Prototype.js-LICENSE.
114- _ = try Regex ( #"(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
114+ _ = try Regex ( #"(([\w#:.~>+()\s-]+|\*|\[.*?\])+)\s*(,|$)"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
115115
116116 // BAD
117117 // attack string: "'" + "\\a".repeat(100) + '"'
118118 // Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
119119 // under the MIT license; see file licenses/Prism-LICENSE.
120- _ = try Regex ( #"("|')(\\?.)*?\1"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
120+ _ = try Regex ( #"("|')(\\?.)*?\1"# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
121121
122122 // more cases:
123123
@@ -148,7 +148,7 @@ func myRegexpVariantsTests(myUrl: URL) throws {
148148
149149 // GOOD
150150 _ = try Regex ( #"([\w.]+)*"# ) . firstMatch ( in: tainted)
151- // BAD
151+ // BAD
152152 // attack string: "a" x lots + "!"
153153 _ = try Regex ( #"([\w.]+)*"# ) . wholeMatch ( in: tainted) // $ MISSING: redos-vulnerable=
154154
@@ -214,7 +214,7 @@ func myRegexpVariantsTests(myUrl: URL) throws {
214214
215215 // BAD
216216 // attack string: "5" x lots + "!"
217- _ = try Regex ( #"((\d|\d)*)""# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
217+ _ = try Regex ( #"((\d|\d)*)""# ) . firstMatch ( in: tainted) // $ redos-vulnerable=
218218
219219 // BAD
220220 // attack string: "0" x lots + "!"
@@ -262,7 +262,7 @@ func myRegexpVariantsTests(myUrl: URL) throws {
262262
263263 // GOOD - there is no witness in the end that could cause the regexp to not match
264264 _ = try Regex ( #"(\d+(X\d+)?)+"# ) . firstMatch ( in: tainted)
265- // BAD
265+ // BAD
266266 // attack string: "0" x lots + "!"
267267 _ = try Regex ( #"(\d+(X\d+)?)+"# ) . wholeMatch ( in: tainted) // $ MISSING: redos-vulnerable=
268268
@@ -305,7 +305,7 @@ func myRegexpVariantsTests(myUrl: URL) throws {
305305
306306 // GOOD
307307 _ = try Regex ( " (a+)+aaaaa*a+ " ) . firstMatch ( in: tainted)
308- // BAD
308+ // BAD
309309 // attack string: "a" x lots + "!"
310310 _ = try Regex ( " (a+)+aaaaa*a+ " ) . wholeMatch ( in: tainted) // $ MISSING: redos-vulnerable=
311311
@@ -333,7 +333,7 @@ func myRegexpVariantsTests(myUrl: URL) throws {
333333
334334 // GOOD
335335 _ = try Regex ( " (([^X]b)+)*($|[^X]b) " ) . firstMatch ( in: tainted)
336- // BAD
336+ // BAD
337337 // attack string: "b" x lots + "!"
338338 _ = try Regex ( " (([^X]b)+)*($|[^X]b) " ) . wholeMatch ( in: tainted) // $ MISSING: redos-vulnerable=
339339
@@ -544,7 +544,7 @@ func myRegexpVariantsTests(myUrl: URL) throws {
544544
545545 // GOOD
546546 _ = try Regex ( #"("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)"# ) . firstMatch ( in: tainted)
547- // BAD
547+ // BAD
548548 // attack string: "##" x lots + "\na"
549549 _ = try Regex ( #"("[^"]*?"|[^"\s]+)+(?=\s*|\s*$)"# ) . wholeMatch ( in: tainted) // $ MISSING: redos-vulnerable=
550550
0 commit comments