Skip to content

Commit 44eb7bf

Browse files
committed
Swift: Import more test cases from other languages (this highlights some issues).
1 parent c540568 commit 44eb7bf

1 file changed

Lines changed: 356 additions & 1 deletion

File tree

swift/ql/test/library-tests/regex/redos_variants.swift

Lines changed: 356 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ extension String {
3535

3636
func myRegexpVariantsTests(myUrl: URL) throws {
3737
let tainted = String(contentsOf: myUrl) // tainted
38-
let untainted = "abcdef"
3938

4039
// basic cases:
4140

@@ -106,4 +105,360 @@ func myRegexpVariantsTests(myUrl: URL) throws {
106105
// Adapted from Prism (https://github.com/PrismJS/prism), which is licensed
107106
// under the MIT license; see file licenses/Prism-LICENSE.
108107
_ = try Regex("(\"|')(\\\\?.)*?\\1").firstMatch(in: tainted) // $ redos-vulnerable=
108+
109+
// more cases:
110+
111+
// GOOD
112+
_ = try Regex("(\\r\\n|\\r|\\n)+").firstMatch(in: tainted)
113+
114+
// GOOD
115+
_ = try Regex("(a|.)*").firstMatch(in: tainted)
116+
117+
// Testing the NFA - all of the below are expected to be detected.
118+
_ = try Regex("^([a-z]+)+$").firstMatch(in: tainted) // $ redos-vulnerable=
119+
_ = try Regex("^([a-z]*)*$").firstMatch(in: tainted) // $ redos-vulnerable=
120+
_ = try Regex("^([a-zA-Z0-9])(([\\\\-.]|[_]+)?([a-zA-Z0-9]+))*(@){1}[a-z0-9]+[.]{1}(([a-z]{2,3})|([a-z]{2,3}[.]{1}[a-z]{2,3}))$").firstMatch(in: tainted) // $ redos-vulnerable=
121+
_ = try Regex("^(([a-z])+.)+[A-Z]([a-z])+$").firstMatch(in: tainted) // $ redos-vulnerable=
122+
123+
// NOT GOOD
124+
_ = try Regex("(b|a?b)*c").firstMatch(in: tainted) // $ redos-vulnerable=
125+
126+
// NOT GOOD
127+
_ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ redos-vulnerable=
128+
129+
// GOOD
130+
_ = try Regex("(.|\\n)*!").firstMatch(in: tainted)
131+
132+
// NOT GOOD; attack: "\n".repeat(100) + "."; TODO: investigate, we should be getting this one.
133+
_ = try Regex("(?s)(.|\\n)*!").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
134+
135+
// GOOD
136+
_ = try Regex("([\\w.]+)*").firstMatch(in: tainted)
137+
138+
// NOT GOOD
139+
_ = try Regex("(a|aa?)*b").firstMatch(in: tainted) // $ redos-vulnerable=
140+
141+
// NOT GOOD
142+
_ = try Regex("(([\\s\\S]|[^a])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
143+
144+
// GOOD - there is no witness at the end that could cause the regexp to not match
145+
_ = try Regex("([^\"']+)*").firstMatch(in: tainted)
146+
147+
// NOT GOOD
148+
_ = try Regex("((.|[^a])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
149+
150+
// GOOD
151+
_ = try Regex("((a|[^a])*)\"").firstMatch(in: tainted)
152+
153+
// NOT GOOD
154+
_ = try Regex("((b|[^a])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
155+
156+
// NOT GOOD
157+
_ = try Regex("((G|[^a])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
158+
159+
// NOT GOOD
160+
_ = try Regex("(([0-9]|[^a])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
161+
162+
// NOT GOOD (missing)
163+
_ = try Regex("(?:=(?:([!#\\$%&'\\*\\+\\-\\.\\^_`\\|~0-9A-Za-z]+)|\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"))?").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
164+
165+
// NOT GOOD (missing)
166+
_ = try Regex("\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"])*)\"").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
167+
168+
// GOOD
169+
_ = try Regex("\"((?:\\\\[\\x00-\\x7f]|[^\\x00-\\x08\\x0a-\\x1f\\x7f\"\\\\])*)\"").firstMatch(in: tainted)
170+
171+
// NOT GOOD
172+
_ = try Regex("(([a-z]|[d-h])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
173+
174+
// NOT GOOD
175+
_ = try Regex("(([^a-z]|[^0-9])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
176+
177+
// NOT GOOD
178+
_ = try Regex("((\\d|[0-9])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
179+
180+
// NOT GOOD
181+
_ = try Regex("((\\s|\\s)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
182+
183+
// NOT GOOD
184+
_ = try Regex("((\\w|G)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
185+
186+
// GOOD
187+
_ = try Regex("((\\s|\\d)*)\"").firstMatch(in: tainted)
188+
189+
// NOT GOOD
190+
_ = try Regex("((\\d|\\w)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
191+
192+
// NOT GOOD
193+
_ = try Regex("((\\d|5)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
194+
195+
// NOT GOOD
196+
_ = try Regex("((\\s|[\\f])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
197+
198+
// NOT GOOD - the expectation below marks this as detected; the imported note said it was not (likely because \v is a character class in Java rather than a specific character in other langs)
199+
_ = try Regex("((\\s|[\\v]|\\\\v)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
200+
201+
// NOT GOOD
202+
_ = try Regex("((\\f|[\\f])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
203+
204+
// NOT GOOD
205+
_ = try Regex("((\\W|\\D)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
206+
207+
// NOT GOOD
208+
_ = try Regex("((\\S|\\w)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
209+
210+
// NOT GOOD
211+
_ = try Regex("((\\S|[\\w])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
212+
213+
// NOT GOOD
214+
_ = try Regex("((1s|[\\da-z])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
215+
216+
// NOT GOOD
217+
_ = try Regex("((0|[\\d])*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
218+
219+
// NOT GOOD
220+
_ = try Regex("(([\\d]+)*)\"").firstMatch(in: tainted) // $ redos-vulnerable=
221+
222+
// GOOD - there is no witness at the end that could cause the regexp to not match
223+
_ = try Regex("(\\d+(X\\d+)?)+").firstMatch(in: tainted)
224+
225+
// GOOD - there is no witness at the end that could cause the regexp to not match
226+
_ = try Regex("([0-9]+(X[0-9]*)?)*").firstMatch(in: tainted)
227+
228+
// GOOD
229+
_ = try Regex("^([^>]+)*(>|$)").firstMatch(in: tainted)
230+
231+
// NOT GOOD
232+
_ = try Regex("^([^>a]+)*(>|$)").firstMatch(in: tainted) // $ redos-vulnerable=
233+
234+
// NOT GOOD
235+
_ = try Regex("(\\n\\s*)+$").firstMatch(in: tainted) // $ redos-vulnerable=
236+
237+
// NOT GOOD
238+
_ = try Regex("^(?:\\s+|#.*|\\(\\?#[^)]*\\))*(?:[?*+]|\\{\\d+(?:,\\d*)?})").firstMatch(in: tainted) // $ redos-vulnerable=
239+
240+
// NOT GOOD
241+
_ = try Regex("\\{\\[\\s*([a-zA-Z]+)\\(([a-zA-Z]+)\\)((\\s*([a-zA-Z]+)\\: ?([ a-zA-Z{}]+),?)+)*\\s*\\]\\}").firstMatch(in: tainted) // $ redos-vulnerable=
242+
243+
// NOT GOOD
244+
_ = try Regex("(a+|b+|c+)*c").firstMatch(in: tainted) // $ redos-vulnerable=
245+
246+
// NOT GOOD
247+
_ = try Regex("(((a+a?)*)+b+)").firstMatch(in: tainted) // $ redos-vulnerable=
248+
249+
// NOT GOOD
250+
_ = try Regex("(a+)+bbbb").firstMatch(in: tainted) // $ redos-vulnerable=
251+
252+
// GOOD
253+
_ = try Regex("(a+)+aaaaa*a+").firstMatch(in: tainted)
254+
255+
// NOT GOOD
256+
_ = try Regex("(a+)+aaaaa$").firstMatch(in: tainted) // $ redos-vulnerable=
257+
258+
// GOOD
259+
_ = try Regex("(\\n+)+\\n\\n").firstMatch(in: tainted)
260+
261+
// NOT GOOD
262+
_ = try Regex("(\\n+)+\\n\\n$").firstMatch(in: tainted) // $ redos-vulnerable=
263+
264+
// NOT GOOD
265+
_ = try Regex("([^X]+)*$").firstMatch(in: tainted) // $ redos-vulnerable=
266+
267+
// NOT GOOD
268+
_ = try Regex("(([^X]b)+)*$").firstMatch(in: tainted) // $ redos-vulnerable=
269+
270+
// GOOD
271+
_ = try Regex("(([^X]b)+)*($|[^X]b)").firstMatch(in: tainted)
272+
273+
// NOT GOOD
274+
_ = try Regex("(([^X]b)+)*($|[^X]c)").firstMatch(in: tainted) // $ redos-vulnerable=
275+
276+
// GOOD
277+
_ = try Regex("((ab)+)*ababab").firstMatch(in: tainted)
278+
279+
// GOOD
280+
_ = try Regex("((ab)+)*abab(ab)*(ab)+").firstMatch(in: tainted)
281+
282+
// GOOD
283+
_ = try Regex("((ab)+)*").firstMatch(in: tainted)
284+
285+
// NOT GOOD
286+
_ = try Regex("((ab)+)*$").firstMatch(in: tainted) // $ redos-vulnerable=
287+
288+
// GOOD
289+
_ = try Regex("((ab)+)*[a1][b1][a2][b2][a3][b3]").firstMatch(in: tainted)
290+
291+
// NOT GOOD
292+
_ = try Regex("([\\n\\s]+)*(.)").firstMatch(in: tainted) // $ redos-vulnerable=
293+
294+
// GOOD - any witness passes through the accept state.
295+
_ = try Regex("(A*A*X)*").firstMatch(in: tainted)
296+
297+
// GOOD
298+
_ = try Regex("([^\\\\\\]]+)*").firstMatch(in: tainted)
299+
300+
// NOT GOOD TODO: QL evaluation times out (for test, at 5 minutes)
301+
// _ = try Regex("(\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\w*foobarbaz\\s*foobarbaz\\d*foobarbaz\\w*)+-").firstMatch(in: tainted) // $ redos-vulnerable=
302+
303+
// NOT GOOD (but cannot currently construct a prefix)
304+
_ = try Regex("a{2,3}(b+)+X").firstMatch(in: tainted) // $ redos-vulnerable=
305+
306+
// NOT GOOD (and a good prefix test)
307+
_ = try Regex("^<(\\w+)((?:\\s+\\w+(?:\\s*=\\s*(?:(?:\"[^\"]*\")|(?:'[^']*')|[^>\\s]+))?)*)\\s*(\\/?)>").firstMatch(in: tainted) // $ redos-vulnerable=
308+
309+
// GOOD
310+
_ = try Regex("(a+)*[\\s\\S][\\s\\S][\\s\\S]?").firstMatch(in: tainted)
311+
312+
// GOOD - but we fail to see that repeating the attack string ends in the "accept any" state (due to not parsing the range `[\s\S]{2,3}`).
313+
_ = try Regex("(a+)*[\\s\\S]{2,3}").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
314+
315+
// GOOD - but we spuriously conclude that a rejecting suffix exists (due to not parsing the range `[\s\S]{2,}` when constructing the NFA).
316+
_ = try Regex("(a+)*([\\s\\S]{2,}|X)$").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
317+
318+
// GOOD
319+
_ = try Regex("(a+)*([\\s\\S]*|X)$").firstMatch(in: tainted)
320+
321+
// NOT GOOD
322+
_ = try Regex("((a+)*$|[\\s\\S]+)").firstMatch(in: tainted) // $ redos-vulnerable=
323+
324+
// GOOD - but still flagged. The only change compared to the above is the order of alternatives, which we don't model.
325+
_ = try Regex("([\\s\\S]+|(a+)*$)").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
326+
327+
// GOOD
328+
_ = try Regex("((;|^)a+)+$").firstMatch(in: tainted)
329+
330+
// NOT GOOD (a good prefix test)
331+
_ = try Regex("(^|;)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(0|1)(e+)+f").firstMatch(in: tainted) // $ redos-vulnerable=
332+
333+
// NOT GOOD
334+
_ = try Regex("^ab(c+)+$").firstMatch(in: tainted) // $ redos-vulnerable=
335+
336+
// NOT GOOD
337+
_ = try Regex("(\\d(\\s+)*){20}").firstMatch(in: tainted) // $ redos-vulnerable=
338+
339+
// GOOD - but we spuriously conclude that a rejecting suffix exists.
340+
_ = try Regex("(([^/]|X)+)(\\/[\\s\\S]*)*$").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
341+
342+
// GOOD - but we spuriously conclude that a rejecting suffix exists.
343+
_ = try Regex("^((x([^Y]+)?)*(Y|$))").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
344+
345+
// NOT GOOD
346+
_ = try Regex("(a*)+b").firstMatch(in: tainted) // $ redos-vulnerable=
347+
348+
// NOT GOOD
349+
_ = try Regex("foo([\\w-]*)+bar").firstMatch(in: tainted) // $ redos-vulnerable=
350+
351+
// NOT GOOD
352+
_ = try Regex("((ab)*)+c").firstMatch(in: tainted) // $ redos-vulnerable=
353+
354+
// NOT GOOD
355+
_ = try Regex("(a?a?)*b").firstMatch(in: tainted) // $ redos-vulnerable=
356+
357+
// GOOD
358+
_ = try Regex("(a?)*b").firstMatch(in: tainted)
359+
360+
// NOT GOOD - but not detected
361+
_ = try Regex("(c?a?)*b").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
362+
363+
// NOT GOOD
364+
_ = try Regex("(?:a|a?)+b").firstMatch(in: tainted) // $ redos-vulnerable=
365+
366+
// NOT GOOD - but not detected.
367+
_ = try Regex("(a?b?)*$").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
368+
369+
// NOT GOOD
370+
_ = try Regex("PRE(([a-c]|[c-d])T(e?e?e?e?|X))+(cTcT|cTXcTX$)").firstMatch(in: tainted) // $ redos-vulnerable=
371+
372+
// NOT GOOD
373+
_ = try Regex("^((a)+\\w)+$").firstMatch(in: tainted) // $ redos-vulnerable=
374+
375+
// NOT GOOD
376+
_ = try Regex("^(b+.)+$").firstMatch(in: tainted) // $ redos-vulnerable=
377+
378+
// GOOD
379+
_ = try Regex("a*b").firstMatch(in: tainted)
380+
381+
// All 4 bad combinations of nested * and +
382+
_ = try Regex("(a*)*b").firstMatch(in: tainted) // $ redos-vulnerable=
383+
_ = try Regex("(a+)*b").firstMatch(in: tainted) // $ redos-vulnerable=
384+
_ = try Regex("(a*)+b").firstMatch(in: tainted) // $ redos-vulnerable=
385+
_ = try Regex("(a+)+b").firstMatch(in: tainted) // $ redos-vulnerable=
386+
387+
// GOOD
388+
_ = try Regex("(a|b)+").firstMatch(in: tainted)
389+
_ = try Regex("(?:[\\s;,\"'<>(){}|\\[\\]@=+*]|:(?![/\\\\]))+").firstMatch(in: tainted)
390+
391+
// TODO: investigate; these were marked `hasParseFailure`
392+
_ = try Regex("^((?:a{|-)|\\w\\{)+X$").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
393+
_ = try Regex("^((?:a{0|-)|\\w\\{\\d)+X$").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
394+
_ = try Regex("^((?:a{0,|-)|\\w\\{\\d,)+X$").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
395+
_ = try Regex("^((?:a{0,2|-)|\\w\\{\\d,\\d)+X$").firstMatch(in: tainted) // $ SPURIOUS: redos-vulnerable=
396+
397+
// GOOD
398+
_ = try Regex("^((?:a{0,2}|-)|\\w\\{\\d,\\d\\})+X$").firstMatch(in: tainted)
399+
400+
// NOT GOOD
401+
_ = try Regex("X(\\u0061|a)*Y").firstMatch(in: tainted) // $ redos-vulnerable=
402+
403+
// GOOD
404+
_ = try Regex("X(\\u0061|b)+Y").firstMatch(in: tainted)
405+
406+
// NOT GOOD TODO: we should get this one
407+
_ = try Regex("X(\\x61|a)*Y").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
408+
409+
// GOOD
410+
_ = try Regex("X(\\x61|b)+Y").firstMatch(in: tainted)
411+
412+
// NOT GOOD TODO: we should get this one
413+
_ = try Regex("X(\\x{061}|a)*Y").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
414+
415+
// GOOD
416+
_ = try Regex("X(\\x{061}|b)+Y").firstMatch(in: tainted)
417+
418+
// NOT GOOD
419+
_ = try Regex("X(\\p{Digit}|7)*Y").firstMatch(in: tainted) // $ redos-vulnerable=
420+
421+
// GOOD
422+
_ = try Regex("X(\\p{Digit}|b)+Y").firstMatch(in: tainted)
423+
424+
// NOT GOOD
425+
_ = try Regex("X(\\P{Digit}|b)*Y").firstMatch(in: tainted) // $ redos-vulnerable=
426+
427+
// GOOD
428+
_ = try Regex("X(\\P{Digit}|7)+Y").firstMatch(in: tainted)
429+
430+
// NOT GOOD TODO: we should get this one
431+
_ = try Regex("X(\\p{IsDigit}|7)*Y").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
432+
433+
// GOOD
434+
_ = try Regex("X(\\p{IsDigit}|b)+Y").firstMatch(in: tainted)
435+
436+
// NOT GOOD - but not detected
437+
_ = try Regex("X(\\p{Alpha}|a)*Y").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
438+
439+
// GOOD
440+
_ = try Regex("X(\\p{Alpha}|7)+Y").firstMatch(in: tainted)
441+
442+
// GOOD
443+
_ = try Regex("(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)").firstMatch(in: tainted)
444+
445+
// NOT GOOD
446+
_ = try Regex("/(\"[^\"]*?\"|[^\"\\s]+)+(?=\\s*|\\s*$)X").firstMatch(in: tainted) // $ redos-vulnerable=
447+
_ = try Regex("/(\"[^\"]*?\"|[^\"\\s]+)+(?=X)").firstMatch(in: tainted) // $ redos-vulnerable=
448+
449+
// NOT GOOD
450+
_ = try Regex("\\A(\\d|0)*x").firstMatch(in: tainted) // $ redos-vulnerable=
451+
_ = try Regex("(\\d|0)*\\Z").firstMatch(in: tainted) // $ redos-vulnerable=
452+
_ = try Regex("\\b(\\d|0)*x").firstMatch(in: tainted) // $ redos-vulnerable=
453+
454+
// GOOD - possessive quantifiers don't backtrack
455+
_ = try Regex("(a*+)*+b").firstMatch(in: tainted)
456+
_ = try Regex("(a*)*+b").firstMatch(in: tainted)
457+
_ = try Regex("(a*+)*b").firstMatch(in: tainted)
458+
459+
// NOT GOOD
460+
_ = try Regex("(a*)*b").firstMatch(in: tainted) // $ redos-vulnerable=
461+
462+
// NOT GOOD - but not detected due to the way possessive quantifiers are approximated
463+
_ = try Regex("((aa|a*+)b)*c").firstMatch(in: tainted) // $ MISSING: redos-vulnerable=
109464
}

0 commit comments

Comments
 (0)