Skip to content

Commit 9d9abaf

Browse files
aibaarsyoff
andcommitted
Apply suggestions from code review
Co-authored-by: yoff <lerchedahl@gmail.com>
1 parent 69ed121 commit 9d9abaf

4 files changed

Lines changed: 34 additions & 26 deletions

File tree

python/ql/lib/semmle/python/RegexTreeView.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ newtype TRegExpParent =
4040
TRegExpSpecialChar(Regex re, int start, int end) { re.specialCharacter(start, end, _) } or
4141
/** A normal character */
4242
TRegExpNormalChar(Regex re, int start, int end) {
43-
re.normalCharacterSequence(start, end)
43+
re.simpleCharacterSequence(start, end)
4444
or
4545
re.escapedCharacter(start, end) and
4646
not re.specialCharacter(start, end, _)

python/ql/lib/semmle/python/regex.qll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -447,18 +447,22 @@ abstract class RegexString extends Expr {
447447
}
448448

449449
/**
450-
* A sequence of 'normal' characters.
450+
* A sequence of 'simple' characters.
451451
*/
452-
predicate normalCharacterSequence(int start, int end) {
453-
this.normalCharacter(start, end) and
454-
end = start + 1 and
455-
exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
452+
predicate simpleCharacterSequence(int start, int end) {
453+
// a simple character inside a character set is interpreted on its own
454+
this.simpleCharacter(start, end) and
455+
this.inCharSet(start)
456456
or
457+
// a maximal run of simple characters is considered as one constant
457458
exists(int s, int e |
458-
e = max(int i | normalCharacterSub(s, i)) and
459-
not exists(int x, int y | this.charSet(x, y) and x <= s and y >= e)
459+
e = max(int i | simpleCharacterRun(s, i)) and
460+
not this.inCharSet(s)
460461
|
461-
if qualifier(e, _, _, _)
462+
// 'abc' can be considered one constant, but
463+
// 'abc+' has to be broken up into 'ab' and 'c+',
464+
// as the qualifier only applies to 'c'.
465+
if this.qualifier(e, _, _, _)
462466
then
463467
end = e and start = e - 1
464468
or
@@ -470,17 +474,17 @@ abstract class RegexString extends Expr {
470474
)
471475
}
472476

473-
private predicate normalCharacterSub(int start, int end) {
477+
private predicate simpleCharacterRun(int start, int end) {
474478
(
475-
normalCharacterSub(start, end - 1)
479+
simpleCharacterRun(start, end - 1)
476480
or
477481
start = end - 1 and not normalCharacter(start - 1, start)
478482
) and
479-
this.normalCharacter(end - 1, end)
483+
this.simpleCharacter(end - 1, end)
480484
}
481485

482486
private predicate characterItem(int start, int end) {
483-
this.normalCharacterSequence(start, end) or
487+
this.simpleCharacterSequence(start, end) or
484488
this.escapedCharacter(start, end) or
485489
this.specialCharacter(start, end, _)
486490
}

ruby/ql/lib/codeql/ruby/security/performance/ParseRegExp.qll

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -402,18 +402,22 @@ class RegExp extends AST::RegExpLiteral {
402402
}
403403

404404
/**
405-
* A sequence of 'normal' characters.
405+
* A sequence of 'simple' characters.
406406
*/
407-
predicate normalCharacterSequence(int start, int end) {
408-
this.normalCharacter(start, end) and
409-
end = start + 1 and
410-
exists(int x, int y | this.charSet(x, y) and x <= start and y >= end)
407+
predicate simpleCharacterSequence(int start, int end) {
408+
// a simple character inside a character set is interpreted on its own
409+
this.simpleCharacter(start, end) and
410+
this.inCharSet(start)
411411
or
412+
// a maximal run of simple characters is considered as one constant
412413
exists(int s, int e |
413-
e = max(int i | normalCharacterSub(s, i)) and
414-
not exists(int x, int y | this.charSet(x, y) and x <= s and y >= e)
414+
e = max(int i | simpleCharacterRun(s, i)) and
415+
not this.inCharSet(s)
415416
|
416-
if qualifier(e, _, _, _)
417+
// 'abc' can be considered one constant, but
418+
// 'abc+' has to be broken up into 'ab' and 'c+',
419+
// as the qualifier only applies to 'c'.
420+
if this.qualifier(e, _, _, _)
417421
then
418422
end = e and start = e - 1
419423
or
@@ -425,17 +429,17 @@ class RegExp extends AST::RegExpLiteral {
425429
)
426430
}
427431

428-
private predicate normalCharacterSub(int start, int end) {
432+
private predicate simpleCharacterRun(int start, int end) {
429433
(
430-
normalCharacterSub(start, end - 1)
434+
simpleCharacterRun(start, end - 1)
431435
or
432436
start = end - 1 and not normalCharacter(start - 1, start)
433437
) and
434-
this.normalCharacter(end - 1, end)
438+
this.simpleCharacter(end - 1, end)
435439
}
436440

437441
private predicate characterItem(int start, int end) {
438-
this.normalCharacterSequence(start, end) or
442+
this.simpleCharacterSequence(start, end) or
439443
this.escapedCharacter(start, end) or
440444
this.specialCharacter(start, end, _)
441445
}

ruby/ql/lib/codeql/ruby/security/performance/RegExpTreeView.qll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ newtype TRegExpParent =
229229
TRegExpGroup(RegExp re, int start, int end) { re.group(start, end) } or
230230
TRegExpSpecialChar(RegExp re, int start, int end) { re.specialCharacter(start, end, _) } or
231231
TRegExpNormalChar(RegExp re, int start, int end) {
232-
re.normalCharacterSequence(start, end)
232+
re.simpleCharacterSequence(start, end)
233233
or
234234
re.escapedCharacter(start, end) and
235235
not re.specialCharacter(start, end, _)

0 commit comments

Comments
 (0)