@@ -4,6 +4,10 @@ private import internal.ParseRegExp
44private import codeql.NumberUtils
55private import codeql.ruby.ast.Literal as Ast
66private import codeql.Locations
7+ private import codeql.regex.nfa.NfaUtils as NfaUtils
8+ private import codeql.regex.RegexTreeView
9+ // Export `Impl` both under the name `RegexTreeView` and directly in the top-level scope.
10+ import Impl as RegexTreeView
711import Impl
812
913/** Gets the parse tree resulting from parsing `re`, if such has been constructed. */
@@ -52,7 +56,7 @@ private newtype TRegExpParent =
5256 }
5357
5458/** An implementation that satisfies the RegexTreeView signature. */
55- private module Impl {
59+ private module Impl implements RegexTreeViewSig {
5660 /**
5761 * An element containing a regular expression term, that is, either
5862 * a string literal (parsed as a regular expression)
@@ -1157,4 +1161,67 @@ private module Impl {
11571161 */
11581162 predicate isInverted ( ) { re .namedCharacterPropertyIsInverted ( start , end ) }
11591163 }
1164+
/**
 * Alias that exposes `RegExpParent` under the name `Top`.
 * NOTE(review): presumably the name required by `RegexTreeViewSig` - confirm.
 */
class Top = RegExpParent;
1166+
/**
 * Holds if `term` is an escape class such as `\d`, and `clazz` is the letter
 * naming the character class it stands for (e.g. "d" for `\d`).
 *
 * Two kinds of term are recognised:
 * - a `RegExpCharacterClassEscape`, where `clazz` is the escape's value;
 * - a `RegExpNamedCharacterProperty` whose lower-cased name is "digit",
 *   "space" or "word", mapped to "d", "s" or "w" respectively, or to the
 *   upper-case letter when the property is inverted.
 */
predicate isEscapeClass(RegExpTerm term, string clazz) {
  clazz = term.(RegExpCharacterClassEscape).getValue()
  or
  // TODO: expand to cover more properties
  exists(RegExpNamedCharacterProperty prop | prop = term |
    prop.getName().toLowerCase() = "digit" and
    if prop.isInverted() then clazz = "D" else clazz = "d"
    or
    prop.getName().toLowerCase() = "space" and
    if prop.isInverted() then clazz = "S" else clazz = "s"
    or
    prop.getName().toLowerCase() = "word" and
    if prop.isInverted() then clazz = "W" else clazz = "w"
  )
}
1186+
/**
 * Holds if `parent` should be left out of the analysis.
 *
 * Currently this excludes terms belonging to a regex literal that has the
 * free-spacing flag set.
 */
predicate isExcluded(RegExpParent parent) {
  exists(Ast::RegExpLiteral literal |
    literal = parent.(RegExpTerm).getRegExp() and
    // exclude free-spacing mode regexes
    literal.hasFreeSpacingFlag()
  )
}
1193+
/**
 * Holds if `term` is a possessive quantifier.
 *
 * This is not implemented yet, so the predicate never holds; it exists
 * because the shared library uses it.
 */
predicate isPossessive(RegExpQuantifier term) {
  none()
}
1199+
/**
 * Holds if the regex containing `term` is used in a way that ignores any
 * leading prefix of the input it is matched against.
 *
 * Not yet implemented for Ruby: the predicate currently holds for every term.
 */
predicate matchesAnyPrefix(RegExpTerm term) {
  any()
}
1205+
/**
 * Holds if the regex containing `term` is used in a way that ignores any
 * trailing suffix of the input it is matched against.
 *
 * Not yet implemented for Ruby: the predicate currently holds for every term.
 */
predicate matchesAnySuffix(RegExpTerm term) {
  any()
}
1211+
/**
 * Holds if `root` is a root term whose literal is matched case-insensitively
 * (the `i` flag).
 */
predicate isIgnoreCase(RegExpTerm root) {
  root.getLiteral().isIgnoreCase() and
  root.isRootTerm()
}
1219+
1220+ /**
1221+ * Holds if `root` has the `s` flag for multi-line matching.
1222+ */
1223+ predicate isDotAll ( RegExpTerm root ) {
1224+ root .isRootTerm ( ) and
1225+ root .getLiteral ( ) .isDotAll ( )
1226+ }
11601227}
0 commit comments