@@ -877,6 +877,101 @@ predicate isStartState(State state) {
877877 */
878878signature predicate isCandidateSig ( State state , string pump ) ;
879879
880+ /**
881+ * Holds if `state` is a candidate for ReDoS.
882+ */
883+ signature predicate isCandidateSig ( State state ) ;
884+
885+ /**
886+ * Predicates for constructing a prefix string that leads to a given state.
887+ */
888+ module PrefixConstruction< isCandidateSig / 1 isCandidate> {
889+ /**
890+ * Holds if `state` is the textually last start state for the regular expression.
891+ */
892+ private predicate lastStartState ( State state ) {
893+ exists ( RegExpRoot root |
894+ state =
895+ max ( State s , Location l |
896+ s = stateInPumpableRegexp ( ) and
897+ isStartState ( s ) and
898+ getRoot ( s .getRepr ( ) ) = root and
899+ l = s .getRepr ( ) .getLocation ( )
900+ |
901+ s
902+ order by
903+ l .getStartLine ( ) , l .getStartColumn ( ) , s .getRepr ( ) .toString ( ) , l .getEndColumn ( ) ,
904+ l .getEndLine ( )
905+ )
906+ )
907+ }
908+
909+ /**
910+ * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
911+ */
912+ private predicate existsTransition ( State a , State b ) { delta ( a , _, b ) }
913+
914+ /**
915+ * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
916+ */
917+ int prefixLength ( State start , State state ) =
918+ shortestDistances( lastStartState / 1 , existsTransition / 2 ) ( start , state , result )
919+
920+ /**
921+ * Gets the minimum number of transitions it takes to reach `state` from the start state.
922+ */
923+ private int lengthFromStart ( State state ) { result = prefixLength ( _, state ) }
924+
925+ /**
926+ * Gets a string for which the regular expression will reach `state`.
927+ *
928+ * Has at most one result for any given `state`.
929+ * This predicate will not always have a result even if there is a ReDoS issue in
930+ * the regular expression.
931+ */
932+ string prefix ( State state ) {
933+ lastStartState ( state ) and
934+ result = ""
935+ or
936+ // the search stops past the last redos candidate state.
937+ lengthFromStart ( state ) <= max ( lengthFromStart ( any ( State s | isCandidate ( s ) ) ) ) and
938+ exists ( State prev |
939+ // select a unique predecessor (by an arbitrary measure)
940+ prev =
941+ min ( State s , Location loc |
942+ lengthFromStart ( s ) = lengthFromStart ( state ) - 1 and
943+ loc = s .getRepr ( ) .getLocation ( ) and
944+ delta ( s , _, state )
945+ |
946+ s
947+ order by
948+ loc .getStartLine ( ) , loc .getStartColumn ( ) , loc .getEndLine ( ) , loc .getEndColumn ( ) ,
949+ s .getRepr ( ) .toString ( )
950+ )
951+ |
952+ // greedy search for the shortest prefix
953+ result = prefix ( prev ) and delta ( prev , Epsilon ( ) , state )
954+ or
955+ not delta ( prev , Epsilon ( ) , state ) and
956+ result = prefix ( prev ) + getCanonicalEdgeChar ( prev , state )
957+ )
958+ }
959+
960+ /**
961+ * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
962+ */
963+ private string getCanonicalEdgeChar ( State prev , State next ) {
964+ result =
965+ min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
966+ }
967+
968+ /** Gets a state within a regular expression that has a pumpable state. */
969+ pragma [ noinline]
970+ State stateInPumpableRegexp ( ) {
971+ exists ( State s | isCandidate ( s ) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
972+ }
973+ }
974+
880975/**
881976 * A module for pruning candidate ReDoS states.
882977 * The candidates are specified by the `isCandidate` signature predicate.
@@ -910,95 +1005,9 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
9101005 /** Gets a state that can reach the `accept-any` state using only epsilon steps. */
9111006 private State acceptsAnySuffix ( ) { epsilonSucc * ( result ) = AcceptAnySuffix ( _) }
9121007
913- /**
914- * Predicates for constructing a prefix string that leads to a given state.
915- */
916- private module PrefixConstruction {
917- /**
918- * Holds if `state` is the textually last start state for the regular expression.
919- */
920- private predicate lastStartState ( State state ) {
921- exists ( RegExpRoot root |
922- state =
923- max ( State s , Location l |
924- s = stateInPumpableRegexp ( ) and
925- isStartState ( s ) and
926- getRoot ( s .getRepr ( ) ) = root and
927- l = s .getRepr ( ) .getLocation ( )
928- |
929- s
930- order by
931- l .getStartLine ( ) , l .getStartColumn ( ) , s .getRepr ( ) .toString ( ) , l .getEndColumn ( ) ,
932- l .getEndLine ( )
933- )
934- )
935- }
1008+ predicate isCandidateState ( State s ) { isReDoSCandidate ( s , _) }
9361009
937- /**
938- * Holds if there exists any transition (Epsilon() or other) from `a` to `b`.
939- */
940- private predicate existsTransition ( State a , State b ) { delta ( a , _, b ) }
941-
942- /**
943- * Gets the minimum number of transitions it takes to reach `state` from the `start` state.
944- */
945- int prefixLength ( State start , State state ) =
946- shortestDistances( lastStartState / 1 , existsTransition / 2 ) ( start , state , result )
947-
948- /**
949- * Gets the minimum number of transitions it takes to reach `state` from the start state.
950- */
951- private int lengthFromStart ( State state ) { result = prefixLength ( _, state ) }
952-
953- /**
954- * Gets a string for which the regular expression will reach `state`.
955- *
956- * Has at most one result for any given `state`.
957- * This predicate will not always have a result even if there is a ReDoS issue in
958- * the regular expression.
959- */
960- string prefix ( State state ) {
961- lastStartState ( state ) and
962- result = ""
963- or
964- // the search stops past the last redos candidate state.
965- lengthFromStart ( state ) <= max ( lengthFromStart ( any ( State s | isReDoSCandidate ( s , _) ) ) ) and
966- exists ( State prev |
967- // select a unique predecessor (by an arbitrary measure)
968- prev =
969- min ( State s , Location loc |
970- lengthFromStart ( s ) = lengthFromStart ( state ) - 1 and
971- loc = s .getRepr ( ) .getLocation ( ) and
972- delta ( s , _, state )
973- |
974- s
975- order by
976- loc .getStartLine ( ) , loc .getStartColumn ( ) , loc .getEndLine ( ) , loc .getEndColumn ( ) ,
977- s .getRepr ( ) .toString ( )
978- )
979- |
980- // greedy search for the shortest prefix
981- result = prefix ( prev ) and delta ( prev , Epsilon ( ) , state )
982- or
983- not delta ( prev , Epsilon ( ) , state ) and
984- result = prefix ( prev ) + getCanonicalEdgeChar ( prev , state )
985- )
986- }
987-
988- /**
989- * Gets a canonical char for which there exists a transition from `prev` to `next` in the NFA.
990- */
991- private string getCanonicalEdgeChar ( State prev , State next ) {
992- result =
993- min ( string c | delta ( prev , any ( InputSymbol symbol | c = intersect ( Any ( ) , symbol ) ) , next ) )
994- }
995-
996- /** Gets a state within a regular expression that has a pumpable state. */
997- pragma [ noinline]
998- State stateInPumpableRegexp ( ) {
999- exists ( State s | isReDoSCandidate ( s , _) | getRoot ( s .getRepr ( ) ) = getRoot ( result .getRepr ( ) ) )
1000- }
1001- }
1010+ import PrefixConstruction< isCandidateState / 1 > as Prefix
10021011
10031012 /**
10041013 * Predicates for testing the presence of a rejecting suffix.
@@ -1018,8 +1027,6 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10181027 * using epsilon transitions. But any attempt at repeating `w` will end in a state that accepts all suffixes.
10191028 */
10201029 private module SuffixConstruction {
1021- import PrefixConstruction
1022-
10231030 /**
10241031 * Holds if all states reachable from `fork` by repeating `w`
10251032 * are likely rejectable by appending some suffix.
@@ -1036,7 +1043,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10361043 */
10371044 pragma [ noinline]
10381045 private predicate isLikelyRejectable ( State s ) {
1039- s = stateInPumpableRegexp ( ) and
1046+ s = Prefix :: stateInPumpableRegexp ( ) and
10401047 (
10411048 // exists a reject edge with some char.
10421049 hasRejectEdge ( s )
@@ -1052,15 +1059,15 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10521059 * Holds if `s` is not an accept state, and there is no epsilon transition to an accept state.
10531060 */
10541061 predicate isRejectState ( State s ) {
1055- s = stateInPumpableRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
1062+ s = Prefix :: stateInPumpableRegexp ( ) and not epsilonSucc * ( s ) = Accept ( _)
10561063 }
10571064
10581065 /**
10591066 * Holds if there is likely a non-empty suffix leading to rejection starting in `s`.
10601067 */
10611068 pragma [ noopt]
10621069 predicate hasEdgeToLikelyRejectable ( State s ) {
1063- s = stateInPumpableRegexp ( ) and
1070+ s = Prefix :: stateInPumpableRegexp ( ) and
10641071 // all edges (at least one) with some char leads to another state that is rejectable.
10651072 // the `next` states might not share a common suffix, which can cause FPs.
10661073 exists ( string char | char = hasEdgeToLikelyRejectableHelper ( s ) |
@@ -1076,7 +1083,7 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10761083 */
10771084 pragma [ noinline]
10781085 private string hasEdgeToLikelyRejectableHelper ( State s ) {
1079- s = stateInPumpableRegexp ( ) and
1086+ s = Prefix :: stateInPumpableRegexp ( ) and
10801087 not hasRejectEdge ( s ) and
10811088 not isRejectState ( s ) and
10821089 deltaClosedChar ( s , result , _)
@@ -1088,8 +1095,8 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
10881095 * `prev` to `next` that the character symbol `char`.
10891096 */
10901097 predicate deltaClosedChar ( State prev , string char , State next ) {
1091- prev = stateInPumpableRegexp ( ) and
1092- next = stateInPumpableRegexp ( ) and
1098+ prev = Prefix :: stateInPumpableRegexp ( ) and
1099+ next = Prefix :: stateInPumpableRegexp ( ) and
10931100 deltaClosed ( prev , getAnInputSymbolMatchingRelevant ( char ) , next )
10941101 }
10951102
@@ -1208,12 +1215,12 @@ module ReDoSPruning<isCandidateSig/2 isCandidate> {
12081215 predicate hasReDoSResult ( RegExpTerm t , string pump , State s , string prefixMsg ) {
12091216 isReDoSAttackable ( t , pump , s ) and
12101217 (
1211- prefixMsg = "starting with '" + escape ( PrefixConstruction :: prefix ( s ) ) + "' and " and
1212- not PrefixConstruction :: prefix ( s ) = ""
1218+ prefixMsg = "starting with '" + escape ( Prefix :: prefix ( s ) ) + "' and " and
1219+ not Prefix :: prefix ( s ) = ""
12131220 or
1214- PrefixConstruction :: prefix ( s ) = "" and prefixMsg = ""
1221+ Prefix :: prefix ( s ) = "" and prefixMsg = ""
12151222 or
1216- not exists ( PrefixConstruction :: prefix ( s ) ) and prefixMsg = ""
1223+ not exists ( Prefix :: prefix ( s ) ) and prefixMsg = ""
12171224 )
12181225 }
12191226
0 commit comments