@@ -2,6 +2,7 @@ private import codeql_ql.ast.Ast
22private import internal.NodesInternal
33private import internal.DataFlowNumbering
44private import internal.LocalFlow as LocalFlow
5+ private import internal.GlobalFlow as GlobalFlow
56
67/**
78 * An expression or variable in a formula, including some additional nodes
@@ -10,6 +11,8 @@ private import internal.LocalFlow as LocalFlow
1011 * Nodes that are locally bound together by equalities are clustered into a "super node",
1112 * which can be accessed using `getSuperNode()`. There is usually no reason to use `Node` directly
1213 * other than to reason about what kind of node is contained in a super node.
14+ *
15+ * To reason about global data flow, use `SuperNode.track()`.
1316 */
1417class Node extends TNode {
1518 string toString ( ) { none ( ) } // overridden in subclasses
@@ -31,6 +34,8 @@ class Node extends TNode {
3134 /**
3235 * Gets the collection of data-flow nodes locally bound by equalities, represented
3336 * by a "super node".
37+ *
38+ * Super nodes are the medium through which to propagate data-flow information globally.
3439 */
3540 SuperNode getSuperNode ( ) { result .getANode ( ) = this }
3641}
@@ -224,6 +229,8 @@ Node fieldNode(Predicate pred, FieldDecl fieldDecl) {
224229
225230/**
226231 * A collection of data-flow nodes in the same predicate, locally bound by equalities.
232+ *
233+ * To reason about global data flow, use `SuperNode.track()`.
227234 */
228235class SuperNode extends LocalFlow:: TSuperNode {
229236 private int repr ;
@@ -282,10 +289,154 @@ class SuperNode extends LocalFlow::TSuperNode {
282289 result = this .getALocalMemberCall ( ) and
283290 result .getMemberName ( ) = name
284291 }
292+
293+ /**
294+ * Gets a node that this node may "flow to" after one step, moving the tracker from state `t1` to state `t2`.
295+ *
296+ * Basic usage of `track()` to track some expressions looks like this:
297+ * ```
298+ * DataFlow::SuperNode myThing(DataFlow::Tracker t) {
299+ * t.start() and
300+ * result = DataFlow::superNode(< some ast node >)
301+ * or
302+ * exists (DataFlow::Tracker t2 |
303+ * result = myThing(t2).track(t2, t)
304+ * )
305+ * }
306+ *
307+ * DataFlow::SuperNode myThing() { result = myThing(DataFlow::Tracker::end()) }
308+ * ```
309+ */
310+ pragma [ inline]
311+ SuperNode track ( Tracker t1 , Tracker t2 ) {
312+ // Return state -> return state (`t1` has not used a call step yet)
313+ // Store the label of the return edge in t2
314+ not t1 .hasCall ( ) and
315+ GlobalFlow:: directedEdgeSuper ( result , this , t2 )
316+ or
317+ // Call state or initial state -> call state (the edge label is not recorded)
318+ t1 .hasCallOrIsStart ( ) and
319+ t2 .hasCall ( ) and
320+ GlobalFlow:: directedEdgeSuper ( this , result , _)
321+ or
322+ // Return state -> call state
323+ // The last-used return edge (stored in `t1`) must not be used as the initial call edge
324+ // (doing so would allow returning out of a disjunction and into another branch of that disjunction)
325+ not t1 .hasCall ( ) and
326+ t2 .hasCall ( ) and
327+ exists ( GlobalFlow:: EdgeLabel edge |
328+ GlobalFlow:: directedEdgeSuper ( this , result , edge ) and
329+ edge != t1
330+ )
331+ }
332+
333+ /**
334+ * Gets a string constant that flows to this node, where `t` summarizes the path taken.
335+ */
336+ cached
337+ private string getAStringValue ( Tracker t ) {
338+ t .start ( ) and
339+ result = this .asAstNode ( ) .( String ) .getValue ( )
340+ or
341+ exists ( SuperNode pred , Tracker t2 |
342+ this = pred .track ( t2 , t ) and
343+ result = pred .getAStringValue ( t2 )
344+ )
345+ or
346+ // Step through calls to a few built-ins that don't cause a blow-up; the tracker state `t` is left unchanged
347+ exists ( SuperNode pred , string methodName , string oldValue |
348+ this .asAstNode ( ) = pred .getALocalMemberCall ( methodName ) and
349+ oldValue = pred .getAStringValue ( t )
350+ |
351+ methodName = "toLowerCase" and
352+ result = oldValue .toLowerCase ( )
353+ or
354+ methodName = "toUpperCase" and
355+ result = oldValue .toUpperCase ( )
356+ )
357+ }
358+
359+ /** Gets a string constant that may flow here along any tracked path (possibly from a caller context). */
360+ pragma [ inline]
361+ string getAStringValue ( ) { result = this .getAStringValue ( Tracker:: end ( ) ) }
362+
363+ /** Gets a string constant that may flow here, possibly out of callees, but not from caller contexts (the tracker must not be in the `hasCall()` state). */
364+ pragma [ inline]
365+ string getAStringValueNoCall ( ) { result = this .getAStringValue ( Tracker:: endNoCall ( ) ) }
366+
367+ /**
368+ * Gets a string constant that may flow here, which can safely be combined with another
369+ * value that was tracked here with `otherT`, as decided by `Tracker.isSafeToCombineWith`.
370+ *
371+ * This is under-approximate and will fail to accept valid matches when both values
372+ * came in from the same chain of calls.
373+ */
374+ bindingset [ otherT]
375+ string getAStringValueForContext ( Tracker otherT ) {
376+ exists ( Tracker stringT |
377+ result = this .getAStringValue ( stringT ) and
378+ otherT .isSafeToCombineWith ( stringT )
379+ )
380+ }
285381}
286382
287383/** Gets the super node that contains the data-flow node for the given AST node. */
288384pragma [ inline]
289385SuperNode superNode ( AstNode node ) {
290386 result = astNode ( node ) .getSuperNode ( )
291387}
388+
389+ /**
390+ * A summary of the steps needed to reach a node in the global data flow graph,
391+ * to be used in combination with `SuperNode.track`.
392+ */
393+ class Tracker extends GlobalFlow:: TEdgeLabelOrTrackerState {
394+ /** Holds if this is the starting point, that is, the summary of the empty path. */
395+ predicate start ( ) { this = GlobalFlow:: MkNoEdge ( ) }
396+
397+ /** Holds if a call step has been used (possibly preceded by return steps). */
398+ predicate hasCall ( ) { this = GlobalFlow:: MkHasCall ( ) }
399+
400+ /** Holds if either `start()` or `hasCall()` holds. */
401+ predicate hasCallOrIsStart ( ) { this .start ( ) or this .hasCall ( ) }
402+
403+ /**
404+ * Holds if the two trackers are safe to combine, in the sense that
405+ * they don't make contradictory assumptions about what context they're in.
406+ *
407+ * This is approximate and will reject any pair of trackers that have
408+ * both used a call or locally came from the same disjunction.
409+ */
410+ pragma [ inline]
411+ predicate isSafeToCombineWith ( Tracker other ) {
412+ not (
413+ // Both values came from a call, they could come from different call sites.
414+ this .hasCall ( ) and
415+ other .hasCall ( )
416+ or
417+ // Both values came from the same disjunction, they could come from different branches.
418+ this = other and
419+ this instanceof GlobalFlow:: MkDisjunction
420+ )
421+ }
422+
423+ /** Gets a string representation of this tracker state. */
424+ string toString ( ) {
425+ this instanceof GlobalFlow:: MkNoEdge and
426+ result = "Tracker in initial state"
427+ or
428+ this instanceof GlobalFlow:: MkHasCall and
429+ result = "Tracker with calls"
430+ or
431+ this instanceof GlobalFlow:: EdgeLabel and
432+ result = "Tracker with return step out of " + this .( GlobalFlow:: EdgeLabel ) .toString ( )
433+ }
434+ }
435+
436+ module Tracker {
437+ /** Gets a valid end-point for tracking; every tracker state is accepted. */
438+ Tracker end ( ) { any ( ) }
439+
440+ /** Gets a valid end-point for tracking where no calls were used, that is, any state for which `hasCall()` does not hold. */
441+ Tracker endNoCall ( ) { not result .hasCall ( ) }
442+ }
0 commit comments