Skip to content

Commit df9efbe

Browse files
committed
get minimal def nodes to work in python
1 parent 52ca0d1 commit df9efbe

4 files changed

Lines changed: 259 additions & 4 deletions

File tree

python/ql/lib/semmle/python/ApiGraphs.qll

Lines changed: 217 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,30 @@ module API {
3939
)
4040
}
4141

42+
/**
43+
* Gets a data-flow node corresponding to the right-hand side of a definition of the API
44+
* component represented by this node.
45+
*
46+
* For example, in the property write `foo.bar = x`, variable `x` is the right-hand side
47+
* of a write to the `bar` property of `foo`.
48+
*
49+
* Note that for parameters, it is the arguments flowing into that parameter that count as
50+
* right-hand sides of the definition, not the declaration of the parameter itself.
51+
* Consequently, in:
52+
* ```python
53+
* from mypkg import foo;
54+
* foo.bar(x)
55+
* ```
56+
* `x` is the right-hand side of a definition of the first parameter of `bar` from the `mypkg.foo` module.
57+
*/
58+
DataFlow::Node getARhs() { Impl::rhs(this, result) }
59+
60+
/**
61+
* Gets a data-flow node that may interprocedurally flow to the right-hand side of a definition
62+
* of the API component represented by this node.
63+
*/
64+
DataFlow::Node getAValueReachingRhs() { result = Impl::trackDefNode(this.getARhs()) }
65+
4266
/**
4367
* Gets an immediate use of the API component represented by this node.
4468
*
@@ -55,7 +79,7 @@ module API {
5579
/**
5680
* Gets a call to the function represented by this API component.
5781
*/
58-
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() }
82+
DataFlow::CallCfgNode getACall() { result = this.getReturn().getAnImmediateUse() } // TODO: Make a API::CallNode.
5983

6084
/**
6185
* Gets a node representing member `m` of this API component.
@@ -92,6 +116,27 @@ module API {
92116
*/
93117
Node getReturn() { result = this.getASuccessor(Label::return()) }
94118

119+
/**
120+
* Gets a node representing the `i`th parameter of the function represented by this node.
121+
*
122+
* This predicate may have multiple results when there are multiple invocations of this API component.
123+
* Consider using `getACall()` if there is a need to distinguish between individual calls.
124+
*/
125+
Node getParameter(int i) { result = this.getASuccessor(Label::parameter(i)) }
126+
127+
/**
128+
* Gets the number of parameters of the function represented by this node.
129+
*/
130+
int getNumParameter() { result = max(int s | exists(this.getParameter(s))) + 1 }
131+
132+
/**
133+
* Gets a node representing the last parameter of the function represented by this node.
134+
*
135+
* This predicate may have multiple results when there are multiple invocations of this API component.
136+
* Consider using `getACall()` if there is a need to distinguish between individual calls.
137+
*/
138+
Node getLastParameter() { result = this.getParameter(this.getNumParameter() - 1) }
139+
95140
/**
96141
* Gets a node representing a subclass of the class represented by this node.
97142
*/
@@ -137,7 +182,7 @@ module API {
137182
/**
138183
* Gets the data-flow node that gives rise to this node, if any.
139184
*/
140-
DataFlow::Node getInducingNode() { this = Impl::MkUse(result) }
185+
DataFlow::Node getInducingNode() { this = Impl::MkUse(result) or this = Impl::MkDef(result) }
141186

142187
/**
143188
* Holds if this element is at the specified location.
@@ -210,6 +255,17 @@ module API {
210255
}
211256
}
212257

258+
/** A node corresponding to the rhs of an API component. */
259+
class Def extends Node, Impl::TDef {
260+
override string toString() {
261+
exists(string type | this = Impl::MkDef(_) and type = "Def " |
262+
result = type + this.getPath()
263+
or
264+
not exists(this.getPath()) and result = type + "with no path"
265+
)
266+
}
267+
}
268+
213269
/** Gets the root node. */
214270
Root root() { any() }
215271

@@ -325,10 +381,13 @@ module API {
325381
name = "builtins"
326382
} or
327383
/** A use of an API member at the node `nd`. */
328-
MkUse(DataFlow::Node nd) { use(_, _, nd) }
384+
MkUse(DataFlow::Node nd) { use(_, _, nd) } or
385+
MkDef(DataFlow::Node nd) { rhs(_, _, nd) }
329386

330387
class TUse = MkModuleImport or MkUse;
331388

389+
class TDef = MkDef;
390+
332391
/**
333392
* Holds if the dotted module name `sub` refers to the `member` member of `base`.
334393
*
@@ -381,6 +440,77 @@ module API {
381440
)
382441
}
383442

443+
/**
444+
* Holds if `rhs` is the right-hand side of a definition of a node that should have an
445+
* incoming edge from `base` labeled `lbl` in the API graph.
446+
*/
447+
cached
448+
predicate rhs(TApiNode base, Label::ApiLabel lbl, DataFlow::Node rhs) {
449+
/*
450+
* exists(string m, string prop | // TODO: Figure out module exports in Python
451+
* base = MkModuleExport(m) and
452+
* lbl = Label::member(prop) and
453+
* exports(m, prop, rhs)
454+
* )
455+
* or
456+
*/
457+
458+
exists(DataFlow::Node def, DataFlow::LocalSourceNode pred |
459+
rhs(base, def) and pred = trackDefNode(def)
460+
|
461+
// from `x` to a definition of `x.prop`
462+
exists(DataFlow::AttrWrite pw | pw = pred.getAnAttributeWrite() |
463+
lbl = Label::memberFromRef(pw) and
464+
rhs = pw.getValue()
465+
)
466+
// or
467+
// special case: from `require('m')` to an export of `prop` in `m`
468+
// TODO: Figure out if this is needed.
469+
/*
470+
* exists(Import imp, Module m, string prop |
471+
* pred = imp.getImportedModuleNode() and
472+
* m = imp.getImportedModule() and
473+
* lbl = Label::member(prop) and
474+
* rhs = m.getAnExportedValue(prop)
475+
* )
476+
* or
477+
* // TODO:
478+
* exists(DataFlow::FunctionNode fn | fn = pred |
479+
* not fn.getFunction().isAsync() and
480+
* lbl = Label::return() and
481+
* rhs = fn.getAReturn()
482+
* )
483+
* or
484+
* lbl = Label::promised() and
485+
* PromiseFlow::storeStep(rhs, pred, Promises::valueProp())
486+
*/
487+
488+
)
489+
or
490+
/*
491+
* or // TODO:
492+
* exists(DataFlow::FunctionNode f |
493+
* base = MkAsyncFuncResult(f) and
494+
* lbl = Label::promised() and
495+
* rhs = f.getAReturn()
496+
* )
497+
*/
498+
499+
exists(int i |
500+
lbl = Label::parameter(i) and
501+
argumentPassing(base, i, rhs)
502+
)
503+
/*
504+
* or // TODO:
505+
* exists(DataFlow::SourceNode src, DataFlow::PropWrite pw |
506+
* use(base, src) and pw = trackUseNode(src).getAPropertyWrite() and rhs = pw.getRhs()
507+
* |
508+
* lbl = Label::memberFromRef(pw)
509+
* )
510+
*/
511+
512+
}
513+
384514
/**
385515
* Holds if `ref` is a use of a node that should have an incoming edge from `base` labeled
386516
* `lbl` in the API graph.
@@ -419,6 +549,21 @@ module API {
419549
)
420550
)
421551
or
552+
exists(DataFlow::Node def, CallableExpr fn |
553+
rhs(base, def) and fn = trackDefNode(def).asExpr()
554+
|
555+
exists(int i |
556+
lbl = Label::parameter(i) and
557+
ref.asExpr() = fn.getInnerScope().getArg(i)
558+
)
559+
/*
560+
* or // TODO: Figure out self.
561+
* lbl = Label::receiver() and
562+
* ref = fn.getReceiver()
563+
*/
564+
565+
)
566+
or
422567
// Built-ins, treated as members of the module `builtins`
423568
base = MkModuleImport("builtins") and
424569
lbl = Label::member(any(string name | ref = Builtins::likelyBuiltin(name)))
@@ -466,6 +611,53 @@ module API {
466611
exists(DataFlow::TypeTracker t2 | result = trackUseNode(src, t2).track(t2, t))
467612
}
468613

614+
/**
615+
* Holds if `arg` is passed as the `i`th argument to a use of `base`, either by means of a
616+
* full invocation, or in a partial function application.
617+
*
618+
* The receiver is meant to be argument -1, but receiver arguments are not handled yet.
619+
*/
620+
private predicate argumentPassing(TApiNode base, int i, DataFlow::Node arg) {
621+
exists(DataFlow::Node use, DataFlow::LocalSourceNode pred |
622+
use(base, use) and pred = trackUseNode(use, _)
623+
|
624+
arg = pred.getACall().getArg(i)
625+
/*
626+
* or // TODO: Figure out self in argument.
627+
* arg = pred.getACall().getReceiver() and
628+
* i = -1
629+
*/
630+
631+
)
632+
}
633+
634+
/**
635+
* Gets a node that inter-procedurally flows into `nd`, which is a definition of some node.
636+
*/
637+
cached
638+
DataFlow::LocalSourceNode trackDefNode(DataFlow::Node nd) {
639+
result = trackDefNode(nd, DataFlow::TypeBackTracker::end())
640+
}
641+
642+
private DataFlow::LocalSourceNode trackDefNode(DataFlow::Node nd, DataFlow::TypeBackTracker t) {
643+
t.start() and
644+
rhs(_, nd) and
645+
result = nd.getALocalSource()
646+
or
647+
// TODO: Figure out module exports in Python, and if this thing is needed.
648+
// additional backwards step from `require('m')` to `exports` or `module.exports` in m
649+
/*
650+
* exists(Import imp | imp.getImportedModuleNode() = trackDefNode(nd, t.continue()) |
651+
* result = DataFlow::exportsVarNode(imp.getImportedModule())
652+
* or
653+
* result = DataFlow::moduleVarNode(imp.getImportedModule()).getAPropertyRead("exports")
654+
* )
655+
* or
656+
*/
657+
658+
exists(DataFlow::TypeBackTracker t2 | result = trackDefNode(nd, t2).backtrack(t2, t))
659+
}
660+
469661
/**
470662
* Gets a data-flow node to which `src`, which is a use of an API-graph node, flows.
471663
*
@@ -477,6 +669,16 @@ module API {
477669
not result instanceof DataFlow::ModuleVariableNode
478670
}
479671

672+
/**
673+
* Holds if `rhs` is the right-hand side of a definition of node `nd`.
674+
*/
675+
cached
676+
predicate rhs(TApiNode nd, DataFlow::Node rhs) {
677+
// exists(string m | nd = MkModuleExport(m) | exports(m, rhs)) // TODO: Figure out module exports in Python.
678+
// or
679+
nd = MkDef(rhs)
680+
}
681+
480682
/**
481683
* Holds if there is an edge from `pred` to `succ` in the API graph that is labeled with `lbl`.
482684
*/
@@ -503,6 +705,11 @@ module API {
503705
use(pred, lbl, ref) and
504706
succ = MkUse(ref)
505707
)
708+
or
709+
exists(DataFlow::Node rhs |
710+
rhs(pred, lbl, rhs) and
711+
succ = MkDef(rhs)
712+
)
506713
}
507714

508715
/**
@@ -539,7 +746,9 @@ module API {
539746
} or
540747
MkLabelUnknownMember() or
541748
MkLabelParameter(int i) {
542-
none() // TODO: Fill in when adding def nodes
749+
exists(any(DataFlow::CallCfgNode c).getArg(i))
750+
or
751+
i = [-1 .. 10] // TODO: Def nodes, figure out how to make this prettier.
543752
} or
544753
MkLabelReturn() or
545754
MkLabelSubclass() or
@@ -582,6 +791,7 @@ module API {
582791

583792
LabelParameter() { this = MkLabelParameter(i) }
584793

794+
// TODO: Named parameters, spread arguments.
585795
override string toString() { result = "getParameter(" + i + ")" }
586796

587797
/** Gets the index of the parameter for this label. */
@@ -627,6 +837,9 @@ module API {
627837
result = unknownMember()
628838
}
629839

840+
/** Gets the `parameter` edge label for parameter `i`. */
841+
LabelParameter parameter(int i) { result.getIndex() = i }
842+
630843
/** Gets the `return` edge label. */
631844
LabelReturn return() { any() }
632845

python/ql/test/library-tests/ApiGraphs/def.expected

Whitespace-only changes.
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
import python
2+
import semmle.python.dataflow.new.DataFlow
3+
import TestUtilities.InlineExpectationsTest
4+
import semmle.python.ApiGraphs
5+
6+
class ApiDefTest extends InlineExpectationsTest {
7+
ApiDefTest() { this = "ApiDefTest" }
8+
9+
override string getARelevantTag() { result = "def" }
10+
11+
private predicate relevant_node(API::Node a, DataFlow::Node n, Location l) {
12+
n = a.getARhs() and
13+
l = n.getLocation() and
14+
// Module variable nodes have no suitable location, so it's best to simply exclude them entirely
15+
// from the inline tests.
16+
not n instanceof DataFlow::ModuleVariableNode and
17+
exists(l.getFile().getRelativePath()) and
18+
n.getLocation().getFile().getBaseName().matches("def%.py")
19+
}
20+
21+
override predicate hasActualResult(Location location, string element, string tag, string value) {
22+
exists(API::Node a, DataFlow::Node n | relevant_node(a, n, location) |
23+
tag = "def" and
24+
// Only report the longest path on this line:
25+
value =
26+
max(API::Node a2, Location l2 |
27+
relevant_node(a2, _, l2) and
28+
l2.getFile() = location.getFile() and
29+
l2.getStartLine() = location.getStartLine()
30+
|
31+
a2.getPath()
32+
) and
33+
element = n.toString()
34+
)
35+
}
36+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
from mypkg import foo #$ use=moduleImport("mypkg").getMember("foo")
2+
3+
def callback(x): #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0).getParameter(0)
4+
x.baz() #$ use=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0).getParameter(0).getMember("baz").getReturn()
5+
6+
foo.bar(callback) #$ def=moduleImport("mypkg").getMember("foo").getMember("bar").getParameter(0) use=moduleImport("mypkg").getMember("foo").getMember("bar").getReturn()

0 commit comments

Comments
 (0)