Skip to content

Commit 2000f22

Browse files
tausbnStephan Brandauer
authored andcommitted
Java: Port over characteristics from codex branch
1 parent 11ab7e2 commit 2000f22

1 file changed

Lines changed: 105 additions & 7 deletions

File tree

java/ql/src/Telemetry/AutomodelApplicationModeCharacteristics.qll

Lines changed: 105 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,8 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
5959
sinkSpec(e, package, type, name, signature, ext, input) and
6060
ExternalFlow::sinkModel(package, type, _, name, [signature, ""], ext, input, kind, _)
6161
)
62+
or
63+
isCustomSink(e, kind)
6264
}
6365

6466
predicate isNeutral(Endpoint e) {
@@ -109,6 +111,18 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
109111
}
110112
}
111113

114+
/**
115+
* Contains endpoints that are defined in QL code rather than as a MaD model. Ideally this predicate
116+
* should be empty.
117+
*/
118+
private predicate isCustomSink(Endpoint e, string kind) {
119+
e.asExpr() instanceof ArgumentToExec and kind = "command injection"
120+
or
121+
e instanceof RequestForgerySink and kind = "request forgery"
122+
or
123+
e instanceof QueryInjectionSink and kind = "sql"
124+
}
125+
112126
module CharacteristicsImpl =
113127
SharedCharacteristics::SharedCharacteristics<ApplicationCandidatesImpl>;
114128

@@ -220,6 +234,37 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkChara
220234
}
221235
}
222236

237+
/**
238+
* A negative characteristic that indicates that an endpoint is a MaD taint step. MaD modeled taint steps are global,
239+
* so they are not sinks for any query. Non-MaD taint steps might be specific to a particular query, so we don't
240+
* filter those out.
241+
*/
242+
private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
243+
IsMaDTaintStepCharacteristic() { this = "taint step" }
244+
245+
override predicate appliesToEndpoint(Endpoint e) {
246+
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(e, _, _) or
247+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(e, _, _) or
248+
FlowSummaryImpl::Private::Steps::summaryGetterStep(e, _, _, _) or
249+
FlowSummaryImpl::Private::Steps::summarySetterStep(e, _, _, _)
250+
}
251+
}
252+
253+
/**
254+
* A negative characteristic that filters out qualifiers that are classes (i.e. static calls). These
255+
*are unlikely to have any non-trivial flow going into them.
256+
*/
257+
private class ClassQualifierCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
258+
ClassQualifierCharacteristic() { this = "class qualifier" }
259+
260+
override predicate appliesToEndpoint(Endpoint e) {
261+
exists(Call c |
262+
e.asExpr() = c.getQualifier() and
263+
c.getQualifier() instanceof TypeAccess
264+
)
265+
}
266+
}
267+
223268
/**
224269
* A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that
225270
* are considered worth modeling.
@@ -255,20 +300,73 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter
255300
}
256301

257302
/**
258-
* A negative characteristic that filters out qualifiers that are classes (i.e. static calls). These
259-
*are unlikely to have any non-trivial flow going into them.
303+
* A negative characteristic that indicates that an endpoint is a non-sink argument to a method whose sinks have already
304+
* been modeled.
305+
*
306+
* WARNING: These endpoints should not be used as negative samples for training, because some sinks may have been missed
307+
* when the method was modeled. Specifically, as we start using ATM to merge in new declarations, we can be less sure
308+
* that a method with one argument modeled as a MaD sink has also had its remaining arguments manually reviewed. The
309+
* ML model might have predicted argument 0 of some method to be a sink but not argument 1, when in fact argument 1 is
310+
* also a sink.
260311
*/
261-
private class ClassQualifierCharacteristic extends CharacteristicsImpl::NotASinkCharacteristic {
262-
ClassQualifierCharacteristic() { this = "class qualifier" }
312+
private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic
313+
{
314+
OtherArgumentToModeledMethodCharacteristic() {
315+
this = "other argument to a method that has already been modeled"
316+
}
263317

264318
override predicate appliesToEndpoint(Endpoint e) {
265-
exists(Call c |
266-
e.asExpr() = c.getQualifier() and
267-
c.getCallee().isStatic()
319+
not ApplicationCandidatesImpl::isSink(e, _) and
320+
exists(DataFlow::Node otherSink |
321+
ApplicationCandidatesImpl::isSink(otherSink, _) and
322+
e.asExpr() = otherSink.asExpr().(Argument).getCall().getAnArgument() and
323+
e != otherSink
268324
)
269325
}
270326
}
271327

328+
/**
329+
* A negative characteristic that indicates that an endpoint is not part of the source code for the project being
330+
* analyzed.
331+
*
332+
* WARNING: These endpoints should not be used as negative samples for training, because they are not necessarily
333+
* non-sinks. They are merely not interesting sinks to run through the ML model.
334+
*
335+
* TODO: Check that this actually does anything.
336+
*/
337+
private class IsExternalCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic {
338+
IsExternalCharacteristic() { this = "external" }
339+
340+
override predicate appliesToEndpoint(Endpoint e) {
341+
not exists(e.getLocation().getFile().getRelativePath())
342+
}
343+
}
344+
345+
/**
346+
* A negative characteristic that indicates that an endpoint is not a `to` node for any known taint step. Such a node
347+
* cannot be tainted, because taint can't flow into it.
348+
*
349+
* WARNING: These endpoints should not be used as negative samples for training, because they may include sinks for
350+
* which our taint tracking modeling is incomplete.
351+
*/
352+
private class CannotBeTaintedCharacteristic extends CharacteristicsImpl::LikelyNotASinkCharacteristic
353+
{
354+
CannotBeTaintedCharacteristic() { this = "cannot be tainted" }
355+
356+
override predicate appliesToEndpoint(Endpoint e) { not this.isKnownOutNodeForStep(e) }
357+
358+
/**
359+
* Holds if the node `n` is known as the predecessor in a modeled flow step.
360+
*/
361+
private predicate isKnownOutNodeForStep(Endpoint e) {
362+
TaintTracking::localTaintStep(_, e) or
363+
FlowSummaryImpl::Private::Steps::summaryThroughStepValue(_, e, _) or
364+
FlowSummaryImpl::Private::Steps::summaryThroughStepTaint(_, e, _) or
365+
FlowSummaryImpl::Private::Steps::summaryGetterStep(_, _, e, _) or
366+
FlowSummaryImpl::Private::Steps::summarySetterStep(_, _, e, _)
367+
}
368+
}
369+
272370
/**
273371
* Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
274372
* characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with

0 commit comments

Comments
 (0)