@@ -59,6 +59,8 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
5959 sinkSpec ( e , package , type , name , signature , ext , input ) and
6060 ExternalFlow:: sinkModel ( package , type , _, name , [ signature , "" ] , ext , input , kind , _)
6161 )
62+ or
63+ isCustomSink ( e , kind )
6264 }
6365
6466 predicate isNeutral ( Endpoint e ) {
@@ -109,6 +111,18 @@ module ApplicationCandidatesImpl implements SharedCharacteristics::CandidateSig
109111 }
110112}
111113
114+ /**
115+ * Contains endpoints that are defined in QL code rather than as a MaD model. Ideally this predicate
116+ * should be empty.
117+ */
118+ private predicate isCustomSink ( Endpoint e , string kind ) {
119+ e .asExpr ( ) instanceof ArgumentToExec and kind = "command injection"
120+ or
121+ e instanceof RequestForgerySink and kind = "request forgery"
122+ or
123+ e instanceof QueryInjectionSink and kind = "sql"
124+ }
125+
112126module CharacteristicsImpl =
113127 SharedCharacteristics:: SharedCharacteristics< ApplicationCandidatesImpl > ;
114128
@@ -220,6 +234,37 @@ private class ExceptionCharacteristic extends CharacteristicsImpl::NotASinkChara
220234 }
221235}
222236
237+ /**
238+ * A negative characteristic that indicates that an endpoint is a MaD taint step. MaD modeled taint steps are global,
239+ * so they are not sinks for any query. Non-MaD taint steps might be specific to a particular query, so we don't
240+ * filter those out.
241+ */
242+ private class IsMaDTaintStepCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
243+ IsMaDTaintStepCharacteristic ( ) { this = "taint step" }
244+
245+ override predicate appliesToEndpoint ( Endpoint e ) {
246+ FlowSummaryImpl:: Private:: Steps:: summaryThroughStepValue ( e , _, _) or
247+ FlowSummaryImpl:: Private:: Steps:: summaryThroughStepTaint ( e , _, _) or
248+ FlowSummaryImpl:: Private:: Steps:: summaryGetterStep ( e , _, _, _) or
249+ FlowSummaryImpl:: Private:: Steps:: summarySetterStep ( e , _, _, _)
250+ }
251+ }
252+
253+ /**
254+ * A negative characteristic that filters out qualifiers that are classes (i.e. static calls). These
255+ *are unlikely to have any non-trivial flow going into them.
256+ */
257+ private class ClassQualifierCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
258+ ClassQualifierCharacteristic ( ) { this = "class qualifier" }
259+
260+ override predicate appliesToEndpoint ( Endpoint e ) {
261+ exists ( Call c |
262+ e .asExpr ( ) = c .getQualifier ( ) and
263+ c .getQualifier ( ) instanceof TypeAccess
264+ )
265+ }
266+ }
267+
223268/**
224269 * A characteristic that limits candidates to parameters of methods that are recognized as `ModelApi`, iow., APIs that
225270 * are considered worth modeling.
@@ -255,20 +300,73 @@ private class NonPublicMethodCharacteristic extends CharacteristicsImpl::Uninter
255300}
256301
257302/**
258- * A negative characteristic that filters out qualifiers that are classes (i.e. static calls). These
259- *are unlikely to have any non-trivial flow going into them.
303+ * A negative characteristic that indicates that an endpoint is a non-sink argument to a method whose sinks have already
304+ * been modeled.
305+ *
306+ * WARNING: These endpoints should not be used as negative samples for training, because some sinks may have been missed
307+ * when the method was modeled. Specifically, as we start using ATM to merge in new declarations, we can be less sure
308+ * that a method with one argument modeled as a MaD sink has also had its remaining arguments manually reviewed. The
309+ * ML model might have predicted argument 0 of some method to be a sink but not argument 1, when in fact argument 1 is
310+ * also a sink.
260311 */
261- private class ClassQualifierCharacteristic extends CharacteristicsImpl:: NotASinkCharacteristic {
262- ClassQualifierCharacteristic ( ) { this = "class qualifier" }
312+ private class OtherArgumentToModeledMethodCharacteristic extends CharacteristicsImpl:: LikelyNotASinkCharacteristic
313+ {
314+ OtherArgumentToModeledMethodCharacteristic ( ) {
315+ this = "other argument to a method that has already been modeled"
316+ }
263317
264318 override predicate appliesToEndpoint ( Endpoint e ) {
265- exists ( Call c |
266- e .asExpr ( ) = c .getQualifier ( ) and
267- c .getCallee ( ) .isStatic ( )
319+ not ApplicationCandidatesImpl:: isSink ( e , _) and
320+ exists ( DataFlow:: Node otherSink |
321+ ApplicationCandidatesImpl:: isSink ( otherSink , _) and
322+ e .asExpr ( ) = otherSink .asExpr ( ) .( Argument ) .getCall ( ) .getAnArgument ( ) and
323+ e != otherSink
268324 )
269325 }
270326}
271327
328+ /**
329+ * A negative characteristic that indicates that an endpoint is not part of the source code for the project being
330+ * analyzed.
331+ *
332+ * WARNING: These endpoints should not be used as negative samples for training, because they are not necessarily
333+ * non-sinks. They are merely not interesting sinks to run through the ML model.
334+ *
335+ * TODO: Check that this actually does anything.
336+ */
337+ private class IsExternalCharacteristic extends CharacteristicsImpl:: LikelyNotASinkCharacteristic {
338+ IsExternalCharacteristic ( ) { this = "external" }
339+
340+ override predicate appliesToEndpoint ( Endpoint e ) {
341+ not exists ( e .getLocation ( ) .getFile ( ) .getRelativePath ( ) )
342+ }
343+ }
344+
345+ /**
346+ * A negative characteristic that indicates that an endpoint is not a `to` node for any known taint step. Such a node
347+ * cannot be tainted, because taint can't flow into it.
348+ *
349+ * WARNING: These endpoints should not be used as negative samples for training, because they may include sinks for
350+ * which our taint tracking modeling is incomplete.
351+ */
352+ private class CannotBeTaintedCharacteristic extends CharacteristicsImpl:: LikelyNotASinkCharacteristic
353+ {
354+ CannotBeTaintedCharacteristic ( ) { this = "cannot be tainted" }
355+
356+ override predicate appliesToEndpoint ( Endpoint e ) { not this .isKnownOutNodeForStep ( e ) }
357+
358+ /**
359+ * Holds if the node `n` is known as the predecessor in a modeled flow step.
360+ */
361+ private predicate isKnownOutNodeForStep ( Endpoint e ) {
362+ TaintTracking:: localTaintStep ( _, e ) or
363+ FlowSummaryImpl:: Private:: Steps:: summaryThroughStepValue ( _, e , _) or
364+ FlowSummaryImpl:: Private:: Steps:: summaryThroughStepTaint ( _, e , _) or
365+ FlowSummaryImpl:: Private:: Steps:: summaryGetterStep ( _, _, e , _) or
366+ FlowSummaryImpl:: Private:: Steps:: summarySetterStep ( _, _, e , _)
367+ }
368+ }
369+
272370/**
273371 * Holds if the given endpoint has a self-contradictory combination of characteristics. Detects errors in our endpoint
274372 * characteristics. Lists the problematic characteristics and their implications for all such endpoints, together with
0 commit comments