@@ -312,6 +312,85 @@ class FrameworkModeMetadataExtractor extends string {
312312 }
313313}
314314
/**
 * Holds if `endpoint` qualifies as a candidate for the `extensibleType`.
 *
 * An endpoint qualifies when all of the following hold:
 *  - `CharacteristicsImpl::isCandidate` accepts it,
 *  - no `UninterestingToModelCharacteristic` applies to it,
 *  - `AutomodelJavaUtil::includeAutomodelCandidate` holds for its `package`, `type`,
 *    `name` and `signature`, and
 *  - its `alreadyAiModeled` value is either empty or contains `ai-`.
 *
 * The remaining parameters are bound by `FrameworkModeMetadataExtractor.hasMetadata` and
 * record further properties of the endpoint.
 */
predicate isCandidate(
  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
  string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
) {
  // Bind all metadata columns for the endpoint.
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, alreadyAiModeled, extensibleType)
  ) and
  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature) and
  CharacteristicsImpl::isCandidate(endpoint, _) and
  not any(CharacteristicsImpl::UninterestingToModelCharacteristic uninteresting)
      .appliesToEndpoint(endpoint) and
  // Nodes that are already modeled in MaD are not offered as candidates. If they were, a node
  // could be proposed as a candidate for query A while the model labels it as a sink for one of
  // the sink types of query B, for which it is already a known sink; the detected sinks would
  // then overlap with the pre-existing modeling. The assumption is that a sink already modeled
  // in MaD does not belong to any additional sink types and need not be reexamined.
  alreadyAiModeled.matches(["", "%ai-%"])
}
339+
/**
 * Holds if `characteristic` makes `endpoint` a negative example for the `extensibleType`.
 *
 * `confidence` is the smallest confidence with which `characteristic` rules out any of the
 * endpoint's potential types, and it must be at least `SharedCharacteristics::highConfidence()`.
 *
 * The remaining parameters are bound by `FrameworkModeMetadataExtractor.hasMetadata` and
 * record further properties of the endpoint.
 */
predicate isNegativeExample(
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
  string type, string subtypes, string name, string signature, string input, string output,
  string parameterName, string extensibleType
) {
  // Bind the metadata columns; the "already AI modeled" column is irrelevant here.
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  ) and
  characteristic.appliesToEndpoint(endpoint) and
  // There is no potential type of the endpoint that `characteristic` fails to rule out,
  // i.e. the node is known not to be an endpoint of any appropriate type.
  not exists(AutomodelEndpointTypes::EndpointType candidateType |
    candidateType = CharacteristicsImpl::getAPotentialType(endpoint) and
    not characteristic.hasImplications(candidateType, false, _)
  ) and
  // Take the weakest confidence over all potential types and require it to be high.
  confidence =
    min(float typeConfidence |
      characteristic
          .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false, typeConfidence)
    ) and
  confidence >= SharedCharacteristics::highConfidence() and
  // A node may legitimately be both a potential source/sanitizer and a sink. Such nodes are
  // ambiguous and might confuse the model if shown as negative examples in the prompt, so any
  // endpoint for which a different characteristic implies a potential type with maximal
  // confidence is excluded.
  not exists(EndpointCharacteristic conflicting, float conflictingConfidence |
    conflicting != characteristic and
    conflicting.appliesToEndpoint(endpoint) and
    conflictingConfidence >= SharedCharacteristics::maximalConfidence() and
    conflicting
        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true,
          conflictingConfidence)
  )
}
378+
/**
 * Holds if `endpoint` is a positive example for the `endpointType`, that is, if
 * `CharacteristicsImpl::isKnownAs` holds for the pair.
 *
 * The remaining parameters are bound by `FrameworkModeMetadataExtractor.hasMetadata` and
 * record further properties of the endpoint.
 */
predicate isPositiveExample(
  Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
  string signature, string input, string output, string parameterName, string extensibleType
) {
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
  // Bind the metadata columns; the "already AI modeled" column is irrelevant here.
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  )
}
393+
315394/*
316395 * EndpointCharacteristic classes that are specific to Automodel for Java.
317396 */
0 commit comments