@@ -13,44 +13,41 @@ private import semmle.code.cpp.models.interfaces.DataFlow
1313private import DataFlowPrivate
1414private import ModelUtil
1515private import SsaInternals as Ssa
16+ private import DataFlowImplCommon as DataFlowImplCommon
1617
18+ /**
19+ * The IR dataflow graph consists of the following nodes:
20+ * - `Node0`, which injects most instructions and operands directly into the dataflow graph.
21+ * - `VariableNode`, which is used to model flow through global variables.
22+ * - `PostFieldUpdateNode`, which is used to model the state of a field after a value has been stored
23+ * into an address after a number of loads.
24+ * - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA library.
25+ * - `IndirectArgumentOutNode`, which represents the value of an argument (and its indirections) after
26+ * it leaves a function call.
27+ * - `RawIndirectOperand`, which represents the value of `operand` after loading the address a number
28+ * of times.
29+ * - `RawIndirectInstruction`, which represents the value of `instr` after loading the address a number
30+ * of times.
31+ */
1732cached
18- private module Cached {
19- /**
20- * The IR dataflow graph consists of the following nodes:
21- * - `InstructionNode`, which injects most instructions directly into the dataflow graph.
22- * - `OperandNode`, which similarly injects most operands directly into the dataflow graph.
23- * - `VariableNode`, which is used to model flow through global variables.
24- * - `PostFieldUpdateNode`, which is used to model the state of a field after a value has been stored
25- * into an address after a number of loads.
26- * - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA library.
27- * - `IndirectArgumentOutNode`, which represents the value of an argument (and its indirections) after
28- * it leaves a function call.
29- * - `RawIndirectOperand`, which represents the value of `operand` after loading the address a number
30- * of times.
31- * - `RawIndirectInstruction`, which represents the value of `instr` after loading the address a number
32- * of times.
33- */
34- cached
35- newtype TIRDataFlowNode =
36- TNode0 ( Node0Impl node ) or
37- TVariableNode ( Variable var ) or
38- TPostFieldUpdateNode ( FieldAddress operand , int indirectionIndex ) {
39- indirectionIndex =
40- [ 1 .. Ssa:: countIndirectionsForCppType ( operand .getObjectAddress ( ) .getResultLanguageType ( ) ) ]
41- } or
42- TSsaPhiNode ( Ssa:: PhiNode phi ) or
43- TIndirectArgumentOutNode ( ArgumentOperand operand , int indirectionIndex ) {
44- Ssa:: isModifiableByCall ( operand ) and
45- indirectionIndex = [ 1 .. Ssa:: countIndirectionsForCppType ( operand .getLanguageType ( ) ) ]
46- } or
47- TRawIndirectOperand ( Operand op , int indirectionIndex ) {
48- Ssa:: hasRawIndirectOperand ( op , indirectionIndex )
49- } or
50- TRawIndirectInstruction ( Instruction instr , int indirectionIndex ) {
51- Ssa:: hasRawIndirectInstruction ( instr , indirectionIndex )
52- }
53- }
33+ private newtype TIRDataFlowNode =
34+ TNode0 ( Node0Impl node ) { DataFlowImplCommon:: forceCachingInSameStage ( ) } or // NOTE(review): presumably ties this newtype to the cached stage of the shared dataflow library — confirm against `DataFlowImplCommon`
35+ TVariableNode ( Variable var ) or // unconditional branch: one node for every `Variable`
36+ TPostFieldUpdateNode ( FieldAddress operand , int indirectionIndex ) {
37+ indirectionIndex =
38+ [ 1 .. Ssa:: countIndirectionsForCppType ( operand .getObjectAddress ( ) .getResultLanguageType ( ) ) ] // indices start at 1; the upper bound is computed from the type of the object address
39+ } or
40+ TSsaPhiNode ( Ssa:: PhiNode phi ) or // unconditional branch: one node for every `Ssa::PhiNode`
41+ TIndirectArgumentOutNode ( ArgumentOperand operand , int indirectionIndex ) {
42+ Ssa:: isModifiableByCall ( operand ) and // only arguments accepted by `Ssa::isModifiableByCall` get out-nodes
43+ indirectionIndex = [ 1 .. Ssa:: countIndirectionsForCppType ( operand .getLanguageType ( ) ) ] // indices start at 1; the upper bound is computed from the operand's own type
44+ } or
45+ TRawIndirectOperand ( Operand op , int indirectionIndex ) {
46+ Ssa:: hasRawIndirectOperand ( op , indirectionIndex ) // the set of valid (operand, index) pairs is decided entirely by the SSA library
47+ } or
48+ TRawIndirectInstruction ( Instruction instr , int indirectionIndex ) {
49+ Ssa:: hasRawIndirectInstruction ( instr , indirectionIndex ) // the set of valid (instruction, index) pairs is decided entirely by the SSA library
50+ }
5451
5552/**
5653 * An operand that is defined by a `FieldAddressInstruction`.
@@ -94,8 +91,6 @@ predicate conversionFlow(Operand opFrom, Instruction instrTo, boolean isPointerA
9491 instrTo .( PointerArithmeticInstruction ) .getLeftOperand ( ) = opFrom
9592}
9693
97- private import Cached
98-
9994/**
10095 * A node in a data flow graph.
10196 *
@@ -1180,36 +1175,6 @@ VariableNode variableNode(Variable v) { result.getVariable() = v }
11801175 */
11811176Node uninitializedNode ( LocalVariable v ) { none ( ) } // the body is `none()`, so this has no result for any `v`
11821177
1183- /**
1184- * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
1185- * (intra-procedural) step.
1186- */
1187- predicate localFlowStep = simpleLocalFlowStep / 2 ;
1188-
1189- private predicate indirectionOperandFlow ( RawIndirectOperand nodeFrom , Node nodeTo ) {
1190- // Reduce the indirection count by 1 if we're passing through a `LoadInstruction`.
1191- exists ( int ind , Instruction load , Operand address |
1192- Ssa:: isDereference ( load , address ) and
1193- hasOperandAndIndex ( nodeFrom , address , ind ) and
1194- nodeHasInstruction ( nodeTo , load , ind - 1 )
1195- )
1196- or
1197- // If an operand flows to an instruction, then the indirection of
1198- // the operand also flows to the indirction of the instruction.
1199- exists ( Operand operand , Instruction instr , int indirectionIndex |
1200- simpleInstructionLocalFlowStep ( operand , instr ) and
1201- hasOperandAndIndex ( nodeFrom , operand , indirectionIndex ) and
1202- hasInstructionAndIndex ( nodeTo , instr , indirectionIndex )
1203- )
1204- or
1205- // If there's indirect flow to an operand, then there's also indirect
1206- // flow to the operand after applying some pointer arithmetic.
1207- exists ( PointerArithmeticInstruction pointerArith , int indirectionIndex |
1208- hasOperandAndIndex ( nodeFrom , pointerArith .getAnOperand ( ) , indirectionIndex ) and
1209- hasInstructionAndIndex ( nodeTo , pointerArith , indirectionIndex )
1210- )
1211- }
1212-
12131178pragma [ noinline]
12141179predicate hasOperandAndIndex ( IndirectOperand indirectOperand , Operand operand , int indirectionIndex ) {
12151180 indirectOperand .getOperand ( ) = operand and
@@ -1224,92 +1189,130 @@ predicate hasInstructionAndIndex(
12241189 indirectInstr .getIndirectionIndex ( ) = indirectionIndex
12251190}
12261191
1227- private predicate indirectionInstructionFlow ( RawIndirectInstruction nodeFrom , IndirectOperand nodeTo ) {
1228- // If there's flow from an instruction to an operand, then there's also flow from the
1229- // indirect instruction to the indirect operand.
1230- exists ( Operand operand , Instruction instr , int indirectionIndex |
1231- simpleOperandLocalFlowStep ( pragma [ only_bind_into ] ( instr ) , pragma [ only_bind_into ] ( operand ) )
1232- |
1233- hasOperandAndIndex ( nodeTo , operand , indirectionIndex ) and
1234- hasInstructionAndIndex ( nodeFrom , instr , indirectionIndex )
1235- )
1236- }
1192+ cached
1193+ private module Cached {
1194+ /**
1195+ * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
1196+ * (intra-procedural) step. This is currently defined as exactly `simpleLocalFlowStep`.
1197+ */
1198+ cached
1199+ predicate localFlowStep ( Node nodeFrom , Node nodeTo ) { simpleLocalFlowStep ( nodeFrom , nodeTo ) }
12371200
1238- /**
1239- * INTERNAL: do not use.
1240- *
1241- * This is the local flow predicate that's used as a building block in global
1242- * data flow. It may have less flow than the `localFlowStep` predicate.
1243- */
1244- predicate simpleLocalFlowStep ( Node nodeFrom , Node nodeTo ) {
1245- // Post update node -> Node flow
1246- Ssa:: ssaFlow ( nodeFrom .( PostUpdateNode ) .getPreUpdateNode ( ) , nodeTo )
1247- or
1248- // Def-use/Use-use flow
1249- Ssa:: ssaFlow ( nodeFrom , nodeTo )
1250- or
1251- // Operand -> Instruction flow
1252- simpleInstructionLocalFlowStep ( nodeFrom .asOperand ( ) , nodeTo .asInstruction ( ) )
1253- or
1254- // Instruction -> Operand flow
1255- simpleOperandLocalFlowStep ( nodeFrom .asInstruction ( ) , nodeTo .asOperand ( ) )
1256- or
1257- // Phi node -> Node flow
1258- Ssa:: fromPhiNode ( nodeFrom , nodeTo )
1259- or
1260- // Indirect operand -> (indirect) instruction flow
1261- indirectionOperandFlow ( nodeFrom , nodeTo )
1262- or
1263- // Indirect instruction -> indirect operand flow
1264- indirectionInstructionFlow ( nodeFrom , nodeTo )
1265- or
1266- // Flow through modeled functions
1267- modelFlow ( nodeFrom , nodeTo )
1268- or
1269- // Reverse flow: data that flows from the definition node back into the indirection returned
1270- // by a function. This allows data to flow 'in' through references returned by a modeled
1271- // function such as `operator[]`.
1272- exists ( Operand address , int indirectionIndex |
1273- nodeHasOperand ( nodeTo .( IndirectReturnOutNode ) , address , indirectionIndex )
1274- |
1275- exists ( StoreInstruction store |
1276- nodeHasInstruction ( nodeFrom , store , indirectionIndex - 1 ) and
1277- store .getDestinationAddressOperand ( ) = address
1201+ private predicate indirectionOperandFlow ( RawIndirectOperand nodeFrom , Node nodeTo ) {
1202+ // Reduce the indirection count by 1 if we're passing through a `LoadInstruction`. NOTE(review): the version this replaces matched via `Ssa::isDereference(load, address)` with `load` typed as a plain `Instruction`; confirm that restricting to `LoadInstruction.getSourceAddressOperand()` does not drop flow.
1203+ exists ( int ind , LoadInstruction load |
1204+ hasOperandAndIndex ( nodeFrom , load .getSourceAddressOperand ( ) , ind ) and
1205+ nodeHasInstruction ( nodeTo , load , ind - 1 )
12781206 )
12791207 or
1280- Ssa:: outNodeHasAddressAndIndex ( nodeFrom , address , indirectionIndex )
1281- )
1282- }
1208+ // If an operand flows to an instruction, then the indirection of
1209+ // the operand also flows to the indirection of the instruction.
1210+ exists ( Operand operand , Instruction instr , int indirectionIndex |
1211+ simpleInstructionLocalFlowStep ( operand , instr ) and
1212+ hasOperandAndIndex ( nodeFrom , operand , indirectionIndex ) and
1213+ hasInstructionAndIndex ( nodeTo , instr , indirectionIndex )
1214+ )
1215+ or
1216+ // If there's indirect flow to an operand, then there's also indirect
1217+ // flow to the operand after applying some pointer arithmetic.
1218+ exists ( PointerArithmeticInstruction pointerArith , int indirectionIndex |
1219+ hasOperandAndIndex ( nodeFrom , pointerArith .getAnOperand ( ) , indirectionIndex ) and
1220+ hasInstructionAndIndex ( nodeTo , pointerArith , indirectionIndex )
1221+ )
1222+ }
12831223
1284- private predicate simpleInstructionLocalFlowStep ( Operand opFrom , Instruction iTo ) {
1285- // Treat all conversions as flow, even conversions between different numeric types.
1286- conversionFlow ( opFrom , iTo , false )
1287- or
1288- iTo .( CopyInstruction ) .getSourceValueOperand ( ) = opFrom
1289- }
1224+ private predicate indirectionInstructionFlow (
1225+ RawIndirectInstruction nodeFrom , IndirectOperand nodeTo
1226+ ) {
1227+ // If there's flow from an instruction to an operand, then there's also flow from the
1228+ // indirect instruction to the indirect operand. Both ends use the same `indirectionIndex`.
1229+ exists ( Operand operand , Instruction instr , int indirectionIndex |
1230+ simpleOperandLocalFlowStep ( pragma [ only_bind_into ] ( instr ) , pragma [ only_bind_into ] ( operand ) )
1231+ |
1232+ hasOperandAndIndex ( nodeTo , operand , indirectionIndex ) and
1233+ hasInstructionAndIndex ( nodeFrom , instr , indirectionIndex )
1234+ )
1235+ }
12901236
1291- private predicate simpleOperandLocalFlowStep ( Instruction iFrom , Operand opTo ) {
1292- not opTo instanceof MemoryOperand and
1293- opTo .getDef ( ) = iFrom
1294- }
1237+ /**
1238+ * INTERNAL: do not use.
1239+ *
1240+ * This is the local flow predicate that's used as a building block in global
1241+ * data flow. It may have less flow than the `localFlowStep` predicate.
1242+ */
1243+ cached
1244+ predicate simpleLocalFlowStep ( Node nodeFrom , Node nodeTo ) {
1245+ // Post update node -> Node flow (SSA flow is looked up from the corresponding pre-update node)
1246+ Ssa:: ssaFlow ( nodeFrom .( PostUpdateNode ) .getPreUpdateNode ( ) , nodeTo )
1247+ or
1248+ // Def-use/Use-use flow
1249+ Ssa:: ssaFlow ( nodeFrom , nodeTo )
1250+ or
1251+ // Operand -> Instruction flow
1252+ simpleInstructionLocalFlowStep ( nodeFrom .asOperand ( ) , nodeTo .asInstruction ( ) )
1253+ or
1254+ // Instruction -> Operand flow
1255+ simpleOperandLocalFlowStep ( nodeFrom .asInstruction ( ) , nodeTo .asOperand ( ) )
1256+ or
1257+ // Phi node -> Node flow
1258+ Ssa:: fromPhiNode ( nodeFrom , nodeTo )
1259+ or
1260+ // Indirect operand -> (indirect) instruction flow
1261+ indirectionOperandFlow ( nodeFrom , nodeTo )
1262+ or
1263+ // Indirect instruction -> indirect operand flow
1264+ indirectionInstructionFlow ( nodeFrom , nodeTo )
1265+ or
1266+ // Flow through modeled functions
1267+ modelFlow ( nodeFrom , nodeTo )
1268+ or
1269+ // Reverse flow: data that flows from the definition node back into the indirection returned
1270+ // by a function. This allows data to flow 'in' through references returned by a modeled
1271+ // function such as `operator[]`.
1272+ exists ( Operand address , int indirectionIndex |
1273+ nodeHasOperand ( nodeTo .( IndirectReturnOutNode ) , address , indirectionIndex )
1274+ |
1275+ exists ( StoreInstruction store |
1276+ nodeHasInstruction ( nodeFrom , store , indirectionIndex - 1 ) and // `nodeFrom` is matched one index below the index used for `nodeTo`
1277+ store .getDestinationAddressOperand ( ) = address
1278+ )
1279+ or
1280+ Ssa:: outNodeHasAddressAndIndex ( nodeFrom , address , indirectionIndex ) // alternatively, `nodeFrom` is related to the same address and index by the SSA library
1281+ )
1282+ }
12951283
1296- private predicate modelFlow ( Node nodeFrom , Node nodeTo ) {
1297- exists (
1298- CallInstruction call , DataFlowFunction func , FunctionInput modelIn , FunctionOutput modelOut
1299- |
1300- call .getStaticCallTarget ( ) = func and
1301- func .hasDataFlow ( modelIn , modelOut )
1302- |
1303- nodeFrom = callInput ( call , modelIn ) and
1304- nodeTo = callOutput ( call , modelOut )
1284+ private predicate simpleInstructionLocalFlowStep ( Operand opFrom , Instruction iTo ) {
1285+ // Treat all conversions as flow, even conversions between different numeric types. NOTE(review): the literal `false` presumably excludes the pointer-arithmetic case of `conversionFlow` — confirm against its declaration.
1286+ conversionFlow ( opFrom , iTo , false )
13051287 or
1306- exists ( int d |
1307- nodeFrom = callInput ( call , modelIn , d ) and
1308- nodeTo = callOutput ( call , modelOut , d )
1288+ iTo .( CopyInstruction ) .getSourceValueOperand ( ) = opFrom // copies: step from the source value operand to the copy instruction itself
1289+ }
1290+
1291+ private predicate simpleOperandLocalFlowStep ( Instruction iFrom , Operand opTo ) {
1292+ not opTo instanceof MemoryOperand and // memory operands are excluded from this step
1293+ opTo .getDef ( ) = iFrom // step from an operand's defining instruction to the operand
1294+ }
1295+
1296+ private predicate modelFlow ( Node nodeFrom , Node nodeTo ) {
1297+ exists (
1298+ CallInstruction call , DataFlowFunction func , FunctionInput modelIn , FunctionOutput modelOut
1299+ |
1300+ call .getStaticCallTarget ( ) = func and // only calls whose static target is a modeled `DataFlowFunction` are considered
1301+ func .hasDataFlow ( modelIn , modelOut )
1302+ |
1303+ nodeFrom = callInput ( call , modelIn ) and
1304+ nodeTo = callOutput ( call , modelOut )
1305+ or
1306+ exists ( int d | // the same `d` is passed to both `callInput` and `callOutput`
1307+ nodeFrom = callInput ( call , modelIn , d ) and
1308+ nodeTo = callOutput ( call , modelOut , d )
1309+ )
13091310 )
1310- )
1311+ }
13111312}
13121313
1314+ import Cached
1315+
13131316/**
13141317 * Holds if data flows from `source` to `sink` in zero or more local
13151318 * (intra-procedural) steps.
0 commit comments