Skip to content

Commit 9e7b73a

Browse files
authored
Merge pull request #11645 from MathiasVP/more-caching
2 parents 2ad61df + 9f9ffef commit 9e7b73a

1 file changed

Lines changed: 147 additions & 144 deletions

File tree

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 147 additions & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -13,44 +13,41 @@ private import semmle.code.cpp.models.interfaces.DataFlow
1313
private import DataFlowPrivate
1414
private import ModelUtil
1515
private import SsaInternals as Ssa
16+
private import DataFlowImplCommon as DataFlowImplCommon
1617

18+
/**
19+
* The IR dataflow graph consists of the following nodes:
20+
* - `Node0`, which injects most instructions and operands directly into the dataflow graph.
21+
* - `VariableNode`, which is used to model flow through global variables.
22+
* - `PostFieldUpdateNode`, which is used to model the state of a field after a value has been stored
23+
* into an address after a number of loads.
24+
* - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA library.
25+
* - `IndirectArgumentOutNode`, which represents the value of an argument (and its indirections) after
26+
* it leaves a function call.
27+
* - `RawIndirectOperand`, which represents the value of `operand` after loading the address a number
28+
* of times.
29+
* - `RawIndirectInstruction`, which represents the value of `instr` after loading the address a number
30+
* of times.
31+
*/
1732
cached
18-
private module Cached {
19-
/**
20-
* The IR dataflow graph consists of the following nodes:
21-
* - `InstructionNode`, which injects most instructions directly into the dataflow graph.
22-
* - `OperandNode`, which similarly injects most operands directly into the dataflow graph.
23-
* - `VariableNode`, which is used to model flow through global variables.
24-
* - `PostFieldUpdateNode`, which is used to model the state of a field after a value has been stored
25-
* into an address after a number of loads.
26-
* - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA library.
27-
* - `IndirectArgumentOutNode`, which represents the value of an argument (and its indirections) after
28-
* it leaves a function call.
29-
* - `RawIndirectOperand`, which represents the value of `operand` after loading the address a number
30-
* of times.
31-
* - `RawIndirectInstruction`, which represents the value of `instr` after loading the address a number
32-
* of times.
33-
*/
34-
cached
35-
newtype TIRDataFlowNode =
36-
TNode0(Node0Impl node) or
37-
TVariableNode(Variable var) or
38-
TPostFieldUpdateNode(FieldAddress operand, int indirectionIndex) {
39-
indirectionIndex =
40-
[1 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType())]
41-
} or
42-
TSsaPhiNode(Ssa::PhiNode phi) or
43-
TIndirectArgumentOutNode(ArgumentOperand operand, int indirectionIndex) {
44-
Ssa::isModifiableByCall(operand) and
45-
indirectionIndex = [1 .. Ssa::countIndirectionsForCppType(operand.getLanguageType())]
46-
} or
47-
TRawIndirectOperand(Operand op, int indirectionIndex) {
48-
Ssa::hasRawIndirectOperand(op, indirectionIndex)
49-
} or
50-
TRawIndirectInstruction(Instruction instr, int indirectionIndex) {
51-
Ssa::hasRawIndirectInstruction(instr, indirectionIndex)
52-
}
53-
}
33+
private newtype TIRDataFlowNode =
34+
TNode0(Node0Impl node) { DataFlowImplCommon::forceCachingInSameStage() } or
35+
TVariableNode(Variable var) or
36+
TPostFieldUpdateNode(FieldAddress operand, int indirectionIndex) {
37+
indirectionIndex =
38+
[1 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType())]
39+
} or
40+
TSsaPhiNode(Ssa::PhiNode phi) or
41+
TIndirectArgumentOutNode(ArgumentOperand operand, int indirectionIndex) {
42+
Ssa::isModifiableByCall(operand) and
43+
indirectionIndex = [1 .. Ssa::countIndirectionsForCppType(operand.getLanguageType())]
44+
} or
45+
TRawIndirectOperand(Operand op, int indirectionIndex) {
46+
Ssa::hasRawIndirectOperand(op, indirectionIndex)
47+
} or
48+
TRawIndirectInstruction(Instruction instr, int indirectionIndex) {
49+
Ssa::hasRawIndirectInstruction(instr, indirectionIndex)
50+
}
5451

5552
/**
5653
* An operand that is defined by a `FieldAddressInstruction`.
@@ -94,8 +91,6 @@ predicate conversionFlow(Operand opFrom, Instruction instrTo, boolean isPointerA
9491
instrTo.(PointerArithmeticInstruction).getLeftOperand() = opFrom
9592
}
9693

97-
private import Cached
98-
9994
/**
10095
* A node in a data flow graph.
10196
*
@@ -1180,36 +1175,6 @@ VariableNode variableNode(Variable v) { result.getVariable() = v }
11801175
*/
11811176
Node uninitializedNode(LocalVariable v) { none() }
11821177

1183-
/**
1184-
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
1185-
* (intra-procedural) step.
1186-
*/
1187-
predicate localFlowStep = simpleLocalFlowStep/2;
1188-
1189-
private predicate indirectionOperandFlow(RawIndirectOperand nodeFrom, Node nodeTo) {
1190-
// Reduce the indirection count by 1 if we're passing through a `LoadInstruction`.
1191-
exists(int ind, Instruction load, Operand address |
1192-
Ssa::isDereference(load, address) and
1193-
hasOperandAndIndex(nodeFrom, address, ind) and
1194-
nodeHasInstruction(nodeTo, load, ind - 1)
1195-
)
1196-
or
1197-
// If an operand flows to an instruction, then the indirection of
1198-
// the operand also flows to the indirection of the instruction.
1199-
exists(Operand operand, Instruction instr, int indirectionIndex |
1200-
simpleInstructionLocalFlowStep(operand, instr) and
1201-
hasOperandAndIndex(nodeFrom, operand, indirectionIndex) and
1202-
hasInstructionAndIndex(nodeTo, instr, indirectionIndex)
1203-
)
1204-
or
1205-
// If there's indirect flow to an operand, then there's also indirect
1206-
// flow to the operand after applying some pointer arithmetic.
1207-
exists(PointerArithmeticInstruction pointerArith, int indirectionIndex |
1208-
hasOperandAndIndex(nodeFrom, pointerArith.getAnOperand(), indirectionIndex) and
1209-
hasInstructionAndIndex(nodeTo, pointerArith, indirectionIndex)
1210-
)
1211-
}
1212-
12131178
pragma[noinline]
12141179
predicate hasOperandAndIndex(IndirectOperand indirectOperand, Operand operand, int indirectionIndex) {
12151180
indirectOperand.getOperand() = operand and
@@ -1224,92 +1189,130 @@ predicate hasInstructionAndIndex(
12241189
indirectInstr.getIndirectionIndex() = indirectionIndex
12251190
}
12261191

1227-
private predicate indirectionInstructionFlow(RawIndirectInstruction nodeFrom, IndirectOperand nodeTo) {
1228-
// If there's flow from an instruction to an operand, then there's also flow from the
1229-
// indirect instruction to the indirect operand.
1230-
exists(Operand operand, Instruction instr, int indirectionIndex |
1231-
simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand))
1232-
|
1233-
hasOperandAndIndex(nodeTo, operand, indirectionIndex) and
1234-
hasInstructionAndIndex(nodeFrom, instr, indirectionIndex)
1235-
)
1236-
}
1192+
cached
1193+
private module Cached {
1194+
/**
1195+
* Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local
1196+
* (intra-procedural) step.
1197+
*/
1198+
cached
1199+
predicate localFlowStep(Node nodeFrom, Node nodeTo) { simpleLocalFlowStep(nodeFrom, nodeTo) }
12371200

1238-
/**
1239-
* INTERNAL: do not use.
1240-
*
1241-
* This is the local flow predicate that's used as a building block in global
1242-
* data flow. It may have less flow than the `localFlowStep` predicate.
1243-
*/
1244-
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
1245-
// Post update node -> Node flow
1246-
Ssa::ssaFlow(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
1247-
or
1248-
// Def-use/Use-use flow
1249-
Ssa::ssaFlow(nodeFrom, nodeTo)
1250-
or
1251-
// Operand -> Instruction flow
1252-
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
1253-
or
1254-
// Instruction -> Operand flow
1255-
simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
1256-
or
1257-
// Phi node -> Node flow
1258-
Ssa::fromPhiNode(nodeFrom, nodeTo)
1259-
or
1260-
// Indirect operand -> (indirect) instruction flow
1261-
indirectionOperandFlow(nodeFrom, nodeTo)
1262-
or
1263-
// Indirect instruction -> indirect operand flow
1264-
indirectionInstructionFlow(nodeFrom, nodeTo)
1265-
or
1266-
// Flow through modeled functions
1267-
modelFlow(nodeFrom, nodeTo)
1268-
or
1269-
// Reverse flow: data that flows from the definition node back into the indirection returned
1270-
// by a function. This allows data to flow 'in' through references returned by a modeled
1271-
// function such as `operator[]`.
1272-
exists(Operand address, int indirectionIndex |
1273-
nodeHasOperand(nodeTo.(IndirectReturnOutNode), address, indirectionIndex)
1274-
|
1275-
exists(StoreInstruction store |
1276-
nodeHasInstruction(nodeFrom, store, indirectionIndex - 1) and
1277-
store.getDestinationAddressOperand() = address
1201+
private predicate indirectionOperandFlow(RawIndirectOperand nodeFrom, Node nodeTo) {
1202+
// Reduce the indirection count by 1 if we're passing through a `LoadInstruction`.
1203+
exists(int ind, LoadInstruction load |
1204+
hasOperandAndIndex(nodeFrom, load.getSourceAddressOperand(), ind) and
1205+
nodeHasInstruction(nodeTo, load, ind - 1)
12781206
)
12791207
or
1280-
Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex)
1281-
)
1282-
}
1208+
// If an operand flows to an instruction, then the indirection of
1209+
// the operand also flows to the indirection of the instruction.
1210+
exists(Operand operand, Instruction instr, int indirectionIndex |
1211+
simpleInstructionLocalFlowStep(operand, instr) and
1212+
hasOperandAndIndex(nodeFrom, operand, indirectionIndex) and
1213+
hasInstructionAndIndex(nodeTo, instr, indirectionIndex)
1214+
)
1215+
or
1216+
// If there's indirect flow to an operand, then there's also indirect
1217+
// flow to the operand after applying some pointer arithmetic.
1218+
exists(PointerArithmeticInstruction pointerArith, int indirectionIndex |
1219+
hasOperandAndIndex(nodeFrom, pointerArith.getAnOperand(), indirectionIndex) and
1220+
hasInstructionAndIndex(nodeTo, pointerArith, indirectionIndex)
1221+
)
1222+
}
12831223

1284-
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
1285-
// Treat all conversions as flow, even conversions between different numeric types.
1286-
conversionFlow(opFrom, iTo, false)
1287-
or
1288-
iTo.(CopyInstruction).getSourceValueOperand() = opFrom
1289-
}
1224+
private predicate indirectionInstructionFlow(
1225+
RawIndirectInstruction nodeFrom, IndirectOperand nodeTo
1226+
) {
1227+
// If there's flow from an instruction to an operand, then there's also flow from the
1228+
// indirect instruction to the indirect operand.
1229+
exists(Operand operand, Instruction instr, int indirectionIndex |
1230+
simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand))
1231+
|
1232+
hasOperandAndIndex(nodeTo, operand, indirectionIndex) and
1233+
hasInstructionAndIndex(nodeFrom, instr, indirectionIndex)
1234+
)
1235+
}
12901236

1291-
private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) {
1292-
not opTo instanceof MemoryOperand and
1293-
opTo.getDef() = iFrom
1294-
}
1237+
/**
1238+
* INTERNAL: do not use.
1239+
*
1240+
* This is the local flow predicate that's used as a building block in global
1241+
* data flow. It may have less flow than the `localFlowStep` predicate.
1242+
*/
1243+
cached
1244+
predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) {
1245+
// Post update node -> Node flow
1246+
Ssa::ssaFlow(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo)
1247+
or
1248+
// Def-use/Use-use flow
1249+
Ssa::ssaFlow(nodeFrom, nodeTo)
1250+
or
1251+
// Operand -> Instruction flow
1252+
simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction())
1253+
or
1254+
// Instruction -> Operand flow
1255+
simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand())
1256+
or
1257+
// Phi node -> Node flow
1258+
Ssa::fromPhiNode(nodeFrom, nodeTo)
1259+
or
1260+
// Indirect operand -> (indirect) instruction flow
1261+
indirectionOperandFlow(nodeFrom, nodeTo)
1262+
or
1263+
// Indirect instruction -> indirect operand flow
1264+
indirectionInstructionFlow(nodeFrom, nodeTo)
1265+
or
1266+
// Flow through modeled functions
1267+
modelFlow(nodeFrom, nodeTo)
1268+
or
1269+
// Reverse flow: data that flows from the definition node back into the indirection returned
1270+
// by a function. This allows data to flow 'in' through references returned by a modeled
1271+
// function such as `operator[]`.
1272+
exists(Operand address, int indirectionIndex |
1273+
nodeHasOperand(nodeTo.(IndirectReturnOutNode), address, indirectionIndex)
1274+
|
1275+
exists(StoreInstruction store |
1276+
nodeHasInstruction(nodeFrom, store, indirectionIndex - 1) and
1277+
store.getDestinationAddressOperand() = address
1278+
)
1279+
or
1280+
Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex)
1281+
)
1282+
}
12951283

1296-
private predicate modelFlow(Node nodeFrom, Node nodeTo) {
1297-
exists(
1298-
CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut
1299-
|
1300-
call.getStaticCallTarget() = func and
1301-
func.hasDataFlow(modelIn, modelOut)
1302-
|
1303-
nodeFrom = callInput(call, modelIn) and
1304-
nodeTo = callOutput(call, modelOut)
1284+
private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) {
1285+
// Treat all conversions as flow, even conversions between different numeric types.
1286+
conversionFlow(opFrom, iTo, false)
13051287
or
1306-
exists(int d |
1307-
nodeFrom = callInput(call, modelIn, d) and
1308-
nodeTo = callOutput(call, modelOut, d)
1288+
iTo.(CopyInstruction).getSourceValueOperand() = opFrom
1289+
}
1290+
1291+
private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) {
1292+
not opTo instanceof MemoryOperand and
1293+
opTo.getDef() = iFrom
1294+
}
1295+
1296+
private predicate modelFlow(Node nodeFrom, Node nodeTo) {
1297+
exists(
1298+
CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut
1299+
|
1300+
call.getStaticCallTarget() = func and
1301+
func.hasDataFlow(modelIn, modelOut)
1302+
|
1303+
nodeFrom = callInput(call, modelIn) and
1304+
nodeTo = callOutput(call, modelOut)
1305+
or
1306+
exists(int d |
1307+
nodeFrom = callInput(call, modelIn, d) and
1308+
nodeTo = callOutput(call, modelOut, d)
1309+
)
13091310
)
1310-
)
1311+
}
13111312
}
13121313

1314+
import Cached
1315+
13131316
/**
13141317
* Holds if data flows from `source` to `sink` in zero or more local
13151318
* (intra-procedural) steps.

0 commit comments

Comments
 (0)