@@ -4,6 +4,8 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
44private import semmle.code.cpp.ir.dataflow.DataFlow2
55private import semmle.code.cpp.ir.IR
66private import semmle.code.cpp.ir.dataflow.internal.DataFlowDispatch as Dispatch
7+ private import semmle.code.cpp.models.interfaces.Taint
8+ private import semmle.code.cpp.models.interfaces.DataFlow
79
810/**
911 * A predictable instruction is one where an external user can predict
@@ -156,18 +158,79 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) {
156158 // This is part of the translation of `a[i]`, where we want taint to flow
157159 // from `a`.
158160 i2 .( PointerAddInstruction ) .getLeft ( ) = i1
159- // TODO: robust Chi handling
160- //
161- // TODO: Flow from argument to return of known functions: Port missing parts
162- // of `returnArgument` to the `interfaces.Taint` and `interfaces.DataFlow`
163- // libraries.
164- //
165- // TODO: Flow from input argument to output argument of known functions: Port
166- // missing parts of `copyValueBetweenArguments` to the `interfaces.Taint` and
167- // `interfaces.DataFlow` libraries and implement call side-effect nodes. This
168- // will help with the test for `ExecTainted.ql`. The test for
169- // `TaintedPath.ql` is more tricky because the output arg is a pointer
170- // addition expression.
161+ or
162+ // Flow from argument to return value
163+ i2 = any ( CallInstruction call |
164+ exists ( int indexIn |
165+ modelTaintToReturnValue ( call .getStaticCallTarget ( ) , indexIn ) and
166+ i1 = getACallArgumentOrIndirection ( call , indexIn )
167+ )
168+ )
169+ or
170+ // Flow from input argument to output argument
171+ // TODO: This won't work in practice as long as all aliased memory is tracked
172+ // together in a single virtual variable.
173+ // TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
174+ // is a pointer addition expression?
175+ i2 = any ( WriteSideEffectInstruction outNode |
176+ exists ( CallInstruction call , int indexIn , int indexOut |
177+ modelTaintToParameter ( call .getStaticCallTarget ( ) , indexIn , indexOut ) and
178+ i1 = getACallArgumentOrIndirection ( call , indexIn ) and
179+ outNode .getIndex ( ) = indexOut and
180+ outNode .getPrimaryInstruction ( ) = call
181+ )
182+ )
183+ }
184+
/**
 * Gets an instruction that feeds argument number `argumentIndex` of `call`.
 *
 * The result is either the positional argument itself, or any definition of
 * the side-effect operand of a read side effect that belongs to `call` and has
 * index `argumentIndex` (that is, the argument seen through one pointer
 * indirection).
 */
private Instruction getACallArgumentOrIndirection(CallInstruction call, int argumentIndex) {
  // Direct case: the value passed as the argument.
  call.getPositionalArgument(argumentIndex) = result
  or
  // Indirect case: a definition of what the call reads through the argument.
  exists(ReadSideEffectInstruction sideEffect |
    sideEffect.getPrimaryInstruction() = call and
    sideEffect.getIndex() = argumentIndex and
    // TODO: why are read side effect operands imprecise?
    sideEffect.getSideEffectOperand().getAnyDef() = result
  )
}
199+
/**
 * Holds if `f` is a `TaintFunction` whose model has taint flow from parameter
 * number `parameterIn` (either the parameter itself or its dereference) to the
 * dereference of parameter number `parameterOut`.
 */
private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
  exists(FunctionInput source, FunctionOutput sink |
    // The output side is always what the out-parameter points to.
    sink.isParameterDeref(parameterOut) and
    // The input side may be the parameter or what it points to.
    (source.isParameterDeref(parameterIn) or source.isParameter(parameterIn)) and
    f.(TaintFunction).hasTaintFlow(source, sink)
  )
}
207+
/**
 * Holds if the models for `f` imply that taint can flow from parameter number
 * `parameterIn` (either the parameter itself or its dereference) to the return
 * value of `f`.
 */
private predicate modelTaintToReturnValue(Function f, int parameterIn) {
  exists(FunctionInput source, FunctionOutput sink |
    (source.isParameterDeref(parameterIn) or source.isParameter(parameterIn)) and
    (
      // Modeled taint flow to the return value or to what it points to.
      (sink.isReturnValueDeref() or sink.isReturnValue()) and
      f.(TaintFunction).hasTaintFlow(source, sink)
      or
      // Modeled data flow (not taint flow) to what the return value points to.
      // Pointers and the objects they point to are conflated in taint tracking
      // for the time being.
      sink.isReturnValueDeref() and
      f.(DataFlowFunction).hasDataFlow(source, sink)
    )
  )
  or
  // Taint flow from one argument into another, combined with data flow from
  // that second argument to the return value, as in `strcat` and `memcpy`.
  // Modeling this as two separate steps would also add reverse flow from the
  // write side-effect to the call instruction, which may not be desirable, so
  // both hops are folded into a single step here.
  exists(int viaParameter, InParameter viaInput, OutReturnValue returnValue |
    viaInput.isParameter(viaParameter) and
    f.(DataFlowFunction).hasDataFlow(viaInput, returnValue) and
    modelTaintToParameter(f, parameterIn, viaParameter)
  )
}
172235
173236private Element adjustedSink ( DataFlow:: Node sink ) {
0 commit comments