|
| 1 | +/** |
| 2 | + * @id cpp/misra/pointer-arithmetic-forms-an-invalid-pointer |
| 3 | + * @name RULE-8-7-1: Pointer arithmetic shall not form an invalid pointer |
| 4 | + * @description Pointers obtained as result of performing arithmetic should point to an initialized |
| 5 | + * object, or an element right next to the last element of an array. |
| 6 | + * @kind path-problem |
| 7 | + * @precision medium |
| 8 | + * @problem.severity error |
| 9 | + * @tags external/misra/id/rule-8-7-1 |
| 10 | + * scope/system |
| 11 | + * external/misra/enforcement/undecidable |
| 12 | + * external/misra/obligation/required |
| 13 | + */ |
| 14 | + |
| 15 | +import cpp |
| 16 | +import codingstandards.cpp.misra |
| 17 | +import semmle.code.cpp.dataflow.new.DataFlow |
| 18 | +import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil |
| 19 | +import semmle.code.cpp.security.BufferAccess |
| 20 | + |
/**
 * A declaration entry of a variable whose underlying type is an array type that reports an
 * array size.
 */
class ArrayDeclaration extends VariableDeclarationEntry {
  /** The array size of the declared type, bound by the characteristic predicate. */
  int length;

  ArrayDeclaration() {
    exists(ArrayType arrayType |
      arrayType = this.getType().getUnderlyingType() and
      length = arrayType.getArraySize()
    )
  }

  /**
   * Gets the declared length of this array.
   */
  int getLength() { result = length }
}
| 34 | + |
/**
 * A call whose target is an `AllocationFunction`, i.e. a call to a function that dynamically
 * allocates memory.
 */
class HeapAllocationFunctionCall extends FunctionCall {
  /** The allocation function targeted by this call. */
  AllocationFunction heapAllocFunction;

  HeapAllocationFunctionCall() { heapAllocFunction = this.getTarget() }

  /** Holds if the called allocation function is named `malloc`. */
  predicate isMallocCall() { heapAllocFunction.hasName("malloc") }

  /** Holds if the called allocation function is named `calloc`. */
  predicate isCallocCall() { heapAllocFunction.hasName("calloc") }

  /** Holds if the called allocation function is named `realloc`. */
  predicate isReallocCall() { heapAllocFunction.hasName("realloc") }

  /**
   * Gets the minimum estimated number of bytes allocated by this call.
   *
   * Subclasses supply the estimate; a call that belongs to no overriding subclass has no result.
   */
  abstract int getMinNumBytes();
}
| 54 | + |
/**
 * A call to `malloc`. The size estimate is derived from its first argument.
 */
class MallocFunctionCall extends HeapAllocationFunctionCall {
  MallocFunctionCall() { this.isMallocCall() }

  /**
   * Gets `lowerBound` of the size argument (presumably the range-analysis lower bound; confirm
   * which library provides `lowerBound` here).
   */
  override int getMinNumBytes() {
    exists(Expr sizeArg | sizeArg = this.getArgument(0) | result = lowerBound(sizeArg))
  }
}
| 60 | + |
/**
 * A call to `calloc`. The size estimate is the product of the estimates of its first two
 * arguments.
 */
class CallocFunctionCall extends HeapAllocationFunctionCall {
  CallocFunctionCall() { this.isCallocCall() }

  /**
   * Gets the product of `lowerBound` applied to the first and to the second argument.
   */
  override int getMinNumBytes() {
    exists(Expr firstArg, Expr secondArg |
      firstArg = this.getArgument(0) and
      secondArg = this.getArgument(1)
    |
      result = lowerBound(firstArg) * lowerBound(secondArg)
    )
  }
}
| 68 | + |
/**
 * A call to `realloc`. The size estimate is derived from its second argument.
 */
class ReallocFunctionCall extends HeapAllocationFunctionCall {
  ReallocFunctionCall() { this.isReallocCall() }

  /**
   * Gets `lowerBound` of the second argument of this call.
   */
  override int getMinNumBytes() {
    exists(Expr newSizeArg | newSizeArg = this.getArgument(1) | result = lowerBound(newSizeArg))
  }
}
| 74 | + |
/**
 * A cast applied directly to a heap allocation call, converting the allocated bytes to a pointer
 * of a more specific type, e.g.
 *
 * ``` C++
 * int *x = (int*)malloc(SIZE * sizeof(int));
 * ```
 *
 * This class captures the cast `(int*)malloc(SIZE * sizeof(int))` above.
 */
class NarrowedHeapAllocationFunctionCall extends Cast {
  /** The allocation call that is the operand of this cast. */
  HeapAllocationFunctionCall alloc;

  NarrowedHeapAllocationFunctionCall() { this.getExpr() = alloc }

  /**
   * Gets the minimum estimated number of elements of the pointed-to type that fit in the
   * allocation, never less than 1.
   */
  int getMinNumElements() {
    exists(int elementSize, int rawResult |
      elementSize = this.getUnderlyingType().(PointerType).getBaseType().getSize() and
      rawResult = alloc.getMinNumBytes() / elementSize
    |
      /*
       * The `SimpleRangeAnalysis` library is not perfect and can sometimes widen an estimate to
       * both ends of the type bound, which may produce a zero or negative element count.
       *
       * An object with a non-positive length does not make sense, so the estimate is clipped to
       * a minimum of one element (a non-array object is presumably treated as an array of a
       * single element by the rule; TODO confirm against the rule text).
       */

      result = rawResult.maximum(1)
    )
  }
}
| 107 | + |
/**
 * The algebraic type underlying `ArrayAllocation`: one branch wraps an `ArrayDeclaration`, the
 * other wraps a cast of a heap allocation call.
 */
newtype TArrayAllocation =
  TStackAllocation(ArrayDeclaration declaration) or
  TDynamicAllocation(NarrowedHeapAllocationFunctionCall narrowedCall)
| 111 | + |
/**
 * The algebraic type underlying `PointerFormation`: one branch wraps an array expression
 * (`ArrayExprBA`), the other wraps a pointer arithmetic operation.
 */
newtype TPointerFormation =
  TArrayExpr(ArrayExprBA arrayAccess) or
  TPointerArithmetic(PointerArithmeticOperation arithmetic)
| 115 | + |
/**
 * Any kind of allocation of an array: either an array declaration or a narrowed heap allocation.
 */
class ArrayAllocation extends TArrayAllocation {
  /** Gets the array declaration wrapped by this allocation, if it is a declaration. */
  ArrayDeclaration asStackAllocation() { TStackAllocation(result) = this }

  /** Gets the narrowed heap allocation wrapped by this allocation, if it is one. */
  NarrowedHeapAllocationFunctionCall asDynamicAllocation() { TDynamicAllocation(result) = this }

  /** Gets the textual representation of the wrapped declaration or cast. */
  string toString() {
    exists(ArrayDeclaration declaration | declaration = this.asStackAllocation() |
      result = declaration.toString()
    )
    or
    exists(NarrowedHeapAllocationFunctionCall narrowed | narrowed = this.asDynamicAllocation() |
      result = narrowed.toString()
    )
  }

  /**
   * Gets the number of objects that the array holds. This is the declared length for an array
   * declaration, and the minimum estimated number of elements for a heap allocation.
   */
  int getLength() {
    exists(ArrayDeclaration declaration | declaration = this.asStackAllocation() |
      result = declaration.getLength()
    )
    or
    exists(NarrowedHeapAllocationFunctionCall narrowed | narrowed = this.asDynamicAllocation() |
      result = narrowed.getMinNumElements()
    )
  }

  /** Gets the location of the wrapped declaration or cast. */
  Location getLocation() {
    exists(ArrayDeclaration declaration | declaration = this.asStackAllocation() |
      result = declaration.getLocation()
    )
    or
    exists(NarrowedHeapAllocationFunctionCall narrowed | narrowed = this.asDynamicAllocation() |
      result = narrowed.getLocation()
    )
  }

  /**
   * Gets the data-flow node associated with this allocation: the uninitialized-variable node of
   * the declared variable, or the node whose converted expression is the narrowing cast.
   */
  DataFlow::Node getNode() {
    exists(Variable declared | declared = this.asStackAllocation().getVariable() |
      result.asUninitialized() = declared
    )
    or
    exists(NarrowedHeapAllocationFunctionCall narrowed | narrowed = this.asDynamicAllocation() |
      result.asConvertedExpr() = narrowed
    )
  }

  /**
   * Gets an expression for this allocation: any access to the declared variable, or the
   * narrowing cast itself.
   */
  Expr asExpr() {
    exists(Variable declared | declared = this.asStackAllocation().getVariable() |
      result = declared.getAnAccess()
    )
    or
    result = this.asDynamicAllocation()
  }
}
| 156 | + |
/**
 * Any kind of pointer formation that derives from a base pointer, either as an arithmetic operation
 * on pointers, or an array access expression.
 */
class PointerFormation extends TPointerFormation {
  /** Gets the array expression wrapped by this pointer formation, if any. */
  ArrayExprBA asArrayExpr() { this = TArrayExpr(result) }

  /** Gets the pointer arithmetic operation wrapped by this pointer formation, if any. */
  PointerArithmeticOperation asPointerArithmetic() { this = TPointerArithmetic(result) }

  /** Gets the textual representation of the wrapped expression. */
  string toString() {
    result = this.asArrayExpr().toString() or
    result = this.asPointerArithmetic().toString()
  }

  /**
   * Holds if `operand` is an operand of the wrapped pointer arithmetic operation whose fully
   * converted type is a pointer type. Looking at the converted type lets an array operand that
   * decays to a pointer count as a pointer operand.
   */
  private predicate isPointerOperand(Expr operand) {
    operand = this.asPointerArithmetic().getAnOperand() and
    operand.getFullyConverted().getUnspecifiedType() instanceof PointerType
  }

  /**
   * Gets the sub-expression of this pointer formation that corresponds to the offset.
   *
   * For pointer arithmetic this is the operand that is not a pointer; the pointer operand is the
   * base and must not be reported as an offset.
   */
  private Expr getOffsetExpr() {
    result = this.asArrayExpr().getArrayOffset()
    or
    result = this.asPointerArithmetic().getAnOperand() and
    not this.isPointerOperand(result)
  }

  /**
   * Gets the offset of this pointer formation as calculated in relation to the base pointer.
   *
   * Only offsets with a compile-time value are covered. The value is negated when the formation
   * is a pointer subtraction.
   */
  int getOffset() {
    if this.asPointerArithmetic() instanceof PointerSubExpr
    then result = -this.getOffsetExpr().getValue().toInt()
    else result = this.getOffsetExpr().getValue().toInt()
  }

  /**
   * Gets the base pointer to which the offset is applied.
   *
   * For pointer addition and pointer subtraction this is the pointer-typed operand.
   */
  Expr getBase() {
    result = this.asArrayExpr().getArrayBase()
    or
    (
      this.asPointerArithmetic() instanceof PointerAddExpr or
      this.asPointerArithmetic() instanceof PointerSubExpr
    ) and
    this.isPointerOperand(result)
  }

  /**
   * Gets the expression associated with this pointer formation.
   */
  Expr asExpr() {
    result = this.asArrayExpr() or
    result = this.asPointerArithmetic()
  }

  /**
   * Gets the data-flow node associated with this pointer formation.
   */
  DataFlow::Node getNode() { result.asExpr() = this.asExpr() }

  /** Gets the location of the wrapped expression. */
  Location getLocation() {
    result = this.asArrayExpr().getLocation() or
    result = this.asPointerArithmetic().getLocation()
  }
}
| 226 | + |
/**
 * A "fat pointer" is a pointer that is augmented with offset and length
 * information of the underlying data.
 *
 * It is either an "allocated pointer" or an "index-adjusted pointer":
 * - *Allocated pointer*: a pointer that comes from an array allocation with a
 *   predetermined length. Its offset is 0.
 *   - This length info can be determined statically in some cases.
 * - *Index-adjusted pointer*: a new pointer that is derived from an existing
 *   fat pointer through pointer arithmetic.
 */
newtype TFatPointer =
  TAllocated(ArrayAllocation allocation) or
  TIndexAdjusted(PointerFormation formation)
| 241 | + |
/**
 * A pointer paired with offset and length information: either an array allocation or a pointer
 * formation derived from another pointer.
 */
class FatPointer extends TFatPointer {
  private ArrayAllocation asAllocated() { TAllocated(result) = this }

  private PointerFormation asIndexAdjusted() { TIndexAdjusted(result) = this }

  /** Holds if this fat pointer wraps an array allocation. */
  predicate isAllocated() { exists(this.asAllocated()) }

  /** Holds if this fat pointer wraps a pointer formation. */
  predicate isIndexAdjusted() { exists(this.asIndexAdjusted()) }

  /**
   * Gets the length of the underlying object, given that this fat pointer is
   * an *allocated pointer*. There is no result for an index-adjusted pointer.
   */
  int getLength() {
    exists(ArrayAllocation allocation | allocation = this.asAllocated() |
      result = allocation.getLength()
    )
  }

  /** Gets the textual representation of the wrapped allocation or pointer formation. */
  string toString() {
    exists(ArrayAllocation allocation | allocation = this.asAllocated() |
      result = allocation.toString()
    )
    or
    exists(PointerFormation formation | formation = this.asIndexAdjusted() |
      result = formation.toString()
    )
  }

  /** Gets the location of the wrapped allocation or pointer formation. */
  Location getLocation() {
    exists(ArrayAllocation allocation | allocation = this.asAllocated() |
      result = allocation.getLocation()
    )
    or
    exists(PointerFormation formation | formation = this.asIndexAdjusted() |
      result = formation.getLocation()
    )
  }

  /**
   * Gets the offset of this fat pointer: 0 for an allocated pointer, and the offset of the
   * pointer formation for an index-adjusted pointer.
   */
  int getOffset() {
    this.isAllocated() and result = 0
    or
    exists(PointerFormation formation | formation = this.asIndexAdjusted() |
      result = formation.getOffset()
    )
  }

  /** Gets the data-flow node of the wrapped allocation or pointer formation. */
  DataFlow::Node getNode() {
    exists(ArrayAllocation allocation | allocation = this.asAllocated() |
      result = allocation.getNode()
    )
    or
    exists(PointerFormation formation | formation = this.asIndexAdjusted() |
      result = formation.getNode()
    )
  }

  /**
   * Gets the base pointer expression: an expression of the allocation for an allocated pointer,
   * and the base of the pointer formation for an index-adjusted pointer.
   */
  Expr getBasePointer() {
    exists(ArrayAllocation allocation | allocation = this.asAllocated() |
      result = allocation.asExpr()
    )
    or
    exists(PointerFormation formation | formation = this.asIndexAdjusted() |
      result = formation.getBase()
    )
  }
}
| 283 | + |
/**
 * Holds if there is flow from `src` to `sink`, where `src` is the node of a fat pointer with
 * offset `srcOffset`, `sink` is the base pointer of a fat pointer with offset `sinkOffset`, and
 * `length` is the length of the array that the chain of fat pointers originates from.
 *
 * The length is known directly when `src` is an array allocation. When `src` is itself an
 * index-adjusted pointer, the length is inherited from the mapping whose sink is the base
 * pointer of `src`, so that a length only propagates along one derivation chain.
 */
predicate srcSinkLengthMap(
  DataFlow::Node src, DataFlow::Node sink, // both `src` and `sink` are fat pointers
  int srcOffset, int sinkOffset, int length
) {
  TrackArray::flow(src, sink) and
  exists(FatPointer start, FatPointer end |
    /* Reiterate the data flow configuration here. */
    src = start.getNode() and
    sink.asExpr() = end.getBasePointer()
  |
    srcOffset = start.getOffset() and
    sinkOffset = end.getOffset() and
    (
      /* Base case: The object is allocated and a fat pointer created. */
      length = start.getLength()
      or
      /*
       * Recursive case: `start` is derived from another fat pointer. Its base pointer is the
       * sink of an earlier mapping, and that mapping supplies the length.
       */

      start.isIndexAdjusted() and
      exists(DataFlow::Node previousSink |
        previousSink.asExpr() = start.getBasePointer() and
        srcSinkLengthMap(_, previousSink, _, _, length)
      )
    )
  )
}
| 305 | + |
/**
 * A data flow configuration whose sources are the nodes of fat pointers and whose sinks are the
 * base pointer expressions of fat pointers, i.e. it tracks an array (or a pointer derived from
 * it) to the place where a further pointer is derived from it.
 */
module TrackArrayConfig implements DataFlow::ConfigSig {
  /** Holds if `node` is the data-flow node of some fat pointer. */
  predicate isSource(DataFlow::Node node) { node = any(FatPointer fatPointer).getNode() }

  /** Holds if `node` is the base pointer expression of some fat pointer. */
  predicate isSink(DataFlow::Node node) {
    node.asExpr() = any(FatPointer fatPointer).getBasePointer()
  }
}

module TrackArray = DataFlow::Global<TrackArrayConfig>;
| 321 | + |
| 322 | +import TrackArray::PathGraph |
| 323 | + |
/*
 * The reporting logic is not implemented yet: `none()` keeps this query from producing any
 * result, and the message is a placeholder. The intended message is along the lines of:
 *
 *   "This pointer has offset " + pointerOffset +
 *     " when the minimum possible length of the object might be " + arrayLength + "."
 */

from TrackArray::PathNode source, TrackArray::PathNode sink, string message
where
  none() and // TODO
  message = "TODO" and
  not isExcluded(sink.getNode().asExpr(),
    Memory1Package::pointerArithmeticFormsAnInvalidPointerQuery())
select sink, source, sink, message
0 commit comments