11/**
22 * This file provides the first phase of the `cpp/invalid-pointer-deref` query that identifies flow
33 * from an allocation to a pointer-arithmetic instruction that constructs a pointer that is out of bounds.
4+ *
5+ * Consider the following snippet:
6+ * ```cpp
7+ * 1. char* base = (char*)malloc(size);
8+ * 2. char* end = base + size;
9+ * 3. for(int *p = base; p <= end; p++) {
10+ * 4. use(*p); // BUG: Should have been bounded by `p < end`.
11+ * 5. }
12+ * ```
13+ * this file identifies the flow from `new int[size]` to `base + size`.
14+ *
15+ * This is done using the product-flow library. The configuration tracks flow from the pair
16+ * `(allocation, size of allocation)` to a pair `(a, b)` where there exists a pointer-arithmetic instruction
17+ * `pai = a + r` such that `b` is a dataflow node where `b <= r`. Because there will be a dataflow-path from
18+ * `allocation` to `a` this means that the `pai` will compute a pointer that is some number of elements beyond
19+ * the end position of the allocation. See `pointerAddInstructionHasBounds` for the implementation of this.
20+ *
21+ * In the above example, the pair `(a, b)` is `(base, size)` with `base` and `size` coming from the expression
22+ * `base + size` on line 2, which is also the pointer-arithmetic instruction. In general, the pair does not necessarily
23+ * correspond directly to the operands of the pointer-arithmetic instruction.
24+ * In the following example, the pair is again `(base, size)`, but with `base` coming from line 3 and `size` from line 2,
25+ * and the pointer-arithmetic instruction being `base + n` on line 3:
26+ * ```cpp
27+ * 1. int* base = new int[size];
28+ * 2. if(n <= size) {
29+ * 3. int* end = base + n;
30+ * 4. for(int* p = base; p <= end; ++p) {
31+ * 5. *p = 0; // BUG: Should have been bounded by `p < end`.
32+ * 6. }
33+ * 7. }
34+ * ```
35+ *
36+ * Handling false positives:
37+ *
38+ * Consider a snippet such as:
39+ * ```cpp
40+ * 1. int* base = new int[size];
41+ * 2. int n = condition() ? size : 0;
42+ * 3. if(n >= size) return;
43+ * 4. int* end = base + n;
44+ * 5. for(int* p = base; p <= end; ++p) {
45+ * 6. *p = 0; // This is fine since `end < base + size`
46+ * 7. }
47+ * ```
48+ * In order to remove this false positive we define a barrier (see `SizeBarrier::SizeBarrierConfig`) that finds the
49+ * possible guards that compares a value to the size of the allocation. In the above example, this is the `(n >= size)`
50+ * guard on line 3. `SizeBarrier::getABarrierNode` then defines any node that is guarded by such a guard as a barrier
51+ * in the dataflow configuration.
452 */
553
654private import cpp
@@ -42,59 +90,57 @@ predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
4290 * ```
4391 * In this case, the sink pair identified by the product flow library (without any additional barriers)
4492 * would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
45- * instruction `pai` such that:
46- * 1. The left-hand of `pai` flows from the allocation , and
47- * 2. The right-hand of `pai` is non-strictly upper bounded by ` n` ( where `n` is the `n` in `p[n]`)
93+ * instruction `pai = a + b ` such that:
94+ * 1. the allocation flows to `a` , and
95+ * 2. `b <= n` where `n` is the `n` in `p[n]`
4896 * but because there's a strict comparison that compares `n` against the size of the allocation this
4997 * snippet is fine.
5098 */
51- module Barrier2 {
52- private class FlowState2 = int ;
53-
54- private module BarrierConfig2 implements DataFlow:: ConfigSig {
99+ module SizeBarrier {
100+ private module SizeBarrierConfig implements DataFlow:: ConfigSig {
55101 predicate isSource ( DataFlow:: Node source ) {
56102 // The sources is the same as in the sources for the second
57103 // projection in the `AllocToInvalidPointerConfig` module.
58104 hasSize ( _, source , _)
59105 }
60106
61107 additional predicate isSink (
62- DataFlow:: Node left , DataFlow:: Node right , IRGuardCondition g , FlowState2 state ,
63- boolean testIsTrue
108+ DataFlow:: Node left , DataFlow:: Node right , IRGuardCondition g , int k , boolean testIsTrue
64109 ) {
65- // The sink is any "large" side of a relational comparison.
66- g .comparesLt ( left .asOperand ( ) , right .asOperand ( ) , state , true , testIsTrue )
110+ // The sink is any "large" side of a relational comparison. i.e., the `right` expression
111+ // in a guard such as `left < right + k`.
112+ g .comparesLt ( left .asOperand ( ) , right .asOperand ( ) , k , true , testIsTrue )
67113 }
68114
69115 predicate isSink ( DataFlow:: Node sink ) { isSink ( _, sink , _, _, _) }
70116 }
71117
72- private import DataFlow:: Global< BarrierConfig2 >
118+ private import DataFlow:: Global< SizeBarrierConfig >
73119
74- private FlowState2 getAFlowStateForNode ( DataFlow:: Node node ) {
120+ private int getAFlowStateForNode ( DataFlow:: Node node ) {
75121 exists ( DataFlow:: Node source |
76122 flow ( source , node ) and
77123 hasSize ( _, source , result )
78124 )
79125 }
80126
81127 private predicate operandGuardChecks (
82- IRGuardCondition g , Operand left , Operand right , FlowState2 state , boolean edge
128+ IRGuardCondition g , Operand left , Operand right , int state , boolean edge
83129 ) {
84- exists ( DataFlow:: Node nLeft , DataFlow:: Node nRight , FlowState2 state0 |
130+ exists ( DataFlow:: Node nLeft , DataFlow:: Node nRight , int k |
85131 nRight .asOperand ( ) = right and
86132 nLeft .asOperand ( ) = left and
87- BarrierConfig2 :: isSink ( nLeft , nRight , g , state0 , edge ) and
133+ SizeBarrierConfig :: isSink ( nLeft , nRight , g , k , edge ) and
88134 state = getAFlowStateForNode ( nRight ) and
89- state0 <= state
135+ k <= state
90136 )
91137 }
92138
93139 /**
94140 * Gets an instruction that is guarded by a guard condition which ensures that
95141 * the value of the instruction is upper-bounded by size of some allocation.
96142 */
97- Instruction getABarrierInstruction ( FlowState2 state ) {
143+ Instruction getABarrierInstruction ( int state ) {
98144 exists ( IRGuardCondition g , ValueNumber value , Operand use , boolean edge |
99145 use = value .getAUse ( ) and
100146 operandGuardChecks ( pragma [ only_bind_into ] ( g ) , pragma [ only_bind_into ] ( use ) , _,
@@ -108,17 +154,15 @@ module Barrier2 {
108154 * Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
109155 * the value of the node is upper-bounded by size of some allocation.
110156 */
111- DataFlow:: Node getABarrierNode ( FlowState2 state ) {
157+ DataFlow:: Node getABarrierNode ( int state ) {
112158 result .asOperand ( ) = getABarrierInstruction ( state ) .getAUse ( )
113159 }
114160
115161 /**
116162 * Gets the block of a node that is guarded (see `getABarrierInstruction` or
117163 * `getABarrierNode` for the definition of what it means to be guarded).
118164 */
119- IRBlock getABarrierBlock ( FlowState2 state ) {
120- result .getAnInstruction ( ) = getABarrierInstruction ( state )
121- }
165+ IRBlock getABarrierBlock ( int state ) { result .getAnInstruction ( ) = getABarrierInstruction ( state ) }
122166}
123167
124168private module InterestingPointerAddInstruction {
@@ -151,54 +195,38 @@ private module InterestingPointerAddInstruction {
151195}
152196
153197/**
154- * A product-flow configuration for flow from an (allocation, size) pair to a
155- * pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
156- *
157- * The goal of this query is to find patterns such as:
158- * ```cpp
159- * 1. char* begin = (char*)malloc(size);
160- * 2. char* end = begin + size;
161- * 3. for(int *p = begin; p <= end; p++) {
162- * 4. use(*p);
163- * 5. }
164- * ```
165- *
166- * We do this by splitting the task up into two configurations:
167- * 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and
168- * 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3).
169- *
170- * Finally, the range-analysis library will find a load from (or store to) an address that
171- * is non-strictly upper-bounded by `end` (which in this case is `*p`).
198+ * A product-flow configuration for flow from an `(allocation, size)` pair to a
199+ * pointer-arithmetic operation `pai` such that `pai <= allocation + size`.
172200 */
173201private module Config implements ProductFlow:: StateConfigSig {
174202 class FlowState1 = Unit ;
175203
176204 class FlowState2 = int ;
177205
178206 predicate isSourcePair (
179- DataFlow:: Node source1 , FlowState1 state1 , DataFlow:: Node source2 , FlowState2 state2
207+ DataFlow:: Node allocSource , FlowState1 unit , DataFlow:: Node sizeSource , FlowState2 sizeAddend
180208 ) {
181209 // In the case of an allocation like
182210 // ```cpp
183211 // malloc(size + 1);
184212 // ```
185213 // we use `state2` to remember that there was an offset (in this case an offset of `1`) added
186214 // to the size of the allocation. This state is then checked in `isSinkPair`.
187- exists ( state1 ) and
188- hasSize ( source1 .asConvertedExpr ( ) , source2 , state2 )
215+ exists ( unit ) and
216+ hasSize ( allocSource .asConvertedExpr ( ) , sizeSource , sizeAddend )
189217 }
190218
191219 predicate isSinkPair (
192- DataFlow:: Node sink1 , FlowState1 state1 , DataFlow:: Node sink2 , FlowState2 state2
220+ DataFlow:: Node allocSink , FlowState1 unit , DataFlow:: Node sizeSink , FlowState2 sizeAddend
193221 ) {
194- exists ( state1 ) and
222+ exists ( unit ) and
195223 // We check that the delta computed by the range analysis matches the
196224 // state value that we set in `isSourcePair`.
197- pointerAddInstructionHasBounds0 ( _, sink1 , sink2 , state2 )
225+ pointerAddInstructionHasBounds0 ( _, allocSink , sizeSink , sizeAddend )
198226 }
199227
200228 predicate isBarrier2 ( DataFlow:: Node node , FlowState2 state ) {
201- node = Barrier2 :: getABarrierNode ( state )
229+ node = SizeBarrier :: getABarrierNode ( state )
202230 }
203231
204232 predicate isBarrierIn1 ( DataFlow:: Node node ) { isSourcePair ( node , _, _, _) }
@@ -211,7 +239,7 @@ private module Config implements ProductFlow::StateConfigSig {
211239private module AllocToInvalidPointerFlow = ProductFlow:: GlobalWithState< Config > ;
212240
213241/**
214- * Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1 ` is the
242+ * Holds if `pai` is non-strictly upper bounded by `sizeSink + delta` and `allocSink ` is the
215243 * left operand of the pointer-arithmetic operation.
216244 *
217245 * For example in,
@@ -220,37 +248,37 @@ private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
220248 * ```
221249 * We will have:
222250 * - `pai` is `p + (size + 1)`,
223- * - `sink1 ` is `p`
224- * - `sink2 ` is `size`
251+ * - `allocSink ` is `p`
252+ * - `sizeSink ` is `size`
225253 * - `delta` is `1`.
226254 */
227255pragma [ nomagic]
228256private predicate pointerAddInstructionHasBounds0 (
229- PointerAddInstruction pai , DataFlow:: Node sink1 , DataFlow:: Node sink2 , int delta
257+ PointerAddInstruction pai , DataFlow:: Node allocSink , DataFlow:: Node sizeSink , int delta
230258) {
231259 InterestingPointerAddInstruction:: isInteresting ( pragma [ only_bind_into ] ( pai ) ) and
232- exists ( Instruction right , Instruction instr2 |
260+ exists ( Instruction right , Instruction sizeInstr |
233261 pai .getRight ( ) = right and
234- pai .getLeft ( ) = sink1 .asInstruction ( ) and
235- instr2 = sink2 .asInstruction ( ) and
236- // pai.getRight() <= sink2 + delta
237- bounded1 ( right , instr2 , delta ) and
238- not right = Barrier2 :: getABarrierInstruction ( delta ) and
239- not instr2 = Barrier2 :: getABarrierInstruction ( delta )
262+ pai .getLeft ( ) = allocSink .asInstruction ( ) and
263+ sizeInstr = sizeSink .asInstruction ( ) and
264+ // pai.getRight() <= sizeSink + delta
265+ bounded1 ( right , sizeInstr , delta ) and
266+ not right = SizeBarrier :: getABarrierInstruction ( delta ) and
267+ not sizeInstr = SizeBarrier :: getABarrierInstruction ( delta )
240268 )
241269}
242270
243271/**
244- * Holds if `allocation` flows to `sink1 ` and `sink1 ` represents the left-hand
245- * side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai`
246- * is non-strictly upper bounded by the size of `alllocation` + ` delta`.
272+ * Holds if `allocation` flows to `allocSink ` and `allocSink ` represents the left operand
273+ * of the pointer-arithmetic instruction `pai = a + b` (i.e., `allocSink = a`), and
274+ * `b <= allocation + delta`.
247275 */
248276pragma [ nomagic]
249277predicate pointerAddInstructionHasBounds (
250- DataFlow:: Node allocation , PointerAddInstruction pai , DataFlow:: Node sink1 , int delta
278+ DataFlow:: Node allocation , PointerAddInstruction pai , DataFlow:: Node allocSink , int delta
251279) {
252- exists ( DataFlow:: Node sink2 |
253- AllocToInvalidPointerFlow:: flow ( allocation , _, sink1 , sink2 ) and
254- pointerAddInstructionHasBounds0 ( pai , sink1 , sink2 , delta )
280+ exists ( DataFlow:: Node sizeSink |
281+ AllocToInvalidPointerFlow:: flow ( allocation , _, allocSink , sizeSink ) and
282+ pointerAddInstructionHasBounds0 ( pai , allocSink , sizeSink , delta )
255283 )
256284}
0 commit comments