Skip to content

Commit dac4f0f

Browse files
author
Robert Marsh
authored
Merge pull request #2763 from jbj/ir-VariableNode
C++: DefaultTaintTracking perf fix for globals
2 parents 180e9d4 + a0e2d59 commit dac4f0f

File tree

13 files changed

+215
-74
lines changed

13 files changed

+215
-74
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ private class DefaultTaintTrackingCfg extends DataFlow::Configuration {
6969

7070
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
7171

72-
override predicate isSink(DataFlow::Node sink) { any() }
72+
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
7373

7474
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
7575
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
@@ -84,18 +84,15 @@ private class ToGlobalVarTaintTrackingCfg extends DataFlow::Configuration {
8484
override predicate isSource(DataFlow::Node source) { source = getNodeForSource(_) }
8585

8686
override predicate isSink(DataFlow::Node sink) {
87-
exists(GlobalOrNamespaceVariable gv | writesVariable(sink.asInstruction(), gv))
87+
sink.asVariable() instanceof GlobalOrNamespaceVariable
8888
}
8989

9090
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
9191
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
9292
or
93-
exists(StoreInstruction i1, LoadInstruction i2, GlobalOrNamespaceVariable gv |
94-
writesVariable(i1, gv) and
95-
readsVariable(i2, gv) and
96-
i1 = n1.asInstruction() and
97-
i2 = n2.asInstruction()
98-
)
93+
writesVariable(n1.asInstruction(), n2.asVariable().(GlobalOrNamespaceVariable))
94+
or
95+
readsVariable(n2.asInstruction(), n1.asVariable().(GlobalOrNamespaceVariable))
9996
}
10097

10198
override predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
@@ -105,19 +102,20 @@ private class FromGlobalVarTaintTrackingCfg extends DataFlow2::Configuration {
105102
FromGlobalVarTaintTrackingCfg() { this = "FromGlobalVarTaintTrackingCfg" }
106103

107104
override predicate isSource(DataFlow::Node source) {
108-
exists(
109-
ToGlobalVarTaintTrackingCfg other, DataFlow::Node prevSink, GlobalOrNamespaceVariable gv
110-
|
111-
other.hasFlowTo(prevSink) and
112-
writesVariable(prevSink.asInstruction(), gv) and
113-
readsVariable(source.asInstruction(), gv)
114-
)
105+
// This set of sources should be reasonably small, which is good for
106+
// performance since the set of sinks is very large.
107+
exists(ToGlobalVarTaintTrackingCfg otherCfg | otherCfg.hasFlowTo(source))
115108
}
116109

117-
override predicate isSink(DataFlow::Node sink) { any() }
110+
override predicate isSink(DataFlow::Node sink) { exists(adjustedSink(sink)) }
118111

119112
override predicate isAdditionalFlowStep(DataFlow::Node n1, DataFlow::Node n2) {
120113
instructionTaintStep(n1.asInstruction(), n2.asInstruction())
114+
or
115+
// Additional step for flow out of variables. There is no flow _into_
116+
// variables in this configuration, so this step only serves to take flow
117+
// out of a variable that's a source.
118+
readsVariable(n2.asInstruction(), n1.asVariable())
121119
}
122120

123121
override predicate isBarrier(DataFlow::Node node) { nodeIsBarrier(node) }
@@ -351,23 +349,12 @@ predicate taintedIncludingGlobalVars(Expr source, Element tainted, string global
351349
globalVar = ""
352350
or
353351
exists(
354-
ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg, DataFlow::Node store,
355-
GlobalOrNamespaceVariable global, DataFlow::Node load, DataFlow::Node sink
352+
ToGlobalVarTaintTrackingCfg toCfg, FromGlobalVarTaintTrackingCfg fromCfg,
353+
DataFlow::VariableNode variableNode, GlobalOrNamespaceVariable global, DataFlow::Node sink
356354
|
357-
toCfg.hasFlow(getNodeForSource(source), store) and
358-
store
359-
.asInstruction()
360-
.(StoreInstruction)
361-
.getDestinationAddress()
362-
.(VariableAddressInstruction)
363-
.getASTVariable() = global and
364-
load
365-
.asInstruction()
366-
.(LoadInstruction)
367-
.getSourceAddress()
368-
.(VariableAddressInstruction)
369-
.getASTVariable() = global and
370-
fromCfg.hasFlow(load, sink) and
355+
global = variableNode.getVariable() and
356+
toCfg.hasFlow(getNodeForSource(source), variableNode) and
357+
fromCfg.hasFlow(variableNode, sink) and
371358
tainted = adjustedSink(sink) and
372359
global = globalVarFromId(globalVar)
373360
)

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,17 @@ private import DataFlowDispatch
77
* A data flow node that occurs as the argument of a call and is passed as-is
88
* to the callable. Instance arguments (`this` pointer) are also included.
99
*/
10-
class ArgumentNode extends Node {
11-
ArgumentNode() { exists(CallInstruction call | this.asInstruction() = call.getAnArgument()) }
10+
class ArgumentNode extends InstructionNode {
11+
ArgumentNode() { exists(CallInstruction call | this.getInstruction() = call.getAnArgument()) }
1212

1313
/**
1414
* Holds if this argument occurs at the given position in the given call.
1515
* The instance argument is considered to have index `-1`.
1616
*/
1717
predicate argumentOf(DataFlowCall call, int pos) {
18-
this.asInstruction() = call.getPositionalArgument(pos)
18+
this.getInstruction() = call.getPositionalArgument(pos)
1919
or
20-
this.asInstruction() = call.getThisArgument() and pos = -1
20+
this.getInstruction() = call.getThisArgument() and pos = -1
2121
}
2222

2323
/** Gets the call in which this node is an argument. */
@@ -36,15 +36,15 @@ class ReturnKind extends TReturnKind {
3636
}
3737

3838
/** A data flow node that occurs as the result of a `ReturnStmt`. */
39-
class ReturnNode extends Node {
40-
ReturnNode() { exists(ReturnValueInstruction ret | this.asInstruction() = ret.getReturnValue()) }
39+
class ReturnNode extends InstructionNode {
40+
ReturnNode() { exists(ReturnValueInstruction ret | this.getInstruction() = ret.getReturnValue()) }
4141

4242
/** Gets the kind of this returned value. */
4343
ReturnKind getKind() { result = TNormalReturnKind() }
4444
}
4545

4646
/** A data flow node that represents the output of a call. */
47-
class OutNode extends Node {
47+
class OutNode extends InstructionNode {
4848
override CallInstruction instr;
4949

5050
/** Gets the underlying call. */
@@ -181,11 +181,17 @@ private predicate suppressUnusedType(Type t) { any() }
181181
// Java QL library compatibility wrappers
182182
//////////////////////////////////////////////////////////////////////////////
183183
/** A node that performs a type cast. */
184-
class CastNode extends Node {
184+
class CastNode extends InstructionNode {
185185
CastNode() { none() } // stub implementation
186186
}
187187

188-
class DataFlowCallable = Function;
188+
/**
189+
* A function that may contain code or a variable that may contain itself. When
190+
* flow crosses from one _enclosing callable_ to another, the interprocedural
191+
* data-flow library discards call contexts and inserts a node in the big-step
192+
* relation used for human-readable path explanations.
193+
*/
194+
class DataFlowCallable = Declaration;
189195

190196
class DataFlowExpr = Expr;
191197

cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll

Lines changed: 88 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,9 @@ private import semmle.code.cpp.controlflow.IRGuards
88
private import semmle.code.cpp.ir.ValueNumbering
99
private import semmle.code.cpp.models.interfaces.DataFlow
1010

11-
/**
12-
* A newtype wrapper to prevent accidental casts between `Node` and
13-
* `Instruction`. This ensures we can add `Node`s that are not `Instruction`s
14-
* in the future.
15-
*/
16-
private newtype TIRDataFlowNode = MkIRDataFlowNode(Instruction i)
11+
private newtype TIRDataFlowNode =
12+
TInstructionNode(Instruction i) or
13+
TVariableNode(Variable var)
1714

1815
/**
1916
* A node in a data flow graph.
@@ -23,44 +20,45 @@ private newtype TIRDataFlowNode = MkIRDataFlowNode(Instruction i)
2320
* `DataFlow::parameterNode`, and `DataFlow::uninitializedNode` respectively.
2421
*/
2522
class Node extends TIRDataFlowNode {
26-
Instruction instr;
27-
28-
Node() { this = MkIRDataFlowNode(instr) }
29-
3023
/**
31-
* INTERNAL: Do not use. Alternative name for `getFunction`.
24+
* INTERNAL: Do not use.
3225
*/
33-
Function getEnclosingCallable() { result = this.getFunction() }
26+
Declaration getEnclosingCallable() { none() } // overridden in subclasses
3427

35-
Function getFunction() { result = instr.getEnclosingFunction() }
28+
/** Gets the function to which this node belongs, if any. */
29+
Function getFunction() { none() } // overridden in subclasses
3630

3731
/** Gets the type of this node. */
38-
Type getType() { result = instr.getResultType() }
32+
Type getType() { none() } // overridden in subclasses
3933

40-
Instruction asInstruction() { this = MkIRDataFlowNode(result) }
34+
/** Gets the instruction corresponding to this node, if any. */
35+
Instruction asInstruction() { result = this.(InstructionNode).getInstruction() }
4136

4237
/**
4338
* Gets the non-conversion expression corresponding to this node, if any. If
4439
* this node strictly (in the sense of `asConvertedExpr`) corresponds to a
4540
* `Conversion`, then the result is that `Conversion`'s non-`Conversion` base
4641
* expression.
4742
*/
48-
Expr asExpr() {
49-
result.getConversion*() = instr.getConvertedResultExpression() and
50-
not result instanceof Conversion
51-
}
43+
Expr asExpr() { result = this.(ExprNode).getExpr() }
5244

5345
/**
5446
* Gets the expression corresponding to this node, if any. The returned
5547
* expression may be a `Conversion`.
5648
*/
57-
Expr asConvertedExpr() { result = instr.getConvertedResultExpression() }
49+
Expr asConvertedExpr() { result = this.(ExprNode).getConvertedExpr() }
5850

5951
/** Gets the argument that defines this `DefinitionByReferenceNode`, if any. */
6052
Expr asDefiningArgument() { result = this.(DefinitionByReferenceNode).getArgument() }
6153

6254
/** Gets the parameter corresponding to this node, if any. */
63-
Parameter asParameter() { result = instr.(InitializeParameterInstruction).getParameter() }
55+
Parameter asParameter() { result = this.(ParameterNode).getParameter() }
56+
57+
/**
58+
* Gets the variable corresponding to this node, if any. This can be used for
59+
* modelling flow in and out of global variables.
60+
*/
61+
Variable asVariable() { result = this.(VariableNode).getVariable() }
6462

6563
/**
6664
* DEPRECATED: See UninitializedNode.
@@ -76,7 +74,7 @@ class Node extends TIRDataFlowNode {
7674
Type getTypeBound() { result = getType() }
7775

7876
/** Gets the location of this element. */
79-
Location getLocation() { result = instr.getLocation() }
77+
Location getLocation() { none() } // overridden by subclasses
8078

8179
/**
8280
* Holds if this element is at the specified location.
@@ -91,32 +89,55 @@ class Node extends TIRDataFlowNode {
9189
this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
9290
}
9391

94-
string toString() {
92+
/** Gets a textual representation of this element. */
93+
string toString() { none() } // overridden by subclasses
94+
}
95+
96+
class InstructionNode extends Node, TInstructionNode {
97+
Instruction instr;
98+
99+
InstructionNode() { this = TInstructionNode(instr) }
100+
101+
/** Gets the instruction corresponding to this node. */
102+
Instruction getInstruction() { result = instr }
103+
104+
override Declaration getEnclosingCallable() { result = this.getFunction() }
105+
106+
override Function getFunction() { result = instr.getEnclosingFunction() }
107+
108+
override Type getType() { result = instr.getResultType() }
109+
110+
override Location getLocation() { result = instr.getLocation() }
111+
112+
override string toString() {
95113
// This predicate is overridden in subclasses. This default implementation
96114
// does not use `Instruction.toString` because that's expensive to compute.
97-
result = this.asInstruction().getOpcode().toString()
115+
result = this.getInstruction().getOpcode().toString()
98116
}
99117
}
100118

101119
/**
102120
* An expression, viewed as a node in a data flow graph.
103121
*/
104-
class ExprNode extends Node {
105-
ExprNode() { exists(this.asExpr()) }
122+
class ExprNode extends InstructionNode {
123+
ExprNode() { exists(instr.getConvertedResultExpression()) }
106124

107125
/**
108126
* Gets the non-conversion expression corresponding to this node, if any. If
109127
* this node strictly (in the sense of `getConvertedExpr`) corresponds to a
110128
* `Conversion`, then the result is that `Conversion`'s non-`Conversion` base
111129
* expression.
112130
*/
113-
Expr getExpr() { result = this.asExpr() }
131+
Expr getExpr() {
132+
result.getConversion*() = instr.getConvertedResultExpression() and
133+
not result instanceof Conversion
134+
}
114135

115136
/**
116137
* Gets the expression corresponding to this node, if any. The returned
117138
* expression may be a `Conversion`.
118139
*/
119-
Expr getConvertedExpr() { result = this.asConvertedExpr() }
140+
Expr getConvertedExpr() { result = instr.getConvertedResultExpression() }
120141

121142
override string toString() { result = this.asConvertedExpr().toString() }
122143
}
@@ -125,7 +146,7 @@ class ExprNode extends Node {
125146
* The value of a parameter at function entry, viewed as a node in a data
126147
* flow graph.
127148
*/
128-
class ParameterNode extends Node {
149+
class ParameterNode extends InstructionNode {
129150
override InitializeParameterInstruction instr;
130151

131152
/**
@@ -139,7 +160,7 @@ class ParameterNode extends Node {
139160
override string toString() { result = instr.getParameter().toString() }
140161
}
141162

142-
private class ThisParameterNode extends Node {
163+
private class ThisParameterNode extends InstructionNode {
143164
override InitializeThisInstruction instr;
144165

145166
override string toString() { result = "this" }
@@ -176,7 +197,7 @@ deprecated class UninitializedNode extends Node {
176197
* This class exists to match the interface used by Java. There are currently no non-abstract
177198
* classes that extend it. When we implement field flow, we can revisit this.
178199
*/
179-
abstract class PostUpdateNode extends Node {
200+
abstract class PostUpdateNode extends InstructionNode {
180201
/**
181202
* Gets the node before the state update.
182203
*/
@@ -193,7 +214,7 @@ abstract class PostUpdateNode extends Node {
193214
* returned. This node will have its `getArgument()` equal to `&x` and its
194215
* `getVariableAccess()` equal to `x`.
195216
*/
196-
class DefinitionByReferenceNode extends Node {
217+
class DefinitionByReferenceNode extends InstructionNode {
197218
override WriteSideEffectInstruction instr;
198219

199220
/** Gets the argument corresponding to this node. */
@@ -220,10 +241,41 @@ class DefinitionByReferenceNode extends Node {
220241
}
221242
}
222243

244+
/**
245+
* A `Node` corresponding to a variable in the program, as opposed to the
246+
* value of that variable at some particular point. This can be used for
247+
* modelling flow in and out of global variables.
248+
*/
249+
class VariableNode extends Node, TVariableNode {
250+
Variable v;
251+
252+
VariableNode() { this = TVariableNode(v) }
253+
254+
/** Gets the variable corresponding to this node. */
255+
Variable getVariable() { result = v }
256+
257+
override Function getFunction() { none() }
258+
259+
override Declaration getEnclosingCallable() {
260+
// When flow crosses from one _enclosing callable_ to another, the
261+
// interprocedural data-flow library discards call contexts and inserts a
262+
// node in the big-step relation used for human-readable path explanations.
263+
// Therefore we want a distinct enclosing callable for each `VariableNode`,
264+
// and that can be the `Variable` itself.
265+
result = v
266+
}
267+
268+
override Type getType() { result = v.getType() }
269+
270+
override Location getLocation() { result = v.getLocation() }
271+
272+
override string toString() { result = v.toString() }
273+
}
274+
223275
/**
224276
* Gets the node corresponding to `instr`.
225277
*/
226-
Node instructionNode(Instruction instr) { result.asInstruction() = instr }
278+
InstructionNode instructionNode(Instruction instr) { result.getInstruction() = instr }
227279

228280
DefinitionByReferenceNode definitionByReferenceNode(Expr e) { result.getArgument() = e }
229281

@@ -244,6 +296,9 @@ ExprNode convertedExprNode(Expr e) { result.getExpr() = e }
244296
*/
245297
ParameterNode parameterNode(Parameter p) { result.getParameter() = p }
246298

299+
/** Gets the `VariableNode` corresponding to the variable `v`. */
300+
VariableNode variableNode(Variable v) { result.getVariable() = v }
301+
247302
/**
248303
* Gets the `Node` corresponding to the value of an uninitialized local
249304
* variable `v`.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:2:17:2:25 | sinkParam | global1 |
2+
| globals.cpp:13:15:13:20 | call to getenv | globals.cpp:12:10:12:16 | global1 | global1 |
3+
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:2:17:2:25 | sinkParam | global2 |
4+
| globals.cpp:23:15:23:20 | call to getenv | globals.cpp:19:10:19:16 | global2 | global2 |

0 commit comments

Comments
 (0)