Skip to content

Commit ce905e7

Browse files
authored
Merge pull request #597 from dave-bartolomeo/dave/IRDataflow
C++: Initial IR-based dataflow implementation
2 parents d94e14d + 78e5b3a commit ce905e7

38 files changed

+8201
-446
lines changed

config/identical-files.json

Lines changed: 74 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,55 +1,76 @@
11
{
2-
"C++ IR Instruction": [
3-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll",
4-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll",
5-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll"
6-
],
7-
"C++ IR IRBlock": [
8-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll",
9-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll",
10-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll"
11-
],
12-
"C++ IR IRVariable": [
13-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll",
14-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll",
15-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll"
16-
],
17-
"C++ IR FunctionIR": [
18-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/FunctionIR.qll",
19-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/FunctionIR.qll",
20-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/FunctionIR.qll"
21-
],
22-
"C++ IR Operand": [
23-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll",
24-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll",
25-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll"
26-
],
27-
"C++ IR IRImpl": [
28-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll",
29-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll",
30-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll"
31-
],
32-
"C++ IR IRSanityImpl": [
33-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRSanity.qll",
34-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRSanity.qll",
35-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRSanity.qll"
36-
],
37-
"C++ IR PrintIRImpl": [
38-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll",
39-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll",
40-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll"
41-
],
42-
"C++ SSA AliasAnalysis": [
43-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll",
44-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll"
45-
],
46-
"C++ SSA SSAConstruction": [
47-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll",
48-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll"
49-
],
50-
"C++ IR ValueNumber": [
51-
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll",
52-
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll",
53-
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll"
54-
]
2+
"DataFlow Java/C++": [
3+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl.qll",
4+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl2.qll",
5+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl3.qll",
6+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl4.qll",
7+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImpl5.qll",
8+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplDepr.qll",
9+
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl.qll",
10+
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl2.qll",
11+
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl3.qll",
12+
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImpl4.qll",
13+
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll",
14+
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll",
15+
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll",
16+
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll"
17+
],
18+
"DataFlow Java/C++ Common": [
19+
"java/ql/src/semmle/code/java/dataflow/internal/DataFlowImplCommon.qll",
20+
"cpp/ql/src/semmle/code/cpp/dataflow/internal/DataFlowImplCommon.qll",
21+
"cpp/ql/src/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll"
22+
],
23+
"C++ IR Instruction": [
24+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Instruction.qll",
25+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Instruction.qll",
26+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Instruction.qll"
27+
],
28+
"C++ IR IRBlock": [
29+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRBlock.qll",
30+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRBlock.qll",
31+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRBlock.qll"
32+
],
33+
"C++ IR IRVariable": [
34+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRVariable.qll",
35+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRVariable.qll",
36+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRVariable.qll"
37+
],
38+
"C++ IR FunctionIR": [
39+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/FunctionIR.qll",
40+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/FunctionIR.qll",
41+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/FunctionIR.qll"
42+
],
43+
"C++ IR Operand": [
44+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/Operand.qll",
45+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/Operand.qll",
46+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/Operand.qll"
47+
],
48+
"C++ IR IRImpl": [
49+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IR.qll",
50+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IR.qll",
51+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IR.qll"
52+
],
53+
"C++ IR IRSanityImpl": [
54+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/IRSanity.qll",
55+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/IRSanity.qll",
56+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/IRSanity.qll"
57+
],
58+
"C++ IR PrintIRImpl": [
59+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/PrintIR.qll",
60+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll",
61+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll"
62+
],
63+
"C++ SSA AliasAnalysis": [
64+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/AliasAnalysis.qll",
65+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/AliasAnalysis.qll"
66+
],
67+
"C++ SSA SSAConstruction": [
68+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSAConstruction.qll",
69+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSAConstruction.qll"
70+
],
71+
"C++ IR ValueNumber": [
72+
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/gvn/ValueNumbering.qll",
73+
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/gvn/ValueNumbering.qll",
74+
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/gvn/ValueNumbering.qll"
75+
]
5576
}

cpp/ql/src/filters/ImportAdditionalLibraries.ql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ import semmle.code.cpp.dataflow.DataFlow2
1414
import semmle.code.cpp.dataflow.DataFlow3
1515
import semmle.code.cpp.dataflow.DataFlow4
1616
import semmle.code.cpp.dataflow.TaintTracking
17+
import semmle.code.cpp.ir.dataflow.DataFlow
18+
import semmle.code.cpp.ir.dataflow.DataFlow2
19+
import semmle.code.cpp.ir.dataflow.DataFlow3
20+
import semmle.code.cpp.ir.dataflow.DataFlow4
1721
import semmle.code.cpp.valuenumbering.HashCons
1822

1923
from File f, string tag
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
/**
2+
* Provides a library for local (intra-procedural) and global (inter-procedural)
3+
* data flow analysis: deciding whether data can flow from a _source_ to a
4+
* _sink_. This library differs from the one in `semmle.code.cpp.dataflow` in that
5+
* this library uses the IR (Intermediate Representation) library, which provides
6+
* a more precise semantic representation of the program, whereas the other dataflow
7+
* library uses the more syntax-oriented ASTs. This library should provide more accurate
8+
* results than the AST-based library in most scenarios.
9+
*
10+
* Unless configured otherwise, _flow_ means that the exact value of
11+
* the source may reach the sink. We do not track flow across pointer
12+
* dereferences or array indexing.
13+
*
14+
* To use global (interprocedural) data flow, extend the class
15+
* `DataFlow::Configuration` as documented on that class. To use local
16+
* (intraprocedural) data flow, invoke `DataFlow::localFlow` or
17+
* `DataFlow::localFlowStep` with arguments of type `DataFlow::Node`.
18+
*/
19+
import cpp
20+
21+
module DataFlow {
22+
import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl
23+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Provides a `DataFlow2` module, which is a copy of the `DataFlow` module. Use
3+
* this module when data-flow configurations must depend on each other. Two
4+
* classes extending `DataFlow::Configuration` should never depend on each
5+
* other, but one of them should instead depend on a
6+
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
7+
* `DataFlow4::Configuration`.
8+
*
9+
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
10+
*/
11+
import cpp
12+
13+
module DataFlow2 {
14+
import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl2
15+
16+
/**
17+
* This class exists to prevent mutual recursion between the user-overridden
18+
* member predicates of `Configuration` and the rest of the data-flow library.
19+
* Good performance cannot be guaranteed in the presence of such recursion, so
20+
* it should be replaced by using more than one copy of the data flow library.
21+
* Four copies are available: `DataFlow` through `DataFlow4`.
22+
*/
23+
private abstract
24+
class ConfigurationRecursionPrevention extends Configuration {
25+
bindingset[this]
26+
ConfigurationRecursionPrevention() { any() }
27+
28+
override predicate hasFlow(Node source, Node sink) {
29+
strictcount(Node n | this.isSource(n)) < 0
30+
or
31+
strictcount(Node n | this.isSink(n)) < 0
32+
or
33+
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
34+
or
35+
super.hasFlow(source, sink)
36+
}
37+
}
38+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Provides a `DataFlow3` module, which is a copy of the `DataFlow` module. Use
3+
* this module when data-flow configurations must depend on each other. Two
4+
* classes extending `DataFlow::Configuration` should never depend on each
5+
* other, but one of them should instead depend on a
6+
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
7+
* `DataFlow4::Configuration`.
8+
*
9+
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
10+
*/
11+
import cpp
12+
13+
module DataFlow3 {
14+
import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl3
15+
16+
/**
17+
* This class exists to prevent mutual recursion between the user-overridden
18+
* member predicates of `Configuration` and the rest of the data-flow library.
19+
* Good performance cannot be guaranteed in the presence of such recursion, so
20+
* it should be replaced by using more than one copy of the data flow library.
21+
* Four copies are available: `DataFlow` through `DataFlow4`.
22+
*/
23+
private abstract
24+
class ConfigurationRecursionPrevention extends Configuration {
25+
bindingset[this]
26+
ConfigurationRecursionPrevention() { any() }
27+
28+
override predicate hasFlow(Node source, Node sink) {
29+
strictcount(Node n | this.isSource(n)) < 0
30+
or
31+
strictcount(Node n | this.isSink(n)) < 0
32+
or
33+
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
34+
or
35+
super.hasFlow(source, sink)
36+
}
37+
}
38+
}
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
/**
2+
* Provides a `DataFlow4` module, which is a copy of the `DataFlow` module. Use
3+
* this module when data-flow configurations must depend on each other. Two
4+
* classes extending `DataFlow::Configuration` should never depend on each
5+
* other, but one of them should instead depend on a
6+
* `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a
7+
* `DataFlow4::Configuration`.
8+
*
9+
* See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation.
10+
*/
11+
import cpp
12+
13+
module DataFlow4 {
14+
import semmle.code.cpp.ir.dataflow.internal.DataFlowImpl4
15+
16+
/**
17+
* This class exists to prevent mutual recursion between the user-overridden
18+
* member predicates of `Configuration` and the rest of the data-flow library.
19+
* Good performance cannot be guaranteed in the presence of such recursion, so
20+
* it should be replaced by using more than one copy of the data flow library.
21+
* Four copies are available: `DataFlow` through `DataFlow4`.
22+
*/
23+
private abstract
24+
class ConfigurationRecursionPrevention extends Configuration {
25+
bindingset[this]
26+
ConfigurationRecursionPrevention() { any() }
27+
28+
override predicate hasFlow(Node source, Node sink) {
29+
strictcount(Node n | this.isSource(n)) < 0
30+
or
31+
strictcount(Node n | this.isSink(n)) < 0
32+
or
33+
strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0
34+
or
35+
super.hasFlow(source, sink)
36+
}
37+
}
38+
}
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
private import cpp
2+
private import DataFlowPrivate
3+
4+
Function viableImpl(MethodAccess ma) {
5+
result = ma.getTarget()
6+
}
7+
8+
Function viableCallable(Call call) {
9+
result = call.getTarget()
10+
}
11+
12+
/**
13+
* Holds if the call context `ctx` reduces the set of viable dispatch
14+
* targets of `ma` in `c`.
15+
*/
16+
predicate reducedViableImplInCallContext(MethodAccess ma, Callable c, Call ctx) {
17+
none()
18+
}
19+
20+
/**
21+
* Gets a viable dispatch target of `ma` in the context `ctx`. This is
22+
* restricted to those `ma`s for which a context might make a difference.
23+
*/
24+
private Method viableImplInCallContext(MethodAccess ma, Call ctx) {
25+
// stub implementation
26+
result = viableImpl(ma) and
27+
viableCallable(ctx) = ma.getEnclosingFunction()
28+
}
29+
30+
/**
31+
* Gets a viable dispatch target of `ma` in the context `ctx`. This is
32+
* restricted to those `ma`s for which the context makes a difference.
33+
*/
34+
Method prunedViableImplInCallContext(MethodAccess ma, Call ctx) {
35+
result = viableImplInCallContext(ma, ctx) and
36+
reducedViableImplInCallContext(ma, _, ctx)
37+
}
38+
39+
/**
40+
* Holds if data might flow from `ma` to a return statement in some
41+
* configuration.
42+
*/
43+
private predicate maybeChainedReturn(MethodAccess ma) {
44+
exists(ReturnStmt ret |
45+
exists(ret.getExpr()) and
46+
ret.getEnclosingFunction() = ma.getEnclosingFunction() and
47+
not ma.getParent() instanceof ExprStmt
48+
)
49+
}
50+
51+
/**
52+
* Holds if flow returning from `m` to `ma` might return further and if
53+
* this path restricts the set of call sites that can be returned to.
54+
*/
55+
predicate reducedViableImplInReturn(Method m, MethodAccess ma) {
56+
exists(int tgts, int ctxtgts |
57+
m = viableImpl(ma) and
58+
ctxtgts = count(Call ctx | m = viableImplInCallContext(ma, ctx)) and
59+
tgts = strictcount(Call ctx | viableCallable(ctx) = ma.getEnclosingFunction()) and
60+
ctxtgts < tgts
61+
) and
62+
maybeChainedReturn(ma)
63+
}
64+
65+
/**
66+
* Gets a viable dispatch target of `ma` in the context `ctx`. This is
67+
* restricted to those `ma`s and results for which the return flow from the
68+
* result to `ma` restricts the possible context `ctx`.
69+
*/
70+
Method prunedViableImplInCallContextReverse(MethodAccess ma, Call ctx) {
71+
result = viableImplInCallContext(ma, ctx) and
72+
reducedViableImplInReturn(result, ma)
73+
}

0 commit comments

Comments
 (0)