Skip to content

Commit 6d46e4d

Browse files
committed
C++: Wire up models to DefaultTaintTracking
This adds support for arg-to-arg and arg-to-return taint.
1 parent fa00e96 commit 6d46e4d

File tree

3 files changed

+68
-14
lines changed

3 files changed

+68
-14
lines changed

cpp/ql/src/semmle/code/cpp/ir/dataflow/DefaultTaintTracking.qll

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ private import semmle.code.cpp.ir.dataflow.DataFlow
44
private import semmle.code.cpp.ir.dataflow.DataFlow2
55
private import semmle.code.cpp.ir.IR
66
private import semmle.code.cpp.ir.dataflow.internal.DataFlowDispatch as Dispatch
7+
private import semmle.code.cpp.models.interfaces.Taint
8+
private import semmle.code.cpp.models.interfaces.DataFlow
79

810
/**
911
* A predictable instruction is one where an external user can predict
@@ -159,18 +161,64 @@ private predicate instructionTaintStep(Instruction i1, Instruction i2) {
159161
// This is part of the translation of `a[i]`, where we want taint to flow
160162
// from `a`.
161163
i2.(PointerAddInstruction).getLeft() = i1
162-
// TODO: robust Chi handling
163-
//
164-
// TODO: Flow from argument to return of known functions: Port missing parts
165-
// of `returnArgument` to the `interfaces.Taint` and `interfaces.DataFlow`
166-
// libraries.
167-
//
168-
// TODO: Flow from input argument to output argument of known functions: Port
169-
// missing parts of `copyValueBetweenArguments` to the `interfaces.Taint` and
170-
// `interfaces.DataFlow` libraries and implement call side-effect nodes. This
171-
// will help with the test for `ExecTainted.ql`. The test for
172-
// `TaintedPath.ql` is more tricky because the output arg is a pointer
173-
// addition expression.
164+
or
165+
// Flow from argument to return value
166+
i2 = any(CallInstruction call |
167+
exists(int indexIn |
168+
modelTaintToReturnValue(call.getStaticCallTarget(), indexIn) and
169+
i1 = call.getPositionalArgument(indexIn)
170+
)
171+
)
172+
or
173+
// Flow from input argument to output argument
174+
// TODO: This won't work in practice as long as all aliased memory is tracked
175+
// together in a single virtual variable.
176+
// TODO: Will this work on the test for `TaintedPath.ql`, where the output arg
177+
// is a pointer addition expression?
178+
i2 = any(WriteSideEffectInstruction outNode |
179+
exists(CallInstruction call, int indexIn, int indexOut |
180+
modelTaintToParameter(call.getStaticCallTarget(), indexIn, indexOut) and
181+
i1 = call.getPositionalArgument(indexIn) and
182+
outNode.getIndex() = indexOut and
183+
outNode.getPrimaryInstruction() = call
184+
)
185+
)
186+
}
187+
188+
private predicate modelTaintToParameter(Function f, int parameterIn, int parameterOut) {
189+
exists( FunctionInput modelIn, FunctionOutput modelOut |
190+
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
191+
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
192+
modelOut.isParameterDeref(parameterOut)
193+
)
194+
}
195+
196+
private predicate modelTaintToReturnValue(Function f, int parameterIn) {
197+
// Taint flow from parameter to return value
198+
exists(FunctionInput modelIn, FunctionOutput modelOut |
199+
f.(TaintFunction).hasTaintFlow(modelIn, modelOut) and
200+
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
201+
(modelOut.isReturnValue() or modelOut.isReturnValueDeref())
202+
)
203+
or
204+
// Data flow (not taint flow) to where the return value points. For the time
205+
// being we will conflate pointers and objects in taint tracking.
206+
exists(FunctionInput modelIn, FunctionOutput modelOut |
207+
f.(DataFlowFunction).hasDataFlow(modelIn, modelOut) and
208+
(modelIn.isParameter(parameterIn) or modelIn.isParameterDeref(parameterIn)) and
209+
modelOut.isReturnValueDeref()
210+
)
211+
or
212+
// Taint flow from one argument to another and data flow from an argument to a
213+
// return value. This happens in functions like `strcat` and `memcpy`. We
214+
// could model this flow in two separate steps, but that would add reverse
215+
// flow from the write side-effect to the call instruction, which may not be
216+
// desirable.
217+
exists(int parameterMid, InParameter modelMid, OutReturnValue returnOut |
218+
modelTaintToParameter(f, parameterIn, parameterMid) and
219+
modelMid.isParameter(parameterMid) and
220+
f.(DataFlowFunction).hasDataFlow(modelMid, returnOut)
221+
)
174222
}
175223

176224
private Element adjustedSink(DataFlow::Node sink) {

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/defaulttainttracking.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,13 @@ int main(int argc, char *argv[]) {
1313
int taintedInt = atoi(getenv("VAR"));
1414
taintedInt++; // BUG: `taintedInt` isn't marked as tainted. Only `++` is.
1515

16-
sink(_strdup(getenv("VAR"))); // BUG: no taint
16+
sink(_strdup(getenv("VAR")));
1717
sink(strdup(getenv("VAR")));
1818
sink(unmodeled_function(getenv("VAR")));
1919

2020
char untainted_buf[100] = "";
2121
char buf[100] = "VAR = ";
22-
sink(strcat(buf, getenv("VAR"))); // BUG: no taint
22+
sink(strcat(buf, getenv("VAR")));
2323

2424
sink(buf); // BUG: no taint
2525
sink(untainted_buf); // the two buffers would be conflated if we added flow through partial chi inputs

cpp/ql/test/library-tests/dataflow/DefaultTaintTracking/tainted.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
| defaulttainttracking.cpp:13:25:13:30 | call to getenv | defaulttainttracking.cpp:13:25:13:37 | (const char *)... |
66
| defaulttainttracking.cpp:13:25:13:30 | call to getenv | defaulttainttracking.cpp:14:3:14:14 | ... ++ |
77
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:6:15:6:24 | p#0 |
8+
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:9:11:9:20 | p#0 |
9+
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:8:16:14 | call to _strdup |
10+
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:8:16:29 | (const char *)... |
811
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:16:16:21 | call to getenv |
912
| defaulttainttracking.cpp:16:16:16:21 | call to getenv | defaulttainttracking.cpp:16:16:16:28 | (const char *)... |
1013
| defaulttainttracking.cpp:17:15:17:20 | call to getenv | defaulttainttracking.cpp:5:14:5:23 | p#0 |
@@ -17,5 +20,8 @@
1720
| defaulttainttracking.cpp:18:27:18:32 | call to getenv | defaulttainttracking.cpp:18:27:18:32 | call to getenv |
1821
| defaulttainttracking.cpp:18:27:18:32 | call to getenv | defaulttainttracking.cpp:18:27:18:39 | (const char *)... |
1922
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:3:38:3:39 | s2 |
23+
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:9:11:9:20 | p#0 |
24+
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:8:22:13 | call to strcat |
25+
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:8:22:33 | (const char *)... |
2026
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:25 | call to getenv |
2127
| defaulttainttracking.cpp:22:20:22:25 | call to getenv | defaulttainttracking.cpp:22:20:22:32 | (const char *)... |

0 commit comments

Comments
 (0)