diff --git a/cpp/ql/lib/change-notes/2025-03-13-ascertaindef.md b/cpp/ql/lib/change-notes/2025-03-13-ascertaindef.md new file mode 100644 index 000000000000..6a55fc7bdd08 --- /dev/null +++ b/cpp/ql/lib/change-notes/2025-03-13-ascertaindef.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* Added `Node.asUncertainDefinition` and `Node.asCertainDefinition` to the `DataFlow::Node` class for querying whether a definition overwrites the entire destination buffer. \ No newline at end of file diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index c9e2a7136216..a6018535824a 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -313,13 +313,79 @@ class Node extends TIRDataFlowNode { * `n.asExpr() instanceof IncrementOperation` since the result of evaluating * the expression `x++` is passed to `sink`. */ - Expr asDefinition() { - exists(StoreInstruction store | + Expr asDefinition() { result = this.asDefinition(_) } + + /** + * Gets the definition associated with this node, if any. + * + * For example, consider the following example + * ```cpp + * int x = 42; // 1 + * x = 34; // 2 + * ++x; // 3 + * x++; // 4 + * x += 1; // 5 + * int y = x += 2; // 6 + * ``` + * - For (1) the result is `42`. + * - For (2) the result is `x = 34`. + * - For (3) the result is `++x`. + * - For (4) the result is `x++`. + * - For (5) the result is `x += 1`. + * - For (6) there are two results: + * - For the definition generated by `x += 2` the result is `x += 2` + * - For the definition generated by `int y = ...` the result is + * also `x += 2`. + * + * For assignments, `node.asDefinition(_)` and `node.asExpr()` will both exist + * for the same dataflow node. However, for expression such as `x++` that + * both write to `x` and read the current value of `x`, `node.asDefinition(_)` + * will give the node corresponding to the value after the increment, and + * `node.asExpr()` will give the node corresponding to the value before the + * increment. For an example of this, consider the following: + * + * ```cpp + * sink(x++); + * ``` + * in the above program, there will not be flow from a node `n` such that + * `n.asDefinition(_) instanceof IncrementOperation` to the argument of `sink` + * since the value passed to `sink` is the value before to the increment. + * However, there will be dataflow from a node `n` such that + * `n.asExpr() instanceof IncrementOperation` since the result of evaluating + * the expression `x++` is passed to `sink`. + * + * If `uncertain = false` then the definition is guaranteed to overwrite + * the entire buffer pointed to by the destination address of the definition. + * Otherwise, `uncertain = true`. + * + * For example, the write `int x; x = 42;` is guaranteed to overwrite all the + * bytes allocated to `x`, while the assignment `int p[10]; p[3] = 42;` has + * `uncertain = true` since the write will not overwrite the entire buffer + * pointed to by `p`. + */ + Expr asDefinition(boolean uncertain) { + exists(StoreInstruction store, Ssa::DefinitionExt def | store = this.asInstruction() and - result = asDefinitionImpl(store) + result = asDefinitionImpl(store) and + Ssa::defToNode(this, def, _, _, _, _) and + if def.isCertain() then uncertain = false else uncertain = true ) } + /** + * Gets the definition associated with this node, if this node is a certain definition. + * + * See `Node.asDefinition/1` for a description of certain and uncertain definitions. + */ + Expr asCertainDefinition() { result = this.asDefinition(false) } + + /** + * Gets the definition associated with this node, if this node is an uncertain definition. + * + * See `Node.asDefinition/1` for a description of certain and uncertain definitions. + */ + Expr asUncertainDefinition() { result = this.asDefinition(true) } + /** * Gets the indirect definition at a given indirection corresponding to this * node, if any. diff --git a/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql b/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql index 812fe236f764..9b27e95fd658 100644 --- a/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql +++ b/cpp/ql/src/Security/CWE/CWE-079/CgiXss.ql @@ -37,7 +37,7 @@ module Config implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { isSink(node) and node.asExpr().getUnspecifiedType() instanceof ArithmeticType or - node.asInstruction().(StoreInstruction).getResultType() instanceof ArithmeticType + node.asCertainDefinition().getUnspecifiedType() instanceof ArithmeticType } } diff --git a/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql b/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql index 9672a830ce06..e5fd1a94f181 100644 --- a/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql +++ b/cpp/ql/src/Security/CWE/CWE-114/UncontrolledProcessOperation.ql @@ -37,7 +37,7 @@ module Config implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { isSink(node) and node.asExpr().getUnspecifiedType() instanceof ArithmeticType or - node.asInstruction().(StoreInstruction).getResultType() instanceof ArithmeticType + node.asCertainDefinition().getUnspecifiedType() instanceof ArithmeticType } } diff --git a/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql b/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql index f4a716765b8a..04b4fe45fcfa 100644 --- a/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql +++ b/cpp/ql/src/Security/CWE/CWE-134/UncontrolledFormatString.ql @@ -42,7 +42,7 @@ module Config implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { isSink(node) and isArithmeticNonCharType(node.asExpr().getUnspecifiedType()) or - isArithmeticNonCharType(node.asInstruction().(StoreInstruction).getResultType()) + isArithmeticNonCharType(node.asCertainDefinition().getUnspecifiedType()) } } diff --git a/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql b/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql index e1e459c259f1..9b595657fce9 100644 --- a/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-170/ImproperNullTerminationTainted.ql @@ -37,7 +37,7 @@ private module Config implements DataFlow::ConfigSig { predicate isBarrier(DataFlow::Node node) { isSink(node) and node.asExpr().getUnspecifiedType() instanceof ArithmeticType or - node.asInstruction().(StoreInstruction).getResultType() instanceof ArithmeticType + node.asCertainDefinition().getUnspecifiedType() instanceof ArithmeticType or mayAddNullTerminator(_, node.asIndirectExpr()) } diff --git a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql index 810039f4e1a4..c9efaf9f6952 100644 --- a/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql +++ b/cpp/ql/src/Security/CWE/CWE-190/ArithmeticTainted.ql @@ -75,9 +75,11 @@ module Config implements DataFlow::ConfigSig { predicate isSink(DataFlow::Node sink) { isSink(sink, _, _) } predicate isBarrier(DataFlow::Node node) { - exists(StoreInstruction store | store = node.asInstruction() | + exists(StoreInstruction store, Expr e | + store = node.asInstruction() and e = node.asCertainDefinition() + | // Block flow to "likely small expressions" - bounded(store.getSourceValue().getUnconvertedResultExpression()) + bounded(e) or // Block flow to "small types" store.getResultType().getUnspecifiedType().(IntegralType).getSize() <= 1