From c1b91db37ab39eebe91fbb3da9e6af86f26fba48 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:32:09 +0200 Subject: [PATCH 01/12] C++: Add more virtual dispatch tests. --- .../library-tests/dataflow/dispatch/test.cpp | 127 ++++++++++++++++++ .../dataflow/dispatch/test.expected | 0 .../library-tests/dataflow/dispatch/test.ql | 22 +++ 3 files changed, 149 insertions(+) create mode 100644 cpp/ql/test/library-tests/dataflow/dispatch/test.cpp create mode 100644 cpp/ql/test/library-tests/dataflow/dispatch/test.expected create mode 100644 cpp/ql/test/library-tests/dataflow/dispatch/test.ql diff --git a/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp b/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp new file mode 100644 index 000000000000..b89ba51f1c64 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp @@ -0,0 +1,127 @@ +struct Base { + void f(); + virtual void virtual_f(); +}; + +struct Derived : Base { + void f(); + void virtual_f(); +}; + +void test_simple() { + Base b; + b.f(); // $ target=2 + b.virtual_f(); // $ target=3 + + Derived d; + d.f(); // $ target=7 + d.virtual_f(); // $ target=8 + + Base* b_ptr = &d; + b_ptr->f(); // $ target=2 + b_ptr->virtual_f(); // $ target=8 SPURIOUS: target=3 + + Base& b_ref = d; + b_ref.f(); // $ target=2 + b_ref.virtual_f(); // $ target=8 SPURIOUS: target=3 + + Base* b_null = nullptr; + b_null->f(); // $ target=2 + b_null->virtual_f(); // $ target=3 + + Base* base_is_derived = new Derived(); + base_is_derived->f(); // $ target=2 + base_is_derived->virtual_f(); // $ target=8 SPURIOUS: target=3 + + Base* base_is_base = new Base(); + base_is_base->f(); // $ target=2 + base_is_base->virtual_f(); // $ target=3 + + Derived* derived_is_derived = new Derived(); + derived_is_derived->f(); // $ target=7 + derived_is_derived->virtual_f(); // $ target=8 + + Base& b_ref2 = b; + b_ref2 = d; + b_ref2.f(); // $ target=2 + b_ref2.virtual_f(); // $ target=3 +} + +struct S { + Base* b1; + Base* b2; +}; + +void test_fields() { + S s; + + s.b1 = new Base(); + s.b2 = new Derived(); + + s.b1->virtual_f(); // $ target=3 + s.b2->virtual_f(); // $ SPURIOUS: target=3 MISSING: target=8 + + s.b1 = new Derived(); + s.b2 = new Base(); + s.b1->virtual_f(); // $ MISSING: target=8 SPURIOUS: target=3 // type-tracking has no 'clearsContent' feature and C/C++ doesn't have field-based SSA + s.b2->virtual_f(); // $ target=3 // type-tracking has no 'clearsContent' feature and C/C++ doesn't have field-based SSA +} + +Base* getDerived() { + return new Derived(); +} + +void test_getDerived() { + Base* b = getDerived(); + b->virtual_f(); // $ target=8 SPURIOUS: target=3 + + Derived d = *(Derived*)getDerived(); + d.virtual_f(); // $ target=8 +} + +void write_to_arg(Base* b) { + *b = Derived(); +} + +void write_to_arg_2(Base** b) { + Derived* d = new Derived(); + *b = d; +} + +void test_write_to_arg() { + { + Base b; + write_to_arg(&b); + b.virtual_f(); // $ SPURIOUS: target=3 MISSING: target=8 // missing flow through the copy-constructor in write_to_arg + } + + { + Base* b; + write_to_arg_2(&b); + b->virtual_f(); // $ target=8 SPURIOUS: target=3 + } +} + +Base* global_derived; + +void set_global_to_derived() { + global_derived = new Derived(); +} + +void read_global() { + global_derived->virtual_f(); // $ target=8 SPURIOUS: target=3 +} + +Base* global_base_or_derived; + +void set_global_base_or_derived_1() { + global_base_or_derived = new Base(); +} + +void set_global_base_or_derived_2() { + global_base_or_derived = new Derived(); +} + +void read_global_base_or_derived() { + global_base_or_derived->virtual_f(); // $ target=3 target=8 +} \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/dispatch/test.expected b/cpp/ql/test/library-tests/dataflow/dispatch/test.expected new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/cpp/ql/test/library-tests/dataflow/dispatch/test.ql b/cpp/ql/test/library-tests/dataflow/dispatch/test.ql new file mode 100644 index 000000000000..de16d6da1ef1 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/dispatch/test.ql @@ -0,0 +1,22 @@ +import cpp +import utils.test.InlineExpectationsTest +import semmle.code.cpp.ir.dataflow.internal.DataFlowDispatch +import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate + +module ResolveDispatchTest implements TestSig { + string getARelevantTag() { result = "target" } + + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlowCall call, SourceCallable callable, MemberFunction mf | + mf = callable.asSourceCallable() and + not mf.isCompilerGenerated() and + callable = viableCallable(call) and + location = call.getLocation() and + element = call.toString() and + tag = "target" and + value = callable.getLocation().getStartLine().toString() + ) + } +} + +import MakeTest From 42fcfca8499d698eeb28bb694d9197adb2e331fa Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:39:26 +0200 Subject: [PATCH 02/12] C++: Remove the old virtual dispatch case from 'defaultViableCallable' and slightly reorganize the code in preparation for the next commit. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index b5e899bf0aac..e762a82d04f2 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -11,7 +11,15 @@ private import DataFlowImplCommon as DataFlowImplCommon * from `AdditionalCallTarget` into account. */ cached -DataFlowCallable defaultViableCallable(DataFlowCall call) { +DataFlowPrivate::DataFlowCallable defaultViableCallable(DataFlowPrivate::DataFlowCall call) { + result = defaultViableCallableWithoutLambda(call) + or + result = DataFlowImplCommon::viableCallableLambda(call, _) +} + +private DataFlowPrivate::DataFlowCallable defaultViableCallableWithoutLambda( + DataFlowPrivate::DataFlowCall call +) { DataFlowImplCommon::forceCachingInSameStage() and result = call.getStaticCallTarget() or @@ -26,17 +34,13 @@ DataFlowCallable defaultViableCallable(DataFlowCall call) { functionSignatureWithBody(qualifiedName, nparams, result.getUnderlyingCallable()) and strictcount(Function other | functionSignatureWithBody(qualifiedName, nparams, other)) = 1 ) - or - // Virtual dispatch - result.asSourceCallable() = call.(VirtualDispatch::DataSensitiveCall).resolve() } /** * Gets a function that might be called by `call`. */ -cached -DataFlowCallable viableCallable(DataFlowCall call) { - result = defaultViableCallable(call) +private DataFlowPrivate::DataFlowCallable nonVirtualDispatch(DataFlowPrivate::DataFlowCall call) { + result = defaultViableCallableWithoutLambda(call) or // Additional call targets result.getUnderlyingCallable() = From fdb9f7ba2afd971122e17770a6e60bc4632d2905 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:42:15 +0200 Subject: [PATCH 03/12] C++: Move these predicates to make the diff smaller. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 66 +++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index e762a82d04f2..477875fffde5 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -4,6 +4,39 @@ private import DataFlowPrivate private import DataFlowUtil private import DataFlowImplCommon as DataFlowImplCommon +/** + * Holds if `f` has name `qualifiedName` and `nparams` parameter count. This is + * an approximation of its signature for the purpose of matching functions that + * might be the same across link targets. + */ +private predicate functionSignature(Function f, string qualifiedName, int nparams) { + qualifiedName = f.getQualifiedName() and + nparams = f.getNumberOfParameters() and + not f.isStatic() +} + +/** + * Holds if `f` is a function with a body that has name `qualifiedName` and + * `nparams` parameter count. See `functionSignature`. + */ +private predicate functionSignatureWithBody(string qualifiedName, int nparams, Function f) { + functionSignature(f, qualifiedName, nparams) and + exists(f.getBlock()) +} + +/** + * Holds if the target of `call` is a function _with no definition_ that has + * name `qualifiedName` and `nparams` parameter count. See `functionSignature`. + */ +pragma[noinline] +private predicate callSignatureWithoutBody(string qualifiedName, int nparams, CallInstruction call) { + exists(Function target | + target = call.getStaticCallTarget() and + not exists(target.getBlock()) and + functionSignature(target, qualifiedName, nparams) + ) +} + /** * Gets a function that might be called by `call`. * @@ -219,39 +252,6 @@ private module VirtualDispatch { } } -/** - * Holds if `f` is a function with a body that has name `qualifiedName` and - * `nparams` parameter count. See `functionSignature`. - */ -private predicate functionSignatureWithBody(string qualifiedName, int nparams, Function f) { - functionSignature(f, qualifiedName, nparams) and - exists(f.getBlock()) -} - -/** - * Holds if the target of `call` is a function _with no definition_ that has - * name `qualifiedName` and `nparams` parameter count. See `functionSignature`. - */ -pragma[noinline] -private predicate callSignatureWithoutBody(string qualifiedName, int nparams, CallInstruction call) { - exists(Function target | - target = call.getStaticCallTarget() and - not exists(target.getBlock()) and - functionSignature(target, qualifiedName, nparams) - ) -} - -/** - * Holds if `f` has name `qualifiedName` and `nparams` parameter count. This is - * an approximation of its signature for the purpose of matching functions that - * might be the same across link targets. - */ -private predicate functionSignature(Function f, string qualifiedName, int nparams) { - qualifiedName = f.getQualifiedName() and - nparams = f.getNumberOfParameters() and - not f.isStatic() -} - /** * Holds if the set of viable implementations that can be called by `call` * might be improved by knowing the call context. From caf7464f3be67df6b001b715ac82b2c0da1d47d7 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:45:37 +0200 Subject: [PATCH 04/12] C++: Prefix with 'DataflowPrivate'. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index 477875fffde5..a4d29cdbcb93 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -1,6 +1,6 @@ private import cpp private import semmle.code.cpp.ir.IR -private import DataFlowPrivate +private import DataFlowPrivate as DataFlowPrivate private import DataFlowUtil private import DataFlowImplCommon as DataFlowImplCommon @@ -256,14 +256,16 @@ private module VirtualDispatch { * Holds if the set of viable implementations that can be called by `call` * might be improved by knowing the call context. */ -predicate mayBenefitFromCallContext(DataFlowCall call) { mayBenefitFromCallContext(call, _, _) } +predicate mayBenefitFromCallContext(DataFlowPrivate::DataFlowCall call) { + mayBenefitFromCallContext(call, _, _) +} /** * Holds if `call` is a call through a function pointer, and the pointer * value is given as the `arg`'th argument to `f`. */ private predicate mayBenefitFromCallContext( - VirtualDispatch::DataSensitiveCall call, DataFlowCallable f, int arg + DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCallable f, int arg ) { f = pragma[only_bind_out](call).getEnclosingCallable() and exists(InitializeParameterInstruction init | @@ -278,9 +280,11 @@ private predicate mayBenefitFromCallContext( * Gets a viable dispatch target of `call` in the context `ctx`. This is * restricted to those `call`s for which a context might make a difference. */ -DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { +DataFlowPrivate::DataFlowCallable viableImplInCallContext( + DataFlowPrivate::DataFlowCall call, DataFlowPrivate::DataFlowCall ctx +) { result = viableCallable(call) and - exists(int i, DataFlowCallable f | + exists(int i, DataFlowPrivate::DataFlowCallable f | mayBenefitFromCallContext(pragma[only_bind_into](call), f, i) and f = ctx.getStaticCallTarget() and result.asSourceCallable() = @@ -290,4 +294,8 @@ DataFlowCallable viableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { /** Holds if arguments at position `apos` match parameters at position `ppos`. */ pragma[inline] -predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos } +predicate parameterMatch( + DataFlowPrivate::ParameterPosition ppos, DataFlowPrivate::ArgumentPosition apos +) { + ppos = apos +} From d4188d59a8d0beabb9664e67054e11d2dca03951 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:48:07 +0200 Subject: [PATCH 05/12] C++: Instantiate the type tracking module inside a reusable module like it's done in Java. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 319 ++++++++++-------- 1 file changed, 174 insertions(+), 145 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index a4d29cdbcb93..c9e1ac8cfee4 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -1,8 +1,11 @@ private import cpp private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.DataFlow private import DataFlowPrivate as DataFlowPrivate private import DataFlowUtil private import DataFlowImplCommon as DataFlowImplCommon +private import codeql.typetracking.TypeTracking +private import SsaImpl as SsaImpl /** * Holds if `f` has name `qualifiedName` and `nparams` parameter count. This is @@ -81,174 +84,200 @@ private DataFlowPrivate::DataFlowCallable nonVirtualDispatch(DataFlowPrivate::Da .viableTarget(call.asCallInstruction().getUnconvertedResultExpression()) } +private class RelevantNode extends Node { + RelevantNode() { this.getType().stripType() instanceof Class } +} + +private signature DataFlowPrivate::DataFlowCallable methodDispatchSig( + DataFlowPrivate::DataFlowCall c +); + +private predicate ignoreConstructor(Expr e) { + e instanceof ConstructorDirectInit or + e instanceof ConstructorVirtualInit or + e instanceof ConstructorDelegationInit or + exists(ConstructorFieldInit init | init.getExpr() = e) +} + /** - * Provides virtual dispatch support compatible with the original - * implementation of `semmle.code.cpp.security.TaintTracking`. + * Holds if `n` is either: + * - the post-update node of a qualifier after a call to a constructor which + * constructs an object containing at least one virtual function. + * - a node which represents a derived-to-base instruction that converts from `c`. */ -private module VirtualDispatch { - /** A call that may dispatch differently depending on the qualifier value. */ - abstract class DataSensitiveCall extends DataFlowCall { - /** - * Gets the node whose value determines the target of this call. This node - * could be the qualifier of a virtual dispatch or the function-pointer - * expression in a call to a function pointer. What they have in common is - * that we need to find out which data flows there, and then it's up to the - * `resolve` predicate to stitch that information together and resolve the - * call. - */ - abstract Node getDispatchValue(); - - /** Gets a candidate target for this call. */ - abstract Function resolve(); - - /** - * Whether `src` can flow to this call. - * - * Searches backwards from `getDispatchValue()` to `src`. The `allowFromArg` - * parameter is true when the search is allowed to continue backwards into - * a parameter; non-recursive callers should pass `_` for `allowFromArg`. - */ - predicate flowsFrom(Node src, boolean allowFromArg) { - src = this.getDispatchValue() and allowFromArg = true - or - exists(Node other, boolean allowOtherFromArg | this.flowsFrom(other, allowOtherFromArg) | - // Call argument - exists(DataFlowCall call, Position i | - other.(ParameterNode).isParameterOf(pragma[only_bind_into](call).getStaticCallTarget(), i) and - src.(ArgumentNode).argumentOf(call, pragma[only_bind_into](pragma[only_bind_out](i))) - ) and - allowOtherFromArg = true and - allowFromArg = true +private predicate lambdaSourceImpl(RelevantNode n, Class c) { + // Object construction + exists(CallInstruction call, ThisArgumentOperand qualifier, Call e | + qualifier = call.getThisArgumentOperand() and + n.(PostUpdateNode).getPreUpdateNode().asOperand() = qualifier and + call.getStaticCallTarget() instanceof Constructor and + qualifier.getType().stripType() = c and + c.getABaseClass*().getAMemberFunction().isVirtual() and + e = call.getUnconvertedResultExpression() and + not ignoreConstructor(e) + | + exists(c.getABaseClass()) + or + exists(c.getADerivedClass()) + ) + or + // Conversion to a base class + exists(ConvertToBaseInstruction convert | + // Only keep the most specific cast + not convert.getUnary() instanceof ConvertToBaseInstruction and + n.asInstruction() = convert and + convert.getDerivedClass() = c and + c.getABaseClass*().getAMemberFunction().isVirtual() + ) +} + +private module TrackVirtualDispatch { + /** + * Gets a possible runtime target of `c` using both static call-target + * information, and call-target resolution from `lambdaDispatch0`. + */ + private DataFlowPrivate::DataFlowCallable dispatch(DataFlowPrivate::DataFlowCall c) { + result = nonVirtualDispatch(c) or + result = lambdaDispatch0(c) + } + + private module TtInput implements TypeTrackingInput { + final class Node = RelevantNode; + + class LocalSourceNode extends Node { + LocalSourceNode() { + this instanceof ParameterNode or - // Call return - exists(DataFlowCall call, ReturnKind returnKind | - other = getAnOutNode(call, returnKind) and - returnNodeWithKindAndEnclosingCallable(src, returnKind, call.getStaticCallTarget()) - ) and - allowFromArg = false + this instanceof DataFlowPrivate::OutNode or - // Local flow - localFlowStep(src, other) and - allowFromArg = allowOtherFromArg + DataFlowPrivate::readStep(_, _, this) or - // Flow from global variable to load. - exists(LoadInstruction load, GlobalOrNamespaceVariable var | - var = src.asVariable() and - other.asInstruction() = load and - addressOfGlobal(load.getSourceAddress(), var) and - // The `allowFromArg` concept doesn't play a role when `src` is a - // global variable, so we just set it to a single arbitrary value for - // performance. - allowFromArg = true - ) + DataFlowPrivate::storeStep(_, _, this) + or + DataFlowPrivate::jumpStep(_, this) or - // Flow from store to global variable. - exists(StoreInstruction store, GlobalOrNamespaceVariable var | - var = other.asVariable() and - store = src.asInstruction() and - storeIntoGlobal(store, var) and - // Setting `allowFromArg` to `true` like in the base case means we - // treat a store to a global variable like the dispatch itself: flow - // may come from anywhere. - allowFromArg = true + lambdaSourceImpl(this, _) + } + } + + final private class ContentSetFinal = ContentSet; + + class Content extends ContentSetFinal { + Content() { + exists(DataFlow::Content c | + this.isSingleton(c) and + c.getIndirectionIndex() = 1 ) - ) + } } - } - pragma[noinline] - private predicate storeIntoGlobal(StoreInstruction store, GlobalOrNamespaceVariable var) { - addressOfGlobal(store.getDestinationAddress(), var) - } + class ContentFilter extends Content { + Content getAMatchingContent() { result = this } + } - /** Holds if `addressInstr` is an instruction that produces the address of `var`. */ - private predicate addressOfGlobal(Instruction addressInstr, GlobalOrNamespaceVariable var) { - // Access directly to the global variable - addressInstr.(VariableAddressInstruction).getAstVariable() = var - or - // Access to a field on a global union - exists(FieldAddressInstruction fa | - fa = addressInstr and - fa.getObjectAddress().(VariableAddressInstruction).getAstVariable() = var and - fa.getField().getDeclaringType() instanceof Union - ) - } + predicate compatibleContents(Content storeContents, Content loadContents) { + storeContents = loadContents + } - /** - * A ReturnNode with its ReturnKind and its enclosing callable. - * - * Used to fix a join ordering issue in flowsFrom. - */ - pragma[noinline] - private predicate returnNodeWithKindAndEnclosingCallable( - ReturnNode node, ReturnKind kind, DataFlowCallable callable - ) { - node.getKind() = kind and - node.getFunction() = callable.getUnderlyingCallable() - } + predicate simpleLocalSmallStep(Node nodeFrom, Node nodeTo) { + nodeFrom.getFunction() instanceof Function and + simpleLocalFlowStep(nodeFrom, nodeTo, _) + } - /** Call through a function pointer. */ - private class DataSensitiveExprCall extends DataSensitiveCall { - DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) } + predicate levelStepNoCall(Node n1, LocalSourceNode n2) { none() } - override Node getDispatchValue() { result.asOperand() = this.getCallTargetOperand() } + predicate levelStepCall(Node n1, LocalSourceNode n2) { none() } - override Function resolve() { - exists(FunctionInstruction fi | - this.flowsFrom(instructionNode(fi), _) and - result = fi.getFunctionSymbol() - ) and - ( - this.getNumberOfArguments() <= result.getEffectiveNumberOfParameters() and - this.getNumberOfArguments() >= result.getEffectiveNumberOfParameters() - or - result.isVarargs() - ) - } - } + predicate storeStep(Node n1, Node n2, Content f) { DataFlowPrivate::storeStep(n1, f, n2) } - /** Call to a virtual function. */ - private class DataSensitiveOverriddenFunctionCall extends DataSensitiveCall { - DataSensitiveOverriddenFunctionCall() { - exists( - this.getStaticCallTarget() - .getUnderlyingCallable() - .(VirtualFunction) - .getAnOverridingFunction() + predicate callStep(Node n1, LocalSourceNode n2) { + exists(DataFlowPrivate::DataFlowCall call, DataFlowPrivate::Position pos | + n1.(DataFlowPrivate::ArgumentNode).argumentOf(call, pos) and + n2.(ParameterNode).isParameterOf(dispatch(call), pos) ) } - override Node getDispatchValue() { result.asInstruction() = this.getArgument(-1) } - - override MemberFunction resolve() { - exists(Class overridingClass | - this.overrideMayAffectCall(overridingClass, result) and - this.hasFlowFromCastFrom(overridingClass) + predicate returnStep(Node n1, LocalSourceNode n2) { + exists(DataFlowPrivate::DataFlowCallable callable, DataFlowPrivate::DataFlowCall call | + n1.(DataFlowPrivate::ReturnNode).getEnclosingCallable() = callable and + callable = dispatch(call) and + n2 = DataFlowPrivate::getAnOutNode(call, n1.(DataFlowPrivate::ReturnNode).getKind()) ) } - /** - * Holds if `this` is a virtual function call whose static target is - * overridden by `overridingFunction` in `overridingClass`. - */ - pragma[noinline] - private predicate overrideMayAffectCall(Class overridingClass, MemberFunction overridingFunction) { - overridingFunction.getAnOverriddenFunction+() = - this.getStaticCallTarget().getUnderlyingCallable().(VirtualFunction) and - overridingFunction.getDeclaringType() = overridingClass + predicate loadStep(Node n1, LocalSourceNode n2, Content f) { + DataFlowPrivate::readStep(n1, f, n2) } - /** - * Holds if the qualifier of `this` has flow from an upcast from - * `derivedClass`. - */ - pragma[noinline] - private predicate hasFlowFromCastFrom(Class derivedClass) { - exists(ConvertToBaseInstruction toBase | - this.flowsFrom(instructionNode(toBase), _) and - derivedClass = toBase.getDerivedClass() - ) - } + predicate loadStoreStep(Node nodeFrom, Node nodeTo, Content f1, Content f2) { none() } + + predicate withContentStep(Node nodeFrom, LocalSourceNode nodeTo, ContentFilter f) { none() } + + predicate withoutContentStep(Node nodeFrom, LocalSourceNode nodeTo, ContentFilter f) { none() } + + predicate jumpStep(Node n1, LocalSourceNode n2) { DataFlowPrivate::jumpStep(n1, n2) } + + predicate hasFeatureBacktrackStoreTarget() { none() } + } + + private predicate lambdaSource(RelevantNode n) { lambdaSourceImpl(n, _) } + + /** + * Holds if `n` is the qualifier of `call` which targets the virtual member + * function `mf`. + */ + private predicate lambdaSinkImpl(RelevantNode n, CallInstruction call, MemberFunction mf) { + n.asOperand() = call.getThisArgumentOperand() and + call.getStaticCallTarget() = mf and + mf.isVirtual() + } + + private predicate lambdaSink(RelevantNode n) { lambdaSinkImpl(n, _, _) } + + private import TypeTracking::TypeTrack::Graph + + private predicate edgePlus(PathNode n1, PathNode n2) = fastTC(edges/2)(n1, n2) + + /** + * Gets the most specific implementation of `mf` that may be called when the + * qualifier has runtime type `c`. + */ + private MemberFunction mostSpecific(MemberFunction mf, Class c) { + lambdaSinkImpl(_, _, mf) and + mf.getAnOverridingFunction*() = result and + ( + result.getDeclaringType() = c + or + not c.getAMemberFunction().getAnOverriddenFunction*() = mf and + result = mostSpecific(mf, c.getABaseClass()) + ) + } + + /** + * Gets a possible pair of end-points `(p1, p2)` where: + * - `p1` is a derived-to-base conversion that converts from some + * class `derived`, and + * - `p2` is the qualifier of a call to a virtual function that may + * target `callable`, and + * - `callable` is the most specific implementation that may be called when + * the qualifier has type `derived`. + */ + private predicate pairCand( + PathNode p1, PathNode p2, DataFlowPrivate::DataFlowCallable callable, + DataFlowPrivate::DataFlowCall call + ) { + exists(Class derived, MemberFunction mf | + lambdaSourceImpl(p1.getNode(), derived) and + lambdaSinkImpl(p2.getNode(), call.asCallInstruction(), mf) and + p1.isSource() and + p2.isSink() and + callable.asSourceCallable() = mostSpecific(mf, derived) + ) + } + + /** Gets a possible run-time target of `call`. */ + DataFlowPrivate::DataFlowCallable lambdaDispatch(DataFlowPrivate::DataFlowCall call) { + exists(PathNode p1, PathNode p2 | p1 = p2 or edgePlus(p1, p2) | pairCand(p1, p2, result, call)) } } From 383799ce67ec9af67d6e1fd155f3440bf2afe55c Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:48:31 +0200 Subject: [PATCH 06/12] C++: Perform 6 rounds of virtual dispatch resolution like Java. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index c9e1ac8cfee4..c644715aedb4 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -281,6 +281,47 @@ private module TrackVirtualDispatch { } } +private DataFlowPrivate::DataFlowCallable noDisp(DataFlowPrivate::DataFlowCall call) { none() } + +pragma[nomagic] +private DataFlowPrivate::DataFlowCallable d1(DataFlowPrivate::DataFlowCall call) { + result = TrackVirtualDispatch::lambdaDispatch(call) +} + +pragma[nomagic] +private DataFlowPrivate::DataFlowCallable d2(DataFlowPrivate::DataFlowCall call) { + result = TrackVirtualDispatch::lambdaDispatch(call) +} + +pragma[nomagic] +private DataFlowPrivate::DataFlowCallable d3(DataFlowPrivate::DataFlowCall call) { + result = TrackVirtualDispatch::lambdaDispatch(call) +} + +pragma[nomagic] +private DataFlowPrivate::DataFlowCallable d4(DataFlowPrivate::DataFlowCall call) { + result = TrackVirtualDispatch::lambdaDispatch(call) +} + +pragma[nomagic] +private DataFlowPrivate::DataFlowCallable d5(DataFlowPrivate::DataFlowCall call) { + result = TrackVirtualDispatch::lambdaDispatch(call) +} + +pragma[nomagic] +private DataFlowPrivate::DataFlowCallable d6(DataFlowPrivate::DataFlowCall call) { + result = TrackVirtualDispatch::lambdaDispatch(call) +} + +/** Gets a function that might be called by `call`. */ +cached +DataFlowPrivate::DataFlowCallable viableCallable(DataFlowPrivate::DataFlowCall call) { + not exists(d6(call)) and + result = nonVirtualDispatch(call) + or + result = d6(call) +} + /** * Holds if the set of viable implementations that can be called by `call` * might be improved by knowing the call context. From cca5bd9adac088c647874efd7975430a1558495a Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:50:23 +0200 Subject: [PATCH 07/12] C++: Update 'mayBenefitFromCallContext' to not use the old virtual dispatch local flow predicate. --- .../cpp/ir/dataflow/internal/DataFlowDispatch.qll | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index c644715aedb4..899dae69589c 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -330,6 +330,12 @@ predicate mayBenefitFromCallContext(DataFlowPrivate::DataFlowCall call) { mayBenefitFromCallContext(call, _, _) } +private predicate localLambdaFlowStep(Node nodeFrom, Node nodeTo) { + localFlowStep(nodeFrom, nodeTo) + or + DataFlowPrivate::additionalLambdaFlowStep(nodeFrom, nodeTo, _) +} + /** * Holds if `call` is a call through a function pointer, and the pointer * value is given as the `arg`'th argument to `f`. @@ -339,9 +345,13 @@ private predicate mayBenefitFromCallContext( ) { f = pragma[only_bind_out](call).getEnclosingCallable() and exists(InitializeParameterInstruction init | - not exists(call.getStaticCallTarget()) and + not exists(call.getStaticCallTarget()) + or + exists(call.getStaticCallSourceTarget().(VirtualFunction).getAnOverridingFunction()) + | init.getEnclosingFunction() = f.getUnderlyingCallable() and - call.flowsFrom(instructionNode(init), _) and + localLambdaFlowStep+(instructionNode(init), + operandNode(call.asCallInstruction().getCallTargetOperand())) and init.getParameter().getIndex() = arg ) } From 302d35bedc5a7124bc8f5afbe994389ba99d3c87 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 12:57:56 +0200 Subject: [PATCH 08/12] C++: Accept test changes. --- .../dataflow/dataflow-tests/dispatch.cpp | 20 +++++++++---------- .../dataflow-tests/test-source-sink.expected | 12 ++--------- .../library-tests/dataflow/dispatch/test.cpp | 20 +++++++++---------- 3 files changed, 22 insertions(+), 30 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp index 105212ccca67..50c698033a47 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp @@ -8,7 +8,7 @@ struct Top { virtual void isSink(int x) { } virtual int notSource1() { return source(); } virtual int notSource2() { return source(); } - virtual void notSink(int x) { sink(x); } // $ SPURIOUS: ast,ir=37:19 ast,ir=45:18 + virtual void notSink(int x) { sink(x); } // $ SPURIOUS: ast=37:19 ast=45:18 }; // This class has the correct behavior for just the functions ending in 2. @@ -32,16 +32,16 @@ void VirtualDispatch(Bottom *bottomPtr, Bottom &bottomRef) { // $ ast-def=bottom sink(topPtr->isSource2()); // $ ir MISSING: ast topPtr->isSink(source()); // causing a MISSING for ast - sink(topPtr->notSource1()); // $ SPURIOUS: ast,ir - sink(topPtr->notSource2()); // $ SPURIOUS: ast,ir + sink(topPtr->notSource1()); // $ SPURIOUS: ast + sink(topPtr->notSource2()); // $ SPURIOUS: ast topPtr->notSink(source()); // causing SPURIOUS for ast,ir sink(topRef.isSource1()); // $ ir MISSING: ast sink(topRef.isSource2()); // $ ir MISSING: ast topRef.isSink(source()); // causing a MISSING for ast - sink(topRef.notSource1()); // $ SPURIOUS: ast,ir - sink(topRef.notSource2()); // $ SPURIOUS: ast,ir + sink(topRef.notSource1()); // $ SPURIOUS: ast + sink(topRef.notSource2()); // $ SPURIOUS: ast topRef.notSink(source()); // causing SPURIOUS for ast,ir } @@ -52,10 +52,10 @@ Top *readGlobalBottom() { } void DispatchThroughGlobal() { - sink(globalBottom->isSource1()); // $ ir MISSING: ast + sink(globalBottom->isSource1()); // $ MISSING: ast,ir sink(globalMiddle->isSource1()); // no flow - sink(readGlobalBottom()->isSource1()); // $ ir MISSING: ast + sink(readGlobalBottom()->isSource1()); // $ MISSING: ast,ir globalBottom = new Bottom(); globalMiddle = new Middle(); @@ -93,7 +93,7 @@ void callIdentityFunctions(Top *top, Bottom *bottom) { // $ ast-def=bottom ast-d using SinkFunctionType = void (*)(int); void callSink(int x) { - sink(x); // $ ir=107:17 ir=140:8 ir=144:8 MISSING: ast=107:17 ast=140:8 ast=144:8 + sink(x); // $ ir MISSING: ast,ir=107:17 ast,ir=140:8 ast,ir=144:8 } SinkFunctionType returnCallSink() { @@ -126,8 +126,8 @@ namespace virtual_inheritance { // get flow from a `Middle` value to the call qualifier. Top *topPtr = bottomPtr, &topRef = bottomRef; - sink(topPtr->isSource()); // $ MISSING: ast,ir - sink(topRef.isSource()); // $ MISSING: ast,ir + sink(topPtr->isSource()); // $ ir MISSING: ast + sink(topRef.isSource()); // $ ir MISSING: ast } } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected index 6e0b03be9c61..7ca7e6a9bf01 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected @@ -169,26 +169,18 @@ irFlow | clang.cpp:50:35:50:40 | call to source | clang.cpp:53:17:53:26 | *stackArray | | clang.cpp:51:19:51:24 | call to source | clang.cpp:53:17:53:26 | *stackArray | | clang.cpp:57:21:57:28 | call to source | clang.cpp:59:8:59:8 | d | -| dispatch.cpp:9:37:9:42 | call to source | dispatch.cpp:35:16:35:25 | call to notSource1 | -| dispatch.cpp:9:37:9:42 | call to source | dispatch.cpp:43:15:43:24 | call to notSource1 | -| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:36:16:36:25 | call to notSource2 | -| dispatch.cpp:10:37:10:42 | call to source | dispatch.cpp:44:15:44:24 | call to notSource2 | | dispatch.cpp:16:37:16:42 | call to source | dispatch.cpp:32:16:32:24 | call to isSource2 | | dispatch.cpp:16:37:16:42 | call to source | dispatch.cpp:40:15:40:23 | call to isSource2 | | dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:31:16:31:24 | call to isSource1 | | dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:39:15:39:23 | call to isSource1 | -| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:55:22:55:30 | call to isSource1 | -| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:58:28:58:36 | call to isSource1 | | dispatch.cpp:33:18:33:23 | call to source | dispatch.cpp:23:38:23:38 | x | -| dispatch.cpp:37:19:37:24 | call to source | dispatch.cpp:11:38:11:38 | x | | dispatch.cpp:41:17:41:22 | call to source | dispatch.cpp:23:38:23:38 | x | -| dispatch.cpp:45:18:45:23 | call to source | dispatch.cpp:11:38:11:38 | x | | dispatch.cpp:69:15:69:20 | call to source | dispatch.cpp:23:38:23:38 | x | | dispatch.cpp:73:14:73:19 | call to source | dispatch.cpp:23:38:23:38 | x | | dispatch.cpp:81:13:81:18 | call to source | dispatch.cpp:23:38:23:38 | x | | dispatch.cpp:107:17:107:22 | call to source | dispatch.cpp:96:8:96:8 | x | -| dispatch.cpp:140:8:140:13 | call to source | dispatch.cpp:96:8:96:8 | x | -| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x | +| dispatch.cpp:117:38:117:43 | call to source | dispatch.cpp:129:18:129:25 | call to isSource | +| dispatch.cpp:117:38:117:43 | call to source | dispatch.cpp:130:17:130:24 | call to isSource | | flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:31:9:31:9 | x | | flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:61:8:61:11 | access to array | | flowOut.cpp:84:18:84:23 | call to source | flowOut.cpp:85:8:85:9 | * ... | diff --git a/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp b/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp index b89ba51f1c64..0d112acc9a17 100644 --- a/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp @@ -19,11 +19,11 @@ void test_simple() { Base* b_ptr = &d; b_ptr->f(); // $ target=2 - b_ptr->virtual_f(); // $ target=8 SPURIOUS: target=3 + b_ptr->virtual_f(); // $ target=8 Base& b_ref = d; b_ref.f(); // $ target=2 - b_ref.virtual_f(); // $ target=8 SPURIOUS: target=3 + b_ref.virtual_f(); // $ target=8 Base* b_null = nullptr; b_null->f(); // $ target=2 @@ -31,7 +31,7 @@ void test_simple() { Base* base_is_derived = new Derived(); base_is_derived->f(); // $ target=2 - base_is_derived->virtual_f(); // $ target=8 SPURIOUS: target=3 + base_is_derived->virtual_f(); // $ target=8 Base* base_is_base = new Base(); base_is_base->f(); // $ target=2 @@ -59,12 +59,12 @@ void test_fields() { s.b2 = new Derived(); s.b1->virtual_f(); // $ target=3 - s.b2->virtual_f(); // $ SPURIOUS: target=3 MISSING: target=8 + s.b2->virtual_f(); // $ target=8 s.b1 = new Derived(); s.b2 = new Base(); - s.b1->virtual_f(); // $ MISSING: target=8 SPURIOUS: target=3 // type-tracking has no 'clearsContent' feature and C/C++ doesn't have field-based SSA - s.b2->virtual_f(); // $ target=3 // type-tracking has no 'clearsContent' feature and C/C++ doesn't have field-based SSA + s.b1->virtual_f(); // $ target=8 SPURIOUS: target=3 // type-tracking has no 'clearsContent' feature and C/C++ doesn't have field-based SSA + s.b2->virtual_f(); // $ target=3 SPURIOUS: target=8 // type-tracking has no 'clearsContent' feature and C/C++ doesn't have field-based SSA } Base* getDerived() { @@ -73,7 +73,7 @@ Base* getDerived() { void test_getDerived() { Base* b = getDerived(); - b->virtual_f(); // $ target=8 SPURIOUS: target=3 + b->virtual_f(); // $ target=8 Derived d = *(Derived*)getDerived(); d.virtual_f(); // $ target=8 @@ -98,7 +98,7 @@ void test_write_to_arg() { { Base* b; write_to_arg_2(&b); - b->virtual_f(); // $ target=8 SPURIOUS: target=3 + b->virtual_f(); // $ target=8 } } @@ -109,7 +109,7 @@ void set_global_to_derived() { } void read_global() { - global_derived->virtual_f(); // $ target=8 SPURIOUS: target=3 + global_derived->virtual_f(); // $ SPURIOUS: target=3 MISSING: target=8 } Base* global_base_or_derived; @@ -123,5 +123,5 @@ void set_global_base_or_derived_2() { } void read_global_base_or_derived() { - global_base_or_derived->virtual_f(); // $ target=3 target=8 + global_base_or_derived->virtual_f(); // $ target=3 MISSING: target=8 } \ No newline at end of file From 16508b18004dec91c1577b9e2d165bf0b2745745 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 13:03:53 +0200 Subject: [PATCH 09/12] C++: Fix off-by-one error in getType on 'FinalGlobalValue' nodes and accept test changes. --- .../lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll | 2 +- .../test/library-tests/dataflow/dataflow-tests/dispatch.cpp | 4 ++-- .../dataflow/dataflow-tests/test-source-sink.expected | 2 ++ cpp/ql/test/library-tests/dataflow/dispatch/test.cpp | 4 ++-- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index a0a99711552c..ef4051171afb 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -795,7 +795,7 @@ class FinalGlobalValue extends Node, TFinalGlobalValue { override DataFlowType getType() { exists(int indirectionIndex | indirectionIndex = globalUse.getIndirectionIndex() and - result = getTypeImpl(globalUse.getUnderlyingType(), indirectionIndex - 1) + result = getTypeImpl(globalUse.getUnderlyingType(), indirectionIndex) ) } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp index 50c698033a47..6361d172ed47 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp @@ -52,10 +52,10 @@ Top *readGlobalBottom() { } void DispatchThroughGlobal() { - sink(globalBottom->isSource1()); // $ MISSING: ast,ir + sink(globalBottom->isSource1()); // $ ir MISSING: ast sink(globalMiddle->isSource1()); // no flow - sink(readGlobalBottom()->isSource1()); // $ MISSING: ast,ir + sink(readGlobalBottom()->isSource1()); // $ ir MISSING: ast globalBottom = new Bottom(); globalMiddle = new Middle(); diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected index 7ca7e6a9bf01..cb339d8d3656 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected @@ -173,6 +173,8 @@ irFlow | dispatch.cpp:16:37:16:42 | call to source | dispatch.cpp:40:15:40:23 | call to isSource2 | | dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:31:16:31:24 | call to isSource1 | | dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:39:15:39:23 | call to isSource1 | +| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:55:22:55:30 | call to isSource1 | +| dispatch.cpp:22:37:22:42 | call to source | dispatch.cpp:58:28:58:36 | call to isSource1 | | dispatch.cpp:33:18:33:23 | call to source | dispatch.cpp:23:38:23:38 | x | | dispatch.cpp:41:17:41:22 | call to source | dispatch.cpp:23:38:23:38 | x | | dispatch.cpp:69:15:69:20 | call to source | dispatch.cpp:23:38:23:38 | x | diff --git a/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp b/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp index 0d112acc9a17..f243b76ad140 100644 --- a/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/dispatch/test.cpp @@ -109,7 +109,7 @@ void set_global_to_derived() { } void read_global() { - global_derived->virtual_f(); // $ SPURIOUS: target=3 MISSING: target=8 + global_derived->virtual_f(); // $ target=8 } Base* global_base_or_derived; @@ -123,5 +123,5 @@ void set_global_base_or_derived_2() { } void read_global_base_or_derived() { - global_base_or_derived->virtual_f(); // $ target=3 MISSING: target=8 + global_base_or_derived->virtual_f(); // $ target=3 target=8 } \ No newline at end of file From 0631bd74666d4f0fb2759dc30cb57954dc42b19a Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 13:07:55 +0200 Subject: [PATCH 10/12] C++: Add object/flow conflation for unions when resolving function pointers. --- .../code/cpp/ir/dataflow/internal/DataFlowPrivate.qll | 9 ++++++++- .../library-tests/dataflow/dataflow-tests/dispatch.cpp | 2 +- .../dataflow/dataflow-tests/test-source-sink.expected | 2 ++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll index a03042a77ff0..3aa8994a4499 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -1492,7 +1492,14 @@ predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { } /** Extra data-flow steps needed for lambda flow analysis. */ -predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() } +predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { + preservesValue = false and + exists(ContentSet cs | cs.isSingleton(any(UnionContent uc)) | + storeStep(nodeFrom, cs, nodeTo) + or + readStep(nodeFrom, cs, nodeTo) + ) +} predicate knownSourceModel(Node source, string model) { External::sourceNode(source, _, model) } diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp index 6361d172ed47..63528d712c0c 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/dispatch.cpp @@ -93,7 +93,7 @@ void callIdentityFunctions(Top *top, Bottom *bottom) { // $ ast-def=bottom ast-d using SinkFunctionType = void (*)(int); void callSink(int x) { - sink(x); // $ ir MISSING: ast,ir=107:17 ast,ir=140:8 ast,ir=144:8 + sink(x); // $ ir=107:17 ir=140:8 ir=144:8 MISSING: ast=107:17 ast=140:8 ast=144:8 } SinkFunctionType returnCallSink() { diff --git a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected index cb339d8d3656..8c009241734a 100644 --- a/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected +++ b/cpp/ql/test/library-tests/dataflow/dataflow-tests/test-source-sink.expected @@ -183,6 +183,8 @@ irFlow | dispatch.cpp:107:17:107:22 | call to source | dispatch.cpp:96:8:96:8 | x | | dispatch.cpp:117:38:117:43 | call to source | dispatch.cpp:129:18:129:25 | call to isSource | | dispatch.cpp:117:38:117:43 | call to source | dispatch.cpp:130:17:130:24 | call to isSource | +| dispatch.cpp:140:8:140:13 | call to source | dispatch.cpp:96:8:96:8 | x | +| dispatch.cpp:144:8:144:13 | call to source | dispatch.cpp:96:8:96:8 | x | | flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:31:9:31:9 | x | | flowOut.cpp:5:16:5:21 | call to source | flowOut.cpp:61:8:61:11 | access to array | | flowOut.cpp:84:18:84:23 | call to source | flowOut.cpp:85:8:85:9 | * ... | From 02bf923f7e1c332683e3af55301308c3a7a24b7f Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Tue, 19 Aug 2025 13:50:03 +0200 Subject: [PATCH 11/12] C++: Add change note. --- cpp/ql/lib/change-notes/2025-08-19-virtual-dispatch.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 cpp/ql/lib/change-notes/2025-08-19-virtual-dispatch.md diff --git a/cpp/ql/lib/change-notes/2025-08-19-virtual-dispatch.md b/cpp/ql/lib/change-notes/2025-08-19-virtual-dispatch.md new file mode 100644 index 000000000000..4342bb7f62dc --- /dev/null +++ b/cpp/ql/lib/change-notes/2025-08-19-virtual-dispatch.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* The new dataflow/taint-tracking library (`semmle.code.cpp.dataflow.new.DataFlow` and `semmle.code.cpp.dataflow.new.TaintTracking`) now resolves virtual function calls more precisely. This results in fewer false positives when running dataflow/taint-tracking queries on C++ projects. \ No newline at end of file From 70d3e69ce5f1c7d3b819b8c60a39923ddd0d7366 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Wed, 20 Aug 2025 10:38:22 +0200 Subject: [PATCH 12/12] C++: Rename 'lambda' to 'virtual'. --- .../ir/dataflow/internal/DataFlowDispatch.qll | 40 ++++++++++--------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll index 899dae69589c..0d63558c956e 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -105,7 +105,7 @@ private predicate ignoreConstructor(Expr e) { * constructs an object containing at least one virtual function. * - a node which represents a derived-to-base instruction that converts from `c`. */ -private predicate lambdaSourceImpl(RelevantNode n, Class c) { +private predicate qualifierSourceImpl(RelevantNode n, Class c) { // Object construction exists(CallInstruction call, ThisArgumentOperand qualifier, Call e | qualifier = call.getThisArgumentOperand() and @@ -131,14 +131,14 @@ private predicate lambdaSourceImpl(RelevantNode n, Class c) { ) } -private module TrackVirtualDispatch { +private module TrackVirtualDispatch { /** * Gets a possible runtime target of `c` using both static call-target - * information, and call-target resolution from `lambdaDispatch0`. + * information, and call-target resolution from `virtualDispatch0`. */ private DataFlowPrivate::DataFlowCallable dispatch(DataFlowPrivate::DataFlowCall c) { result = nonVirtualDispatch(c) or - result = lambdaDispatch0(c) + result = virtualDispatch0(c) } private module TtInput implements TypeTrackingInput { @@ -156,7 +156,7 @@ private module TrackVirtualDispatch { or DataFlowPrivate::jumpStep(_, this) or - lambdaSourceImpl(this, _) + qualifierSourceImpl(this, _) } } @@ -220,21 +220,23 @@ private module TrackVirtualDispatch { predicate hasFeatureBacktrackStoreTarget() { none() } } - private predicate lambdaSource(RelevantNode n) { lambdaSourceImpl(n, _) } + private predicate qualifierSource(RelevantNode n) { qualifierSourceImpl(n, _) } /** * Holds if `n` is the qualifier of `call` which targets the virtual member * function `mf`. */ - private predicate lambdaSinkImpl(RelevantNode n, CallInstruction call, MemberFunction mf) { + private predicate qualifierOfVirtualCallImpl( + RelevantNode n, CallInstruction call, MemberFunction mf + ) { n.asOperand() = call.getThisArgumentOperand() and call.getStaticCallTarget() = mf and mf.isVirtual() } - private predicate lambdaSink(RelevantNode n) { lambdaSinkImpl(n, _, _) } + private predicate qualifierOfVirtualCall(RelevantNode n) { qualifierOfVirtualCallImpl(n, _, _) } - private import TypeTracking::TypeTrack::Graph + private import TypeTracking::TypeTrack::Graph private predicate edgePlus(PathNode n1, PathNode n2) = fastTC(edges/2)(n1, n2) @@ -243,7 +245,7 @@ private module TrackVirtualDispatch { * qualifier has runtime type `c`. */ private MemberFunction mostSpecific(MemberFunction mf, Class c) { - lambdaSinkImpl(_, _, mf) and + qualifierOfVirtualCallImpl(_, _, mf) and mf.getAnOverridingFunction*() = result and ( result.getDeclaringType() = c @@ -267,8 +269,8 @@ private module TrackVirtualDispatch { DataFlowPrivate::DataFlowCall call ) { exists(Class derived, MemberFunction mf | - lambdaSourceImpl(p1.getNode(), derived) and - lambdaSinkImpl(p2.getNode(), call.asCallInstruction(), mf) and + qualifierSourceImpl(p1.getNode(), derived) and + qualifierOfVirtualCallImpl(p2.getNode(), call.asCallInstruction(), mf) and p1.isSource() and p2.isSink() and callable.asSourceCallable() = mostSpecific(mf, derived) @@ -276,7 +278,7 @@ private module TrackVirtualDispatch { } /** Gets a possible run-time target of `call`. */ - DataFlowPrivate::DataFlowCallable lambdaDispatch(DataFlowPrivate::DataFlowCall call) { + DataFlowPrivate::DataFlowCallable virtualDispatch(DataFlowPrivate::DataFlowCall call) { exists(PathNode p1, PathNode p2 | p1 = p2 or edgePlus(p1, p2) | pairCand(p1, p2, result, call)) } } @@ -285,32 +287,32 @@ private DataFlowPrivate::DataFlowCallable noDisp(DataFlowPrivate::DataFlowCall c pragma[nomagic] private DataFlowPrivate::DataFlowCallable d1(DataFlowPrivate::DataFlowCall call) { - result = TrackVirtualDispatch::lambdaDispatch(call) + result = TrackVirtualDispatch::virtualDispatch(call) } pragma[nomagic] private DataFlowPrivate::DataFlowCallable d2(DataFlowPrivate::DataFlowCall call) { - result = TrackVirtualDispatch::lambdaDispatch(call) + result = TrackVirtualDispatch::virtualDispatch(call) } pragma[nomagic] private DataFlowPrivate::DataFlowCallable d3(DataFlowPrivate::DataFlowCall call) { - result = TrackVirtualDispatch::lambdaDispatch(call) + result = TrackVirtualDispatch::virtualDispatch(call) } pragma[nomagic] private DataFlowPrivate::DataFlowCallable d4(DataFlowPrivate::DataFlowCall call) { - result = TrackVirtualDispatch::lambdaDispatch(call) + result = TrackVirtualDispatch::virtualDispatch(call) } pragma[nomagic] private DataFlowPrivate::DataFlowCallable d5(DataFlowPrivate::DataFlowCall call) { - result = TrackVirtualDispatch::lambdaDispatch(call) + result = TrackVirtualDispatch::virtualDispatch(call) } pragma[nomagic] private DataFlowPrivate::DataFlowCallable d6(DataFlowPrivate::DataFlowCall call) { - result = TrackVirtualDispatch::lambdaDispatch(call) + result = TrackVirtualDispatch::virtualDispatch(call) } /** Gets a function that might be called by `call`. */