Skip to content

Commit b4373d6

Browse files
committed
Shared: Add DataFlow::DeduplicatePathGraph
1 parent 7780fe9 commit b4373d6

File tree

1 file changed

+172
-0
lines changed

1 file changed

+172
-0
lines changed

shared/dataflow/codeql/dataflow/DataFlow.qll

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,4 +697,176 @@ module DataFlowMake<InputSig Lang> {
697697
}
698698
}
699699
}
700+
701+
/**
702+
* Generates a `PathGraph` in which equivalent path nodes are merged, in order to avoid duplicate paths.
703+
*/
704+
module DeduplicatePathGraph<PathNodeSig InputPathNode, PathGraphSig<InputPathNode> Graph> {
705+
// NOTE: there is a known limitation in that this module cannot see which nodes are sources or sinks.
706+
// This only matters in the rare case where a sink PathNode has a non-empty set of succesors, and there is a
707+
// non-sink PathNode with the same `(node, toString)` value and the same successors, but is transitively
708+
// reachable from a different set of PathNodes. (And conversely for sources).
709+
//
710+
/**
711+
* Gets a successor of `node`, taking `subpaths` into account.
712+
*/
713+
private InputPathNode getASuccessorLike(InputPathNode node) {
714+
Graph::edges(node, result)
715+
or
716+
Graph::subpaths(node, _, _, result) // arg -> out
717+
//
718+
// Note that there is no case for `arg -> param` or `ret -> out` for subpaths.
719+
// It is OK to collapse nodes inside a subpath while calls to that subpaths aren't collapsed and vice versa.
720+
}
721+
722+
private InputPathNode getAPredecessorLike(InputPathNode node) {
723+
node = getASuccessorLike(result)
724+
}
725+
726+
pragma[nomagic]
727+
private InputPathNode getAPathNode(Node node, string toString) {
728+
result.getNode() = node and
729+
Graph::nodes(result, _, toString)
730+
}
731+
732+
private signature predicate collapseCandidateSig(Node node, string toString);
733+
734+
private signature InputPathNode stepSig(InputPathNode node);
735+
736+
/**
737+
* Performs a forward or backward pass computing which `(node, toString)` pairs can subsume their corresponding
738+
* path nodes.
739+
*
740+
* This is similar to automaton minimization, but for an NFA. Since minimizing an NFA is NP-hard (and does not have
741+
* a unique minimal NFA), we operate with the simpler model: for a given `(node, toString)` pair, either all
742+
* corresponding path nodes are merged, or none are merged.
743+
*
744+
* Comments are written as if this checks for outgoing edges and propagates backward, though the module is also
745+
* used to perform the opposite direction.
746+
*/
747+
private module MakeDiscriminatorPass<collapseCandidateSig/2 collapseCandidate, stepSig/1 step> {
748+
/**
749+
* Gets the number of `(node, toString)` pairs reachable in one step from `pathNode`.
750+
*/
751+
private int getOutDegreeFromPathNode(InputPathNode pathNode) {
752+
result = count(Node node, string toString | step(pathNode) = getAPathNode(node, toString))
753+
}
754+
755+
/**
756+
* Gets the number of `(node2, toString2)` pairs reachable in one step from path nodes corresponding to `(node, toString)`.
757+
*/
758+
private int getOutDegreeFromNode(Node node, string toString) {
759+
result =
760+
strictcount(Node node2, string toString2 |
761+
step(getAPathNode(node, toString)) = getAPathNode(node2, toString2)
762+
)
763+
}
764+
765+
/** Holds if `(node, toString)` cannot be collapsed (but was a candidate for being collapsed). */
766+
predicate discriminatedPair(Node node, string toString) {
767+
collapseCandidate(node, toString) and
768+
(
769+
// Check if all corresponding PathNodes have the same successor sets when projected to `(node, toString)`.
770+
// To do this, we check that each successor set has the same size as the union of the succesor sets.
771+
// - If the successor sets are equal, then they are also equal to their union, and so have the correct size.
772+
// - Conversely, if two successor sets are not equal, one of them must be missing an element that is present
773+
// in the union, but must still be a subset of the union, and thus be strictly smaller than the union.
774+
getOutDegreeFromPathNode(getAPathNode(node, toString)) <
775+
getOutDegreeFromNode(node, toString)
776+
or
777+
// Retain flow state if one of the successors requires it to be retained
778+
discriminatedPathNode(step(getAPathNode(node, toString)))
779+
)
780+
}
781+
782+
/** Holds if `pathNode` cannot be collapsed. */
783+
predicate discriminatedPathNode(InputPathNode pathNode) {
784+
exists(Node node, string toString |
785+
discriminatedPair(node, toString) and
786+
getAPathNode(node, toString) = pathNode
787+
)
788+
}
789+
}
790+
791+
private predicate initialCandidate(Node node, string toString) {
792+
exists(getAPathNode(node, toString))
793+
}
794+
795+
private module Pass1 = MakeDiscriminatorPass<initialCandidate/2, getASuccessorLike/1>;
796+
797+
private module Pass2 = MakeDiscriminatorPass<Pass1::discriminatedPair/2, getAPredecessorLike/1>;
798+
799+
private newtype TPathNode =
800+
TPreservedPathNode(InputPathNode node) { Pass2::discriminatedPathNode(node) } or
801+
TCollapsedPathNode(Node node, string toString) {
802+
initialCandidate(node, toString) and
803+
not Pass2::discriminatedPair(node, toString)
804+
}
805+
806+
/** A node in the path graph after equivalent nodes have been collapsed. */
807+
class PathNode extends TPathNode {
808+
private Node asCollapsedNode() { this = TCollapsedPathNode(result, _) }
809+
810+
private InputPathNode asPreservedNode() { this = TPreservedPathNode(result) }
811+
812+
/** Gets a correspondng node in the original graph. */
813+
InputPathNode getAnOriginalPathNode() {
814+
exists(Node node, string toString |
815+
this = TCollapsedPathNode(node, toString) and
816+
result = getAPathNode(node, toString)
817+
)
818+
or
819+
result = this.asPreservedNode()
820+
}
821+
822+
/** Gets a string representation of this node. */
823+
string toString() {
824+
result = this.asPreservedNode().toString() or this = TCollapsedPathNode(_, result)
825+
}
826+
827+
/**
828+
* Holds if this element is at the specified location.
829+
* The location spans column `startcolumn` of line `startline` to
830+
* column `endcolumn` of line `endline` in file `filepath`.
831+
* For more information, see
832+
* [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
833+
*/
834+
predicate hasLocationInfo(
835+
string filepath, int startline, int startcolumn, int endline, int endcolumn
836+
) {
837+
this.getAnOriginalPathNode()
838+
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
839+
}
840+
841+
/** Gets the corresponding data-flow node. */
842+
Node getNode() {
843+
result = this.asCollapsedNode()
844+
or
845+
result = this.asPreservedNode().getNode()
846+
}
847+
}
848+
849+
/**
850+
* Provides the query predicates needed to include a graph in a path-problem query.
851+
*/
852+
module PathGraph implements PathGraphSig<PathNode> {
853+
query predicate nodes(PathNode node, string key, string val) {
854+
Graph::nodes(node.getAnOriginalPathNode(), key, val)
855+
}
856+
857+
query predicate edges(PathNode node1, PathNode node2) {
858+
Graph::edges(node1.getAnOriginalPathNode(), node2.getAnOriginalPathNode())
859+
}
860+
861+
query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) {
862+
// Note: this may look suspiciously simple, but it's not an oversight. Even if the caller needs to retain state,
863+
// it is entirely possible to step through a subpath in which state has been projected away.
864+
Graph::subpaths(arg.getAnOriginalPathNode(), par.getAnOriginalPathNode(),
865+
ret.getAnOriginalPathNode(), out.getAnOriginalPathNode())
866+
}
867+
}
868+
869+
// Re-export the PathGraph so the user can import a single module and get both PathNode and the query predicates
870+
import PathGraph
871+
}
700872
}

0 commit comments

Comments
 (0)