@@ -851,4 +851,176 @@ module DataFlowMake<LocationSig Location, InputSig<Location> Lang> {
851851 }
852852 }
853853 }
854+
855+ /**
856+ * Generates a `PathGraph` in which equivalent path nodes are merged, in order to avoid duplicate paths.
857+ */
858+ module DeduplicatePathGraph< PathNodeSig InputPathNode, PathGraphSig< InputPathNode > Graph> {
859+ // NOTE: there is a known limitation in that this module cannot see which nodes are sources or sinks.
860+ // This only matters in the rare case where a sink PathNode has a non-empty set of succesors, and there is a
861+ // non-sink PathNode with the same `(node, toString)` value and the same successors, but is transitively
862+ // reachable from a different set of PathNodes. (And conversely for sources).
863+ //
864+ /**
865+ * Gets a successor of `node`, taking `subpaths` into account.
866+ */
867+ private InputPathNode getASuccessorLike ( InputPathNode node ) {
868+ Graph:: edges ( node , result )
869+ or
870+ Graph:: subpaths ( node , _, _, result ) // arg -> out
871+ //
872+ // Note that there is no case for `arg -> param` or `ret -> out` for subpaths.
873+ // It is OK to collapse nodes inside a subpath while calls to that subpaths aren't collapsed and vice versa.
874+ }
875+
876+ private InputPathNode getAPredecessorLike ( InputPathNode node ) {
877+ node = getASuccessorLike ( result )
878+ }
879+
880+ pragma [ nomagic]
881+ private InputPathNode getAPathNode ( Node node , string toString ) {
882+ result .getNode ( ) = node and
883+ Graph:: nodes ( result , _, toString )
884+ }
885+
886+ private signature predicate collapseCandidateSig ( Node node , string toString ) ;
887+
888+ private signature InputPathNode stepSig ( InputPathNode node ) ;
889+
890+ /**
891+ * Performs a forward or backward pass computing which `(node, toString)` pairs can subsume their corresponding
892+ * path nodes.
893+ *
894+ * This is similar to automaton minimization, but for an NFA. Since minimizing an NFA is NP-hard (and does not have
895+ * a unique minimal NFA), we operate with the simpler model: for a given `(node, toString)` pair, either all
896+ * corresponding path nodes are merged, or none are merged.
897+ *
898+ * Comments are written as if this checks for outgoing edges and propagates backward, though the module is also
899+ * used to perform the opposite direction.
900+ */
901+ private module MakeDiscriminatorPass< collapseCandidateSig / 2 collapseCandidate, stepSig / 1 step> {
902+ /**
903+ * Gets the number of `(node, toString)` pairs reachable in one step from `pathNode`.
904+ */
905+ private int getOutDegreeFromPathNode ( InputPathNode pathNode ) {
906+ result = count ( Node node , string toString | step ( pathNode ) = getAPathNode ( node , toString ) )
907+ }
908+
909+ /**
910+ * Gets the number of `(node2, toString2)` pairs reachable in one step from path nodes corresponding to `(node, toString)`.
911+ */
912+ private int getOutDegreeFromNode ( Node node , string toString ) {
913+ result =
914+ strictcount ( Node node2 , string toString2 |
915+ step ( getAPathNode ( node , toString ) ) = getAPathNode ( node2 , toString2 )
916+ )
917+ }
918+
919+ /** Holds if `(node, toString)` cannot be collapsed (but was a candidate for being collapsed). */
920+ predicate discriminatedPair ( Node node , string toString ) {
921+ collapseCandidate ( node , toString ) and
922+ (
923+ // Check if all corresponding PathNodes have the same successor sets when projected to `(node, toString)`.
924+ // To do this, we check that each successor set has the same size as the union of the succesor sets.
925+ // - If the successor sets are equal, then they are also equal to their union, and so have the correct size.
926+ // - Conversely, if two successor sets are not equal, one of them must be missing an element that is present
927+ // in the union, but must still be a subset of the union, and thus be strictly smaller than the union.
928+ getOutDegreeFromPathNode ( getAPathNode ( node , toString ) ) <
929+ getOutDegreeFromNode ( node , toString )
930+ or
931+ // Retain flow state if one of the successors requires it to be retained
932+ discriminatedPathNode ( step ( getAPathNode ( node , toString ) ) )
933+ )
934+ }
935+
936+ /** Holds if `pathNode` cannot be collapsed. */
937+ predicate discriminatedPathNode ( InputPathNode pathNode ) {
938+ exists ( Node node , string toString |
939+ discriminatedPair ( node , toString ) and
940+ getAPathNode ( node , toString ) = pathNode
941+ )
942+ }
943+ }
944+
945+ private predicate initialCandidate ( Node node , string toString ) {
946+ exists ( getAPathNode ( node , toString ) )
947+ }
948+
949+ private module Pass1 = MakeDiscriminatorPass< initialCandidate / 2 , getASuccessorLike / 1 > ;
950+
951+ private module Pass2 = MakeDiscriminatorPass< Pass1:: discriminatedPair / 2 , getAPredecessorLike / 1 > ;
952+
953+ private newtype TPathNode =
954+ TPreservedPathNode ( InputPathNode node ) { Pass2:: discriminatedPathNode ( node ) } or
955+ TCollapsedPathNode ( Node node , string toString ) {
956+ initialCandidate ( node , toString ) and
957+ not Pass2:: discriminatedPair ( node , toString )
958+ }
959+
960+ /** A node in the path graph after equivalent nodes have been collapsed. */
961+ class PathNode extends TPathNode {
962+ private Node asCollapsedNode ( ) { this = TCollapsedPathNode ( result , _) }
963+
964+ private InputPathNode asPreservedNode ( ) { this = TPreservedPathNode ( result ) }
965+
966+ /** Gets a correspondng node in the original graph. */
967+ InputPathNode getAnOriginalPathNode ( ) {
968+ exists ( Node node , string toString |
969+ this = TCollapsedPathNode ( node , toString ) and
970+ result = getAPathNode ( node , toString )
971+ )
972+ or
973+ result = this .asPreservedNode ( )
974+ }
975+
976+ /** Gets a string representation of this node. */
977+ string toString ( ) {
978+ result = this .asPreservedNode ( ) .toString ( ) or this = TCollapsedPathNode ( _, result )
979+ }
980+
981+ /**
982+ * Holds if this element is at the specified location.
983+ * The location spans column `startcolumn` of line `startline` to
984+ * column `endcolumn` of line `endline` in file `filepath`.
985+ * For more information, see
986+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
987+ */
988+ predicate hasLocationInfo (
989+ string filepath , int startline , int startcolumn , int endline , int endcolumn
990+ ) {
991+ this .getAnOriginalPathNode ( )
992+ .hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
993+ }
994+
995+ /** Gets the corresponding data-flow node. */
996+ Node getNode ( ) {
997+ result = this .asCollapsedNode ( )
998+ or
999+ result = this .asPreservedNode ( ) .getNode ( )
1000+ }
1001+ }
1002+
1003+ /**
1004+ * Provides the query predicates needed to include a graph in a path-problem query.
1005+ */
1006+ module PathGraph implements PathGraphSig< PathNode > {
1007+ query predicate nodes ( PathNode node , string key , string val ) {
1008+ Graph:: nodes ( node .getAnOriginalPathNode ( ) , key , val )
1009+ }
1010+
1011+ query predicate edges ( PathNode node1 , PathNode node2 ) {
1012+ Graph:: edges ( node1 .getAnOriginalPathNode ( ) , node2 .getAnOriginalPathNode ( ) )
1013+ }
1014+
1015+ query predicate subpaths ( PathNode arg , PathNode par , PathNode ret , PathNode out ) {
1016+ // Note: this may look suspiciously simple, but it's not an oversight. Even if the caller needs to retain state,
1017+ // it is entirely possible to step through a subpath in which state has been projected away.
1018+ Graph:: subpaths ( arg .getAnOriginalPathNode ( ) , par .getAnOriginalPathNode ( ) ,
1019+ ret .getAnOriginalPathNode ( ) , out .getAnOriginalPathNode ( ) )
1020+ }
1021+ }
1022+
1023+ // Re-export the PathGraph so the user can import a single module and get both PathNode and the query predicates
1024+ import PathGraph
1025+ }
8541026}
0 commit comments