@@ -697,4 +697,176 @@ module DataFlowMake<InputSig Lang> {
697697 }
698698 }
699699 }
700+
701+ /**
702+ * Generates a `PathGraph` in which equivalent path nodes are merged, in order to avoid duplicate paths.
703+ */
704+ module DeduplicatePathGraph< PathNodeSig InputPathNode, PathGraphSig< InputPathNode > Graph> {
705+ // NOTE: there is a known limitation in that this module cannot see which nodes are sources or sinks.
706+ // This only matters in the rare case where a sink PathNode has a non-empty set of succesors, and there is a
707+ // non-sink PathNode with the same `(node, toString)` value and the same successors, but is transitively
708+ // reachable from a different set of PathNodes. (And conversely for sources).
709+ //
710+ /**
711+ * Gets a successor of `node`, taking `subpaths` into account.
712+ */
713+ private InputPathNode getASuccessorLike ( InputPathNode node ) {
714+ Graph:: edges ( node , result )
715+ or
716+ Graph:: subpaths ( node , _, _, result ) // arg -> out
717+ //
718+ // Note that there is no case for `arg -> param` or `ret -> out` for subpaths.
719+ // It is OK to collapse nodes inside a subpath while calls to that subpaths aren't collapsed and vice versa.
720+ }
721+
722+ private InputPathNode getAPredecessorLike ( InputPathNode node ) {
723+ node = getASuccessorLike ( result )
724+ }
725+
726+ pragma [ nomagic]
727+ private InputPathNode getAPathNode ( Node node , string toString ) {
728+ result .getNode ( ) = node and
729+ Graph:: nodes ( result , _, toString )
730+ }
731+
732+ private signature predicate collapseCandidateSig ( Node node , string toString ) ;
733+
734+ private signature InputPathNode stepSig ( InputPathNode node ) ;
735+
736+ /**
737+ * Performs a forward or backward pass computing which `(node, toString)` pairs can subsume their corresponding
738+ * path nodes.
739+ *
740+ * This is similar to automaton minimization, but for an NFA. Since minimizing an NFA is NP-hard (and does not have
741+ * a unique minimal NFA), we operate with the simpler model: for a given `(node, toString)` pair, either all
742+ * corresponding path nodes are merged, or none are merged.
743+ *
744+ * Comments are written as if this checks for outgoing edges and propagates backward, though the module is also
745+ * used to perform the opposite direction.
746+ */
747+ private module MakeDiscriminatorPass< collapseCandidateSig / 2 collapseCandidate, stepSig / 1 step> {
748+ /**
749+ * Gets the number of `(node, toString)` pairs reachable in one step from `pathNode`.
750+ */
751+ private int getOutDegreeFromPathNode ( InputPathNode pathNode ) {
752+ result = count ( Node node , string toString | step ( pathNode ) = getAPathNode ( node , toString ) )
753+ }
754+
755+ /**
756+ * Gets the number of `(node2, toString2)` pairs reachable in one step from path nodes corresponding to `(node, toString)`.
757+ */
758+ private int getOutDegreeFromNode ( Node node , string toString ) {
759+ result =
760+ strictcount ( Node node2 , string toString2 |
761+ step ( getAPathNode ( node , toString ) ) = getAPathNode ( node2 , toString2 )
762+ )
763+ }
764+
765+ /** Holds if `(node, toString)` cannot be collapsed (but was a candidate for being collapsed). */
766+ predicate discriminatedPair ( Node node , string toString ) {
767+ collapseCandidate ( node , toString ) and
768+ (
769+ // Check if all corresponding PathNodes have the same successor sets when projected to `(node, toString)`.
770+ // To do this, we check that each successor set has the same size as the union of the succesor sets.
771+ // - If the successor sets are equal, then they are also equal to their union, and so have the correct size.
772+ // - Conversely, if two successor sets are not equal, one of them must be missing an element that is present
773+ // in the union, but must still be a subset of the union, and thus be strictly smaller than the union.
774+ getOutDegreeFromPathNode ( getAPathNode ( node , toString ) ) <
775+ getOutDegreeFromNode ( node , toString )
776+ or
777+ // Retain flow state if one of the successors requires it to be retained
778+ discriminatedPathNode ( step ( getAPathNode ( node , toString ) ) )
779+ )
780+ }
781+
782+ /** Holds if `pathNode` cannot be collapsed. */
783+ predicate discriminatedPathNode ( InputPathNode pathNode ) {
784+ exists ( Node node , string toString |
785+ discriminatedPair ( node , toString ) and
786+ getAPathNode ( node , toString ) = pathNode
787+ )
788+ }
789+ }
790+
791+ private predicate initialCandidate ( Node node , string toString ) {
792+ exists ( getAPathNode ( node , toString ) )
793+ }
794+
795+ private module Pass1 = MakeDiscriminatorPass< initialCandidate / 2 , getASuccessorLike / 1 > ;
796+
797+ private module Pass2 = MakeDiscriminatorPass< Pass1:: discriminatedPair / 2 , getAPredecessorLike / 1 > ;
798+
799+ private newtype TPathNode =
800+ TPreservedPathNode ( InputPathNode node ) { Pass2:: discriminatedPathNode ( node ) } or
801+ TCollapsedPathNode ( Node node , string toString ) {
802+ initialCandidate ( node , toString ) and
803+ not Pass2:: discriminatedPair ( node , toString )
804+ }
805+
806+ /** A node in the path graph after equivalent nodes have been collapsed. */
807+ class PathNode extends TPathNode {
808+ private Node asCollapsedNode ( ) { this = TCollapsedPathNode ( result , _) }
809+
810+ private InputPathNode asPreservedNode ( ) { this = TPreservedPathNode ( result ) }
811+
812+ /** Gets a correspondng node in the original graph. */
813+ InputPathNode getAnOriginalPathNode ( ) {
814+ exists ( Node node , string toString |
815+ this = TCollapsedPathNode ( node , toString ) and
816+ result = getAPathNode ( node , toString )
817+ )
818+ or
819+ result = this .asPreservedNode ( )
820+ }
821+
822+ /** Gets a string representation of this node. */
823+ string toString ( ) {
824+ result = this .asPreservedNode ( ) .toString ( ) or this = TCollapsedPathNode ( _, result )
825+ }
826+
827+ /**
828+ * Holds if this element is at the specified location.
829+ * The location spans column `startcolumn` of line `startline` to
830+ * column `endcolumn` of line `endline` in file `filepath`.
831+ * For more information, see
832+ * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/).
833+ */
834+ predicate hasLocationInfo (
835+ string filepath , int startline , int startcolumn , int endline , int endcolumn
836+ ) {
837+ this .getAnOriginalPathNode ( )
838+ .hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
839+ }
840+
841+ /** Gets the corresponding data-flow node. */
842+ Node getNode ( ) {
843+ result = this .asCollapsedNode ( )
844+ or
845+ result = this .asPreservedNode ( ) .getNode ( )
846+ }
847+ }
848+
849+ /**
850+ * Provides the query predicates needed to include a graph in a path-problem query.
851+ */
852+ module PathGraph implements PathGraphSig< PathNode > {
853+ query predicate nodes ( PathNode node , string key , string val ) {
854+ Graph:: nodes ( node .getAnOriginalPathNode ( ) , key , val )
855+ }
856+
857+ query predicate edges ( PathNode node1 , PathNode node2 ) {
858+ Graph:: edges ( node1 .getAnOriginalPathNode ( ) , node2 .getAnOriginalPathNode ( ) )
859+ }
860+
861+ query predicate subpaths ( PathNode arg , PathNode par , PathNode ret , PathNode out ) {
862+ // Note: this may look suspiciously simple, but it's not an oversight. Even if the caller needs to retain state,
863+ // it is entirely possible to step through a subpath in which state has been projected away.
864+ Graph:: subpaths ( arg .getAnOriginalPathNode ( ) , par .getAnOriginalPathNode ( ) ,
865+ ret .getAnOriginalPathNode ( ) , out .getAnOriginalPathNode ( ) )
866+ }
867+ }
868+
869+ // Re-export the PathGraph so the user can import a single module and get both PathNode and the query predicates
870+ import PathGraph
871+ }
700872}
0 commit comments