@@ -21,6 +21,7 @@ private import codeql.ruby.typetracking.TypeTracker
2121private import codeql.ruby.ApiGraphs
2222private import codeql.ruby.Concepts
2323private import codeql.ruby.dataflow.internal.DataFlowPrivate as DataFlowPrivate
24+ private import codeql.ruby.dataflow.internal.TaintTrackingPrivate as TaintTrackingPrivate
2425private import codeql.ruby.TaintTracking
2526private import codeql.ruby.frameworks.core.String
2627
@@ -37,43 +38,6 @@ DataFlow::LocalSourceNode strStart() {
3738/** Gets a dataflow node for a regular expression literal. */
3839DataFlow:: LocalSourceNode regStart ( ) { result .asExpr ( ) .getExpr ( ) instanceof Ast:: RegExpLiteral }
3940
40- /**
41- * Holds if the analysis should track flow from `nodeFrom` to `nodeTo` on top of the ordinary type-tracking steps.
42- * `nodeFrom` and `nodeTo` has type `fromType` and `toType` respectively.
43- * The types are either "string" or "regexp".
44- */
45- predicate step (
46- DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo , string fromType , string toType
47- ) {
48- fromType = toType and
49- fromType = "string" and
50- (
51- // include taint flow through `String` summaries
52- TaintTracking:: localTaintStep ( nodeFrom , nodeTo ) and
53- nodeFrom .( DataFlowPrivate:: SummaryNode ) .getSummarizedCallable ( ) instanceof
54- String:: SummarizedCallable
55- or
56- // string concatenations, and
57- exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
58- op = nodeTo .asExpr ( ) and
59- op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
60- op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
61- )
62- or
63- // string interpolations
64- nodeFrom .asExpr ( ) =
65- nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
66- )
67- or
68- fromType = "string" and
69- toType = "reg" and
70- exists ( DataFlow:: CallNode call |
71- call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
72- nodeFrom = call .getArgument ( 0 ) and
73- nodeTo = call
74- )
75- }
76-
7741/** Gets a node where string values that flow to the node are interpreted as regular expressions. */
7842DataFlow:: Node stringSink ( ) {
7943 result instanceof RE:: RegExpInterpretation:: Range and
@@ -91,70 +55,174 @@ DataFlow::Node stringSink() {
9155/** Gets a node where regular expressions that flow to the node are used. */
9256DataFlow:: Node regSink ( ) { result = any ( RegexExecution exec ) .getRegex ( ) }
9357
94- /** Gets a node that is reachable by type-tracking from any string or regular expression. */
95- DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
96- t .start ( ) and
97- result = [ strStart ( ) , regStart ( ) ]
98- or
99- exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
100- or
101- exists ( TypeTracker t2 | t2 = t .continue ( ) | step ( forward ( t2 ) .getALocalUse ( ) , result , _, _) )
58+ private signature module TypeTrackInputSig {
59+ DataFlow:: LocalSourceNode start ( TypeTracker t , DataFlow:: Node start ) ;
60+
61+ predicate end ( DataFlow:: Node n ) ;
62+
63+ predicate additionalStep ( DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo ) ;
10264}
10365
10466/**
105- * Gets a node that is backwards reachable from any regular expression use ,
106- * where that use is reachable by type- tracking from any string or regular expression .
67+ * Provides a version of type tracking where we first prune for reachable nodes ,
68+ * before doing the type tracking computation .
10769 */
108- DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
109- t .start ( ) and
110- result .flowsTo ( [ stringSink ( ) , regSink ( ) ] ) and
111- result = forward ( TypeTracker:: end ( ) )
112- or
113- exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
114- or
115- exists ( TypeBackTracker t2 | t2 = t .continue ( ) | step ( result .getALocalUse ( ) , backwards ( t2 ) , _, _) )
70+ private module TypeTrack< TypeTrackInputSig Input> {
71+ private predicate additionalStep (
72+ DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo
73+ ) {
74+ Input:: additionalStep ( nodeFrom .getALocalUse ( ) , nodeTo )
75+ }
76+
77+ /** Gets a node that is forwards reachable by type-tracking. */
78+ pragma [ nomagic]
79+ private DataFlow:: LocalSourceNode forward ( TypeTracker t ) {
80+ result = Input:: start ( t , _)
81+ or
82+ exists ( TypeTracker t2 | result = forward ( t2 ) .track ( t2 , t ) )
83+ or
84+ exists ( TypeTracker t2 | t2 = t .continue ( ) | additionalStep ( forward ( t2 ) , result ) )
85+ }
86+
87+ bindingset [ result , tbt]
88+ pragma [ inline_late]
89+ pragma [ noopt]
90+ private DataFlow:: LocalSourceNode forwardLateInline ( TypeBackTracker tbt ) {
91+ exists ( TypeTracker tt |
92+ result = forward ( tt ) and
93+ tt = tbt .getACompatibleTypeTracker ( )
94+ )
95+ }
96+
97+ /** Gets a node that is backwards reachable by type-tracking. */
98+ pragma [ nomagic]
99+ private DataFlow:: LocalSourceNode backwards ( TypeBackTracker t ) {
100+ result = forwardLateInline ( t ) and
101+ (
102+ t .start ( ) and
103+ Input:: end ( result .getALocalUse ( ) )
104+ or
105+ exists ( TypeBackTracker t2 | result = backwards ( t2 ) .backtrack ( t2 , t ) )
106+ or
107+ exists ( TypeBackTracker t2 | t2 = t .continue ( ) | additionalStep ( result , backwards ( t2 ) ) )
108+ )
109+ }
110+
111+ bindingset [ result , tt]
112+ pragma [ inline_late]
113+ pragma [ noopt]
114+ private DataFlow:: LocalSourceNode backwardsInlineLate ( TypeTracker tt ) {
115+ exists ( TypeBackTracker tbt |
116+ result = backwards ( tbt ) and
117+ tt = tbt .getACompatibleTypeTracker ( )
118+ )
119+ }
120+
121+ /** Holds if `n` is forwards and backwards reachable with type tracker `t`. */
122+ pragma [ nomagic]
123+ private predicate reached ( DataFlow:: LocalSourceNode n , TypeTracker t ) {
124+ n = forward ( t ) and
125+ n = backwardsInlineLate ( t )
126+ }
127+
128+ pragma [ nomagic]
129+ private TypeTracker stepReached (
130+ TypeTracker t , DataFlow:: LocalSourceNode nodeFrom , DataFlow:: LocalSourceNode nodeTo
131+ ) {
132+ exists ( StepSummary summary |
133+ StepSummary:: step ( nodeFrom , nodeTo , summary ) and
134+ reached ( nodeFrom , t ) and
135+ reached ( nodeTo , result ) and
136+ result = t .append ( summary )
137+ )
138+ or
139+ additionalStep ( nodeFrom , nodeTo ) and
140+ reached ( nodeFrom , pragma [ only_bind_into ] ( t ) ) and
141+ reached ( nodeTo , pragma [ only_bind_into ] ( t ) ) and
142+ result = t .continue ( )
143+ }
144+
145+ /** Gets a node that has been tracked from the start node `start`. */
146+ DataFlow:: LocalSourceNode track ( DataFlow:: Node start , TypeTracker t ) {
147+ t .start ( ) and
148+ result = Input:: start ( t , start ) and
149+ reached ( result , t )
150+ or
151+ exists ( TypeTracker t2 | t = stepReached ( t2 , track ( start , t2 ) , result ) )
152+ }
153+ }
154+
155+ /** Holds if `inputStr` is compiled to a regular expression that is returned at `call`. */
156+ pragma [ nomagic]
157+ private predicate regFromString ( DataFlow:: LocalSourceNode inputStr , DataFlow:: CallNode call ) {
158+ exists ( DataFlow:: Node mid |
159+ inputStr .flowsTo ( mid ) and
160+ call = API:: getTopLevelMember ( "Regexp" ) .getAMethodCall ( [ "compile" , "new" ] ) and
161+ mid = call .getArgument ( 0 )
162+ )
163+ }
164+
165+ private module StringTypeTrackInput implements TypeTrackInputSig {
166+ DataFlow:: LocalSourceNode start ( TypeTracker t , DataFlow:: Node start ) {
167+ start = strStart ( ) and t .start ( ) and result = start
168+ }
169+
170+ predicate end ( DataFlow:: Node n ) {
171+ n = stringSink ( ) or
172+ regFromString ( n , _)
173+ }
174+
175+ predicate additionalStep ( DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo ) {
176+ // include taint flow through `String` summaries
177+ TaintTrackingPrivate:: summaryThroughStepTaint ( nodeFrom , nodeTo ,
178+ any ( String:: SummarizedCallable c ) )
179+ or
180+ // string concatenations, and
181+ exists ( CfgNodes:: ExprNodes:: OperationCfgNode op |
182+ op = nodeTo .asExpr ( ) and
183+ op .getAnOperand ( ) = nodeFrom .asExpr ( ) and
184+ op .getExpr ( ) .( Ast:: BinaryOperation ) .getOperator ( ) = "+"
185+ )
186+ or
187+ // string interpolations
188+ nodeFrom .asExpr ( ) =
189+ nodeTo .asExpr ( ) .( CfgNodes:: ExprNodes:: StringlikeLiteralCfgNode ) .getAComponent ( )
190+ }
116191}
117192
118193/**
119194 * Gets a node that has been tracked from the string constant `start` to some node.
120195 * This is used to figure out where `start` is evaluated as a regular expression against an input string,
121196 * or where `start` is compiled into a regular expression.
122197 */
123- private DataFlow:: LocalSourceNode trackStrings ( DataFlow:: Node start , TypeTracker t ) {
124- result = backwards ( _) and
125- (
198+ private predicate trackStrings = TypeTrack< StringTypeTrackInput > :: track / 2 ;
199+
200+ /** Holds if `strConst` flows to a regex compilation (tracked by `t`), where the resulting regular expression is stored in `reg`. */
201+ pragma [ nomagic]
202+ private predicate regFromStringStart ( DataFlow:: Node strConst , TypeTracker t , DataFlow:: CallNode reg ) {
203+ regFromString ( trackStrings ( strConst , t ) , reg ) and
204+ exists ( t .continue ( ) )
205+ }
206+
207+ private module RegTypeTrackInput implements TypeTrackInputSig {
208+ DataFlow:: LocalSourceNode start ( TypeTracker t , DataFlow:: Node start ) {
209+ start = regStart ( ) and
126210 t .start ( ) and
127- start = result and
128- result = strStart ( )
211+ result = start
129212 or
130- exists ( TypeTracker t2 | result = trackStrings ( start , t2 ) .track ( t2 , t ) )
131- or
132- // an additional step from string to string
133- exists ( TypeTracker t2 | t2 = t .continue ( ) |
134- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "string" )
135- )
136- )
213+ regFromStringStart ( start , t , result )
214+ }
215+
216+ predicate end ( DataFlow:: Node n ) { n = regSink ( ) }
217+
218+ predicate additionalStep ( DataFlow:: Node nodeFrom , DataFlow:: LocalSourceNode nodeTo ) { none ( ) }
137219}
138220
139221/**
140222 * Gets a node that has been tracked from the regular expression `start` to some node.
141223 * This is used to figure out where `start` is executed against an input string.
142224 */
143- private DataFlow:: LocalSourceNode trackRegs ( DataFlow:: Node start , TypeTracker t ) {
144- result = backwards ( _) and
145- (
146- t .start ( ) and
147- start = result and
148- result = regStart ( )
149- or
150- exists ( TypeTracker t2 | result = trackRegs ( start , t2 ) .track ( t2 , t ) )
151- or
152- // an additional step where a string is converted to a regular expression
153- exists ( TypeTracker t2 | t2 = t .continue ( ) |
154- step ( trackStrings ( start , t2 ) .getALocalUse ( ) , result , "string" , "reg" )
155- )
156- )
157- }
225+ private predicate trackRegs = TypeTrack< RegTypeTrackInput > :: track / 2 ;
158226
159227/** Gets a node that references a regular expression. */
160228private DataFlow:: LocalSourceNode trackRegexpType ( TypeTracker t ) {
0 commit comments