@@ -13,87 +13,53 @@ private class ExploitableStringLiteral extends StringLiteral {
1313 ExploitableStringLiteral ( ) { this .getValue ( ) .matches ( [ "%+%" , "%*%" , "%{%}%" ] ) }
1414}
1515
16- private class RegexCompileFlowConf extends DataFlow2:: Configuration {
17- RegexCompileFlowConf ( ) { this = "RegexCompileFlowConfig" }
18-
19- override predicate isSource ( DataFlow:: Node node ) {
20- node .asExpr ( ) instanceof ExploitableStringLiteral
21- }
22-
23- override predicate isSink ( DataFlow:: Node node ) {
24- sinkNode ( node , [ "regex-compile" , "regex-compile-match" , "regex-compile-find" ] )
25- }
26-
27- override predicate isBarrier ( DataFlow:: Node node ) {
28- node .getEnclosingCallable ( ) .getDeclaringType ( ) instanceof NonSecurityTestClass
29- }
30- }
31-
32- /**
33- * Holds if `s` is used as a regex, with the mode `mode` (if known).
34- * If regex mode is not known, `mode` will be `"None"`.
35- *
36- * As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
37- * and therefore may be relevant for ReDoS queries are considered.
38- */
39- predicate usedAsRegex ( StringLiteral s , string mode , boolean match_full_string ) {
40- exists ( DataFlow:: Node sink |
41- any ( RegexCompileFlowConf c ) .hasFlow ( DataFlow2:: exprNode ( s ) , sink ) and
42- mode = "None" and // TODO: proper mode detection
43- ( if matchesFullString ( sink ) then match_full_string = true else match_full_string = false )
44- )
45- }
46-
4716/**
48- * Holds if the regex that flows to `sink` is used to match against a full string,
49- * as though it was implicitly surrounded by ^ and $.
17+ * Holds if `kind` is an external sink kind that is relevant for regex flow.
18+ * `full` is true if sinks with this kind match against the full string of its input.
19+ * `strArg` is the index of the argument to methods with this sink kind that contan the string to be matched against,
20+ * where -1 is the qualifier; or -2 if no such argument exists.
5021 */
51- private predicate matchesFullString ( DataFlow:: Node sink ) {
52- sinkNode ( sink , "regex-compile-match" )
53- or
54- exists ( DataFlow:: Node matchSource , RegexCompileToMatchConf conf |
55- matchSource .asExpr ( ) .( MethodAccess ) .getAnArgument ( ) = sink .asExpr ( ) and
56- conf .hasFlow ( matchSource , _)
22+ private predicate regexSinkKindInfo ( string kind , boolean full , int strArg ) {
23+ sinkModel ( _, _, _, _, _, _, _, kind ) and
24+ exists ( string fullStr , string strArgStr |
25+ (
26+ full = true and fullStr = "f"
27+ or
28+ full = false and fullStr = ""
29+ ) and
30+ (
31+ strArgStr .toInt ( ) = strArg
32+ or
33+ strArg = - 2 and
34+ strArgStr = ""
35+ )
36+ |
37+ kind = "regex-use[" + fullStr + strArgStr + "]"
5738 )
5839}
5940
60- private class RegexCompileToMatchConf extends DataFlow2 :: Configuration {
61- RegexCompileToMatchConf ( ) { this = "RegexCompileToMatchConfig" }
62-
63- override predicate isSource ( DataFlow :: Node node ) { sourceNode ( node , "regex-compile" ) }
41+ /** A sink that is relevant for regex flow. */
42+ private class RegexFlowSink extends DataFlow :: Node {
43+ boolean full ;
44+ int strArg ;
6445
65- override predicate isSink ( DataFlow:: Node node ) { sinkNode ( node , "regex-match" ) }
66-
67- override predicate isAdditionalFlowStep ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
68- exists ( MethodAccess ma | node2 .asExpr ( ) = ma and node1 .asExpr ( ) = ma .getQualifier ( ) |
69- ma .getMethod ( ) .hasQualifiedName ( "java.util.regex" , "Pattern" , "matcher" )
46+ RegexFlowSink ( ) {
47+ exists ( string kind |
48+ regexSinkKindInfo ( kind , full , strArg ) and
49+ sinkNode ( this , kind )
7050 )
7151 }
72- }
73-
74- /**
75- * A method access that can match a regex against a string
76- */
77- abstract class RegexMatchMethodAccess extends MethodAccess {
78- string package ;
79- string type ;
80- string name ;
81- int regexArg ;
82- int stringArg ;
83- Method m ;
84-
85- RegexMatchMethodAccess ( ) {
86- this .getMethod ( ) .getSourceDeclaration ( ) .overrides * ( m ) and
87- m .hasQualifiedName ( package , type , name ) and
88- regexArg in [ - 1 .. m .getNumberOfParameters ( ) - 1 ] and
89- stringArg in [ - 1 .. m .getNumberOfParameters ( ) - 1 ]
90- }
9152
92- /** Gets the argument of this call that the regex to be matched against flows into . */
93- Expr getRegexArg ( ) { result = argOf ( this , regexArg ) }
53+ /** Holds if a regex that flows here is matched against a full string (rather than a substring) . */
54+ predicate matchesFullString ( ) { full = true }
9455
95- /** Gets the argument of this call that the string being matched flows into. */
96- Expr getStringArg ( ) { result = argOf ( this , stringArg ) }
56+ /** Gets the string expression that a regex that flows here is matched against, if any. */
57+ Expr getStringArgument ( ) {
58+ exists ( MethodAccess ma |
59+ this .asExpr ( ) = argOf ( ma , _) and
60+ result = argOf ( ma , strArg )
61+ )
62+ }
9763}
9864
9965private Expr argOf ( MethodAccess ma , int arg ) {
@@ -115,35 +81,7 @@ class RegexAdditionalFlowStep extends Unit {
11581 abstract predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) ;
11682}
11783
118- // TODO: can this be done with the models-as-data framework?
119- private class JdkRegexMatchMethodAccess extends RegexMatchMethodAccess {
120- JdkRegexMatchMethodAccess ( ) {
121- package = "java.util.regex" and
122- type = "Pattern" and
123- (
124- name = "matcher" and regexArg = - 1 and stringArg = 0
125- or
126- name = "matches" and regexArg = 0 and stringArg = 1
127- or
128- name = "split" and regexArg = - 1 and stringArg = 0
129- or
130- name = "splitAsStream" and regexArg = - 1 and stringArg = 0
131- )
132- or
133- package = "java.lang" and
134- type = "String" and
135- name = [ "matches" , "split" , "replaceAll" , "replaceFirst" ] and
136- regexArg = 0 and
137- stringArg = - 1
138- or
139- package = "java.util.function" and
140- type = "Predicate" and
141- name = "test" and
142- regexArg = - 1 and
143- stringArg = 0
144- }
145- }
146-
84+ // TODO: This may be able to be done with models-as-data if query-specific flow steps beome supported.
14785private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
14886 override predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
14987 exists ( MethodAccess ma , Method m , string package , string type , string name , int arg |
@@ -155,7 +93,7 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
15593 package = "java.util.regex" and
15694 type = "Pattern" and
15795 (
158- name = [ "asMatchPredicate" , "asPredicate" ] and
96+ name = [ "asMatchPredicate" , "asPredicate" , "matcher" ] and
15997 arg = - 1
16098 or
16199 name = "compile" and
@@ -170,16 +108,6 @@ private class JdkRegexFlowStep extends RegexAdditionalFlowStep {
170108 }
171109}
172110
173- private class GuavaRegexMatchMethodAccess extends RegexMatchMethodAccess {
174- GuavaRegexMatchMethodAccess ( ) {
175- package = "com.google.common.base" and
176- regexArg = - 1 and
177- stringArg = 0 and
178- type = [ "Splitter" , "Splitter$MapSplitter" ] and
179- name = [ "split" , "splitToList" ]
180- }
181- }
182-
183111private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
184112 override predicate step ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
185113 exists ( MethodAccess ma , Method m , string package , string type , string name , int arg |
@@ -209,20 +137,46 @@ private class GuavaRegexFlowStep extends RegexAdditionalFlowStep {
209137 }
210138}
211139
212- private class RegexMatchFlowConf extends DataFlow2:: Configuration {
213- RegexMatchFlowConf ( ) { this = "RegexMatchFlowConf " }
140+ private class RegexFlowConf extends DataFlow2:: Configuration {
141+ RegexFlowConf ( ) { this = "RegexFlowConfig " }
214142
215- override predicate isSource ( DataFlow:: Node src ) {
216- src .asExpr ( ) instanceof ExploitableStringLiteral
143+ override predicate isSource ( DataFlow:: Node node ) {
144+ node .asExpr ( ) instanceof ExploitableStringLiteral
217145 }
218146
219- override predicate isSink ( DataFlow:: Node sink ) {
220- exists ( RegexMatchMethodAccess ma | sink .asExpr ( ) = ma .getRegexArg ( ) )
221- }
147+ override predicate isSink ( DataFlow:: Node node ) { node instanceof RegexFlowSink }
222148
223149 override predicate isAdditionalFlowStep ( DataFlow:: Node node1 , DataFlow:: Node node2 ) {
224150 any ( RegexAdditionalFlowStep s ) .step ( node1 , node2 )
225151 }
152+
153+ override predicate isBarrier ( DataFlow:: Node node ) {
154+ node .getEnclosingCallable ( ) .getDeclaringType ( ) instanceof NonSecurityTestClass
155+ }
156+ }
157+
158+ /**
159+ * Holds if `regex` is used as a regex, with the mode `mode` (if known).
160+ * If regex mode is not known, `mode` will be `"None"`.
161+ *
162+ * As an optimisation, only regexes containing an infinite repitition quatifier (`+`, `*`, or `{x,}`)
163+ * and therefore may be relevant for ReDoS queries are considered.
164+ */
165+ predicate usedAsRegex ( StringLiteral regex , string mode , boolean match_full_string ) {
166+ any ( RegexFlowConf c ) .hasFlow ( DataFlow2:: exprNode ( regex ) , _) and
167+ mode = "None" and // TODO: proper mode detection
168+ ( if matchesFullString ( regex ) then match_full_string = true else match_full_string = false )
169+ }
170+
171+ /**
172+ * Holds if `regex` is used as a regular expression that is matched against a full string,
173+ * as though it was implicitly surrounded by ^ and $.
174+ */
175+ private predicate matchesFullString ( StringLiteral regex ) {
176+ exists ( RegexFlowConf c , RegexFlowSink sink |
177+ sink .matchesFullString ( ) and
178+ c .hasFlow ( DataFlow2:: exprNode ( regex ) , sink )
179+ )
226180}
227181
228182/**
@@ -232,12 +186,8 @@ private class RegexMatchFlowConf extends DataFlow2::Configuration {
232186 * and therefore may be relevant for ReDoS queries are considered.
233187 */
234188predicate regexMatchedAgainst ( StringLiteral regex , Expr str ) {
235- exists (
236- DataFlow:: Node src , DataFlow:: Node sink , RegexMatchMethodAccess ma , RegexMatchFlowConf conf
237- |
238- src .asExpr ( ) = regex and
239- sink .asExpr ( ) = ma .getRegexArg ( ) and
240- conf .hasFlow ( src , sink ) and
241- str = ma .getStringArg ( )
189+ exists ( RegexFlowConf c , RegexFlowSink sink |
190+ str = sink .getStringArgument ( ) and
191+ c .hasFlow ( DataFlow2:: exprNode ( regex ) , sink )
242192 )
243193}
0 commit comments