@@ -20,53 +20,47 @@ import javascript
2020string metachar ( ) { result = "'\"\\&<>\n\r\t*|{}[]%$" .charAt ( _) } // one result per character of the literal (any index)
2121
2222/** Gets a string matched by `e` in a `replace` call: either a string matched by the root term of `e` when `e` is a regular expression literal, or the string value of `e` itself. */
23- string getAMatchedString ( Expr e ) {
24- result = getAMatchedConstant ( e .( RegExpLiteral ) .getRoot ( ) ) . getValue ( )
23+ string getAMatchedString ( DataFlow :: Node e ) {
24+ result = e .( DataFlow :: RegExpLiteralNode ) .getRoot ( ) . getAMatchedString ( )
2525 or
2626 result = e .getStringValue ( )
2727}
2828
29- /** Gets a constant matched by `t`: `t` itself if it is a constant, or a constant reached recursively through an alternative, a child of a group, or a child of a non-inverted character class. */
30- RegExpConstant getAMatchedConstant ( RegExpTerm t ) {
31- result = t
32- or
33- result = getAMatchedConstant ( t .( RegExpAlt ) .getAlternative ( ) )
34- or
35- result = getAMatchedConstant ( t .( RegExpGroup ) .getAChild ( ) )
36- or
37- exists ( RegExpCharacterClass recc | recc = t and not recc .isInverted ( ) |
38- result = getAMatchedConstant ( recc .getAChild ( ) )
39- )
40- }
41-
4229/** Holds if `t` is simple, that is, a union of constants: a constant, a group with a simple child, a non-inverted character class whose children are all simple, or an alternation whose children are all simple. */
4330predicate isSimple ( RegExpTerm t ) {
4431 t instanceof RegExpConstant
4532 or
4633 isSimple ( t .( RegExpGroup ) .getAChild ( ) )
4734 or
48- (
49- t instanceof RegExpAlt
50- or
51- t instanceof RegExpCharacterClass and not t .( RegExpCharacterClass ) .isInverted ( )
52- ) and
35+ isSimpleCharacterClass ( t )
36+ or
37+ isSimpleAlt ( t )
38+ }
39+
40+ /** Holds if `t` is a non-inverted character class all of whose children are simple (see `isSimple`); in particular it contains no ranges. */
41+ predicate isSimpleCharacterClass ( RegExpCharacterClass t ) {
42+ not t .isInverted ( ) and
43+ forall ( RegExpTerm ch | ch = t .getAChild ( ) | isSimple ( ch ) )
44+ }
45+
46+ /** Holds if `t` is an alternation all of whose children are simple terms (see `isSimple`). */
47+ predicate isSimpleAlt ( RegExpAlt t ) {
5348 forall ( RegExpTerm ch | ch = t .getAChild ( ) | isSimple ( ch ) )
5449}
5550
5651/**
5752 * Holds if `mce` is of the form `x.replace(re, new)`, where `re` is a global
5853 * regular expression and `new` prefixes the matched string with a backslash
5853 * (for example `\$&`, `\$1`, or `\c` where `c` is the replaced string).
5954 */
60- predicate isBackslashEscape ( MethodCallExpr mce , RegExpLiteral re ) {
61- mce .getMethodName ( ) = "replace" and
62- re .flow ( ) .( DataFlow:: SourceNode ) .flowsToExpr ( mce .getArgument ( 0 ) ) and
63- re .isGlobal ( ) and
64- exists ( string new | new = mce .getArgument ( 1 ) .getStringValue ( ) |
65- // `new` is `\$&`, `\$1` or similar
66- new .regexpMatch ( "\\\\\\$(&|\\d)" )
55+ predicate isBackslashEscape ( StringReplaceCall mce , DataFlow:: RegExpLiteralNode re ) {
56+ mce .isGlobal ( ) and
57+ re = mce .getRegExp ( ) and
58+ (
59+ // replacement with `\$&`, `\$1` or similar
60+ mce .getRawReplacement ( ) .getStringValue ( ) .regexpMatch ( "\\\\\\$(&|\\d)" )
6761 or
68- // `new` is `\c`, where `c` is a constant matched by `re `
69- new . regexpMatch ( "\\\\\\Q " + getAMatchedString ( re ) + "\\E" )
62+ // replacement of some string `c` with `\c`, that is, `c` prefixed by a backslash
63+ exists ( string c | mce . replaces ( c , "\\" + c ) )
7064 )
7165}
7266
@@ -78,7 +72,7 @@ predicate allBackslashesEscaped(DataFlow::Node nd) {
7872 nd = DataFlow:: globalVarRef ( "JSON" ) .getAMemberCall ( "stringify" )
7973 or
8074 // check whether `nd` itself escapes backslashes
81- exists ( RegExpLiteral rel | isBackslashEscape ( nd . asExpr ( ) , rel ) |
75+ exists ( DataFlow :: RegExpLiteralNode rel | isBackslashEscape ( nd , rel ) |
8276 // if it's a complex regexp, we conservatively assume that it probably escapes backslashes
8377 not isSimple ( rel .getRoot ( ) ) or
8478 getAMatchedString ( rel ) = "\\"
@@ -104,10 +98,8 @@ predicate allBackslashesEscaped(DataFlow::Node nd) {
10498/**
10599 * Holds if `repl` looks like a call to "String.prototype.replace" that deliberately removes the first occurrence of `str`, that is, its pattern is the plain string `str` (not a regular expression) and its replacement is the empty string.
106100 */
107- predicate removesFirstOccurence ( DataFlow:: MethodCallNode repl , string str ) {
108- repl .getMethodName ( ) = "replace" and
109- repl .getArgument ( 0 ) .getStringValue ( ) = str and
110- repl .getArgument ( 1 ) .getStringValue ( ) = ""
101+ predicate removesFirstOccurence ( StringReplaceCall repl , string str ) {
102+ not exists ( repl .getRegExp ( ) ) and repl .replaces ( str , "" )
111103}
112104
113105/**
@@ -134,25 +126,30 @@ predicate isDelimiterUnwrapper(
134126}
135127
136128/**
137- * Holds if `repl` is a standalone use of `String.prototype.replace` to remove a single newline.
129+ * Holds if `repl` is a standalone use of `String.prototype.replace` to remove a single newline,
130+ * dollar or percent character. "Standalone" means that `repl` is not chained with another
130+ * replace call, neither as its receiver nor as a method call on its result.
131+ *
132+ * This is often done on inputs that are known to only contain a single instance of the character,
133+ * such as output from a shell command that is known to end with a single newline, or strings
134+ * like "$1.20" or "50%".
138135 */
139136
140- predicate removesTrailingNewLine ( DataFlow:: MethodCallNode repl ) {
141- repl .getMethodName ( ) = "replace" and
142- repl .getArgument ( 0 ) .mayHaveStringValue ( "\n" ) and
143- repl .getArgument ( 1 ) .mayHaveStringValue ( "" ) and
144- not exists ( DataFlow:: MethodCallNode other | other .getMethodName ( ) = "replace" |
145- repl .getAMethodCall ( ) = other or
146- other .getAMethodCall ( ) = repl
137+ predicate whitelistedRemoval ( StringReplaceCall repl ) {
138+ not repl .isGlobal ( ) and
139+ exists ( string s | s = "\n" or s = "%" or s = "$" |
140+ repl .replaces ( s , "" ) and
141+ not exists ( StringReplaceCall other |
142+ repl .getAMethodCall ( ) = other or
143+ other .getAMethodCall ( ) = repl
144+ )
147145 )
148146}
149147
150- from MethodCallExpr repl , Expr old , string msg
148+ from StringReplaceCall repl , DataFlow :: Node old , string msg
151149where
152- repl .getMethodName ( ) = "replace" and
153- ( old = repl .getArgument ( 0 ) or old .flow ( ) .( DataFlow:: SourceNode ) .flowsToExpr ( repl .getArgument ( 0 ) ) ) and
150+ ( old = repl .getArgument ( 0 ) or old = repl .getRegExp ( ) ) and
154151 (
155- not old . ( RegExpLiteral ) .isGlobal ( ) and
152+ not repl .isGlobal ( ) and
156153 msg = "This replaces only the first occurrence of " + old + "." and
157154 // only flag if this is likely to be a sanitizer or URL encoder or decoder
158155 exists ( string m | m = getAMatchedString ( old ) |
@@ -171,17 +168,17 @@ where
171168 ( m = ".." or m = "/.." or m = "../" or m = "/../" )
172169 ) and
173170 // don't flag replace operations in a loop
174- not DataFlow :: valueNode ( repl .getReceiver ( ) ) = DataFlow :: valueNode ( repl ) .getASuccessor + ( ) and
171+ not repl .getReceiver ( ) = repl .getASuccessor + ( ) and
175172 // don't flag unwrapper
176- not isDelimiterUnwrapper ( repl . flow ( ) , _) and
177- not isDelimiterUnwrapper ( _, repl . flow ( ) ) and
178- // dont' flag the removal of trailing newlines
179- not removesTrailingNewLine ( repl . flow ( ) )
173+ not isDelimiterUnwrapper ( repl , _) and
174+ not isDelimiterUnwrapper ( _, repl ) and
175+ // don't flag whitelisted removals of a single newline, `%` or `$` character
176+ not whitelistedRemoval ( repl )
180177 or
181- exists ( RegExpLiteral rel |
178+ exists ( DataFlow :: RegExpLiteralNode rel |
182179 isBackslashEscape ( repl , rel ) and
183- not allBackslashesEscaped ( DataFlow :: valueNode ( repl ) ) and
180+ not allBackslashesEscaped ( repl ) and
184181 msg = "This does not escape backslash characters in the input."
185182 )
186183 )
187- select repl .getCallee ( ) , msg
184+ select repl .getCalleeNode ( ) , msg
0 commit comments