Skip to content

Commit ec9b65e

Browse files
authored
Merge pull request #2369 from max-schaefer/js/odasa-8179
Approved by esbena
2 parents 46b6e6d + a3a46bf commit ec9b65e

File tree

8 files changed

+227
-103
lines changed

8 files changed

+227
-103
lines changed

change-notes/1.24/analysis-javascript.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
| **Query** | **Expected impact** | **Change** |
1717
|--------------------------------|------------------------------|---------------------------------------------------------------------------|
1818
| Clear-text logging of sensitive information (`js/clear-text-logging`) | More results | More results involving `process.env` and indirect calls to logging methods are recognized. |
19+
| Incomplete string escaping or encoding (`js/incomplete-sanitization`) | Fewer false positive results | This query now recognizes additional cases where a single replacement is likely to be intentional. |
1920
| Unbound event handler receiver (`js/unbound-event-handler-receiver`) | Fewer false positive results | This query now recognizes additional ways event handler receivers can be bound. |
2021

2122
## Changes to libraries

javascript/ql/src/Security/CWE-116/DoubleEscaping.ql

Lines changed: 28 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -15,76 +15,51 @@
1515

1616
import javascript
1717

18-
/**
19-
* Holds if `rl` is a simple constant, which is bound to the result of the predicate.
20-
*
21-
* For example, `/a/g` has string value `"a"` and `/abc/` has string value `"abc"`,
22-
* while `/ab?/` and `/a(?=b)/` do not have a string value.
23-
*
24-
* Flags are ignored, so `/a/i` is still considered to have string value `"a"`,
25-
* even though it also matches `"A"`.
26-
*
27-
* Note the somewhat subtle use of monotonic aggregate semantics, which makes the
28-
* `strictconcat` fail if one of the children of the root is not a constant (legacy
29-
* semantics would simply skip such children).
30-
*/
31-
language[monotonicAggregates]
32-
string getStringValue(RegExpLiteral rl) {
33-
exists(RegExpTerm root | root = rl.getRoot() |
34-
result = root.(RegExpConstant).getValue()
35-
or
36-
result = strictconcat(RegExpTerm ch, int i |
37-
ch = root.(RegExpSequence).getChild(i)
38-
|
39-
ch.(RegExpConstant).getValue() order by i
40-
)
41-
)
42-
}
43-
4418
/**
4519
* Gets a predecessor of `nd` that is not an SSA phi node.
4620
*/
4721
DataFlow::Node getASimplePredecessor(DataFlow::Node nd) {
4822
result = nd.getAPredecessor() and
49-
not nd.(DataFlow::SsaDefinitionNode).getSsaVariable().getDefinition() instanceof SsaPhiNode
23+
not exists(SsaDefinition ssa |
24+
ssa = nd.(DataFlow::SsaDefinitionNode).getSsaVariable().getDefinition()
25+
|
26+
ssa instanceof SsaPhiNode or
27+
ssa instanceof SsaVariableCapture
28+
)
5029
}
5130

5231
/**
5332
* Holds if `metachar` is a meta-character that is used to escape special characters
5433
* into a form described by regular expression `regex`.
5534
*/
5635
predicate escapingScheme(string metachar, string regex) {
57-
metachar = "&" and regex = "&.*;"
36+
metachar = "&" and regex = "&.+;"
5837
or
59-
metachar = "%" and regex = "%.*"
38+
metachar = "%" and regex = "%.+"
6039
or
61-
metachar = "\\" and regex = "\\\\.*"
40+
metachar = "\\" and regex = "\\\\.+"
6241
}
6342

6443
/**
6544
* A call to `String.prototype.replace` that replaces all instances of a pattern.
6645
*/
67-
class Replacement extends DataFlow::Node {
68-
RegExpLiteral pattern;
69-
46+
class Replacement extends StringReplaceCall {
7047
Replacement() {
71-
exists(DataFlow::MethodCallNode mcn | this = mcn |
72-
mcn.getMethodName() = "replace" and
73-
pattern.flow().(DataFlow::SourceNode).flowsTo(mcn.getArgument(0)) and
74-
mcn.getNumArgument() = 2 and
75-
pattern.isGlobal()
76-
)
48+
isGlobal()
7749
}
7850

7951
/**
80-
* Holds if this replacement replaces the string `input` with `output`.
52+
* Gets the input of this replacement.
8153
*/
82-
predicate replaces(string input, string output) {
83-
exists(DataFlow::MethodCallNode mcn |
84-
mcn = this and
85-
input = getStringValue(pattern) and
86-
output = mcn.getArgument(1).getStringValue()
87-
)
54+
DataFlow::Node getInput() {
55+
result = this.getReceiver()
56+
}
57+
58+
/**
59+
* Gets the output of this replacement.
60+
*/
61+
DataFlow::SourceNode getOutput() {
62+
result = this
8863
}
8964

9065
/**
@@ -119,7 +94,7 @@ class Replacement extends DataFlow::Node {
11994
* Gets the previous replacement in this chain of replacements.
12095
*/
12196
Replacement getPreviousReplacement() {
122-
result = getASimplePredecessor*(this.(DataFlow::MethodCallNode).getReceiver())
97+
result.getOutput() = getASimplePredecessor*(getInput())
12398
}
12499

125100
/**
@@ -130,7 +105,9 @@ class Replacement extends DataFlow::Node {
130105
exists(Replacement pred | pred = this.getPreviousReplacement() |
131106
if pred.escapes(_, metachar)
132107
then result = pred
133-
else result = pred.getAnEarlierEscaping(metachar)
108+
else (
109+
not pred.unescapes(metachar, _) and result = pred.getAnEarlierEscaping(metachar)
110+
)
134111
)
135112
}
136113

@@ -142,7 +119,9 @@ class Replacement extends DataFlow::Node {
142119
exists(Replacement succ | this = succ.getPreviousReplacement() |
143120
if succ.unescapes(metachar, _)
144121
then result = succ
145-
else result = succ.getALaterUnescaping(metachar)
122+
else (
123+
not succ.escapes(_, metachar) and result = succ.getALaterUnescaping(metachar)
124+
)
146125
)
147126
}
148127
}

javascript/ql/src/Security/CWE-116/IncompleteSanitization.ql

Lines changed: 51 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -20,53 +20,47 @@ import javascript
2020
string metachar() { result = "'\"\\&<>\n\r\t*|{}[]%$".charAt(_) }
2121

2222
/** Gets a string matched by `e` in a `replace` call. */
23-
string getAMatchedString(Expr e) {
24-
result = getAMatchedConstant(e.(RegExpLiteral).getRoot()).getValue()
23+
string getAMatchedString(DataFlow::Node e) {
24+
result = e.(DataFlow::RegExpLiteralNode).getRoot().getAMatchedString()
2525
or
2626
result = e.getStringValue()
2727
}
2828

29-
/** Gets a constant matched by `t`. */
30-
RegExpConstant getAMatchedConstant(RegExpTerm t) {
31-
result = t
32-
or
33-
result = getAMatchedConstant(t.(RegExpAlt).getAlternative())
34-
or
35-
result = getAMatchedConstant(t.(RegExpGroup).getAChild())
36-
or
37-
exists(RegExpCharacterClass recc | recc = t and not recc.isInverted() |
38-
result = getAMatchedConstant(recc.getAChild())
39-
)
40-
}
41-
4229
/** Holds if `t` is simple, that is, a union of constants. */
4330
predicate isSimple(RegExpTerm t) {
4431
t instanceof RegExpConstant
4532
or
4633
isSimple(t.(RegExpGroup).getAChild())
4734
or
48-
(
49-
t instanceof RegExpAlt
50-
or
51-
t instanceof RegExpCharacterClass and not t.(RegExpCharacterClass).isInverted()
52-
) and
35+
isSimpleCharacterClass(t)
36+
or
37+
isSimpleAlt(t)
38+
}
39+
40+
/** Holds if `t` is a non-inverted character class that contains no ranges. */
41+
predicate isSimpleCharacterClass(RegExpCharacterClass t) {
42+
not t.isInverted() and
43+
forall(RegExpTerm ch | ch = t.getAChild() | isSimple(ch))
44+
}
45+
46+
/** Holds if `t` is an alternation of simple terms. */
47+
predicate isSimpleAlt(RegExpAlt t) {
5348
forall(RegExpTerm ch | ch = t.getAChild() | isSimple(ch))
5449
}
5550

5651
/**
5752
* Holds if `mce` is of the form `x.replace(re, new)`, where `re` is a global
5853
* regular expression and `new` prefixes the matched string with a backslash.
5954
*/
60-
predicate isBackslashEscape(MethodCallExpr mce, RegExpLiteral re) {
61-
mce.getMethodName() = "replace" and
62-
re.flow().(DataFlow::SourceNode).flowsToExpr(mce.getArgument(0)) and
63-
re.isGlobal() and
64-
exists(string new | new = mce.getArgument(1).getStringValue() |
65-
// `new` is `\$&`, `\$1` or similar
66-
new.regexpMatch("\\\\\\$(&|\\d)")
55+
predicate isBackslashEscape(StringReplaceCall mce, DataFlow::RegExpLiteralNode re) {
56+
mce.isGlobal() and
57+
re = mce.getRegExp() and
58+
(
59+
// replacement with `\$&`, `\$1` or similar
60+
mce.getRawReplacement().getStringValue().regexpMatch("\\\\\\$(&|\\d)")
6761
or
68-
// `new` is `\c`, where `c` is a constant matched by `re`
69-
new.regexpMatch("\\\\\\Q" + getAMatchedString(re) + "\\E")
62+
// replacement of `c` with `\c`
63+
exists(string c | mce.replaces(c, "\\" + c))
7064
)
7165
}
7266

@@ -78,7 +72,7 @@ predicate allBackslashesEscaped(DataFlow::Node nd) {
7872
nd = DataFlow::globalVarRef("JSON").getAMemberCall("stringify")
7973
or
8074
// check whether `nd` itself escapes backslashes
81-
exists(RegExpLiteral rel | isBackslashEscape(nd.asExpr(), rel) |
75+
exists(DataFlow::RegExpLiteralNode rel | isBackslashEscape(nd, rel) |
8276
// if it's a complex regexp, we conservatively assume that it probably escapes backslashes
8377
not isSimple(rel.getRoot()) or
8478
getAMatchedString(rel) = "\\"
@@ -104,10 +98,8 @@ predicate allBackslashesEscaped(DataFlow::Node nd) {
10498
/**
10599
* Holds if `repl` looks like a call to "String.prototype.replace" that deliberately removes the first occurrence of `str`.
106100
*/
107-
predicate removesFirstOccurence(DataFlow::MethodCallNode repl, string str) {
108-
repl.getMethodName() = "replace" and
109-
repl.getArgument(0).getStringValue() = str and
110-
repl.getArgument(1).getStringValue() = ""
101+
predicate removesFirstOccurence(StringReplaceCall repl, string str) {
102+
not exists(repl.getRegExp()) and repl.replaces(str, "")
111103
}
112104

113105
/**
@@ -134,25 +126,30 @@ predicate isDelimiterUnwrapper(
134126
}
135127

136128
/*
137-
* Holds if `repl` is a standalone use of `String.prototype.replace` to remove a single newline.
129+
* Holds if `repl` is a standalone use of `String.prototype.replace` to remove a single newline,
130+
* dollar or percent character.
131+
*
132+
* This is often done on inputs that are known to only contain a single instance of the character,
133+
* such as output from a shell command that is known to end with a single newline, or strings
134+
* like "$1.20" or "50%".
138135
*/
139136

140-
predicate removesTrailingNewLine(DataFlow::MethodCallNode repl) {
141-
repl.getMethodName() = "replace" and
142-
repl.getArgument(0).mayHaveStringValue("\n") and
143-
repl.getArgument(1).mayHaveStringValue("") and
144-
not exists(DataFlow::MethodCallNode other | other.getMethodName() = "replace" |
145-
repl.getAMethodCall() = other or
146-
other.getAMethodCall() = repl
137+
predicate whitelistedRemoval(StringReplaceCall repl) {
138+
not repl.isGlobal() and
139+
exists(string s | s = "\n" or s = "%" or s = "$" |
140+
repl.replaces(s, "") and
141+
not exists(StringReplaceCall other |
142+
repl.getAMethodCall() = other or
143+
other.getAMethodCall() = repl
144+
)
147145
)
148146
}
149147

150-
from MethodCallExpr repl, Expr old, string msg
148+
from StringReplaceCall repl, DataFlow::Node old, string msg
151149
where
152-
repl.getMethodName() = "replace" and
153-
(old = repl.getArgument(0) or old.flow().(DataFlow::SourceNode).flowsToExpr(repl.getArgument(0))) and
150+
(old = repl.getArgument(0) or old = repl.getRegExp()) and
154151
(
155-
not old.(RegExpLiteral).isGlobal() and
152+
not repl.isGlobal() and
156153
msg = "This replaces only the first occurrence of " + old + "." and
157154
// only flag if this is likely to be a sanitizer or URL encoder or decoder
158155
exists(string m | m = getAMatchedString(old) |
@@ -171,17 +168,17 @@ where
171168
(m = ".." or m = "/.." or m = "../" or m = "/../")
172169
) and
173170
// don't flag replace operations in a loop
174-
not DataFlow::valueNode(repl.getReceiver()) = DataFlow::valueNode(repl).getASuccessor+() and
171+
not repl.getReceiver() = repl.getASuccessor+() and
175172
// dont' flag unwrapper
176-
not isDelimiterUnwrapper(repl.flow(), _) and
177-
not isDelimiterUnwrapper(_, repl.flow()) and
178-
// dont' flag the removal of trailing newlines
179-
not removesTrailingNewLine(repl.flow())
173+
not isDelimiterUnwrapper(repl, _) and
174+
not isDelimiterUnwrapper(_, repl) and
175+
// don't flag replacements of certain characters with whitespace
176+
not whitelistedRemoval(repl)
180177
or
181-
exists(RegExpLiteral rel |
178+
exists(DataFlow::RegExpLiteralNode rel |
182179
isBackslashEscape(repl, rel) and
183-
not allBackslashesEscaped(DataFlow::valueNode(repl)) and
180+
not allBackslashesEscaped(repl) and
184181
msg = "This does not escape backslash characters in the input."
185182
)
186183
)
187-
select repl.getCallee(), msg
184+
select repl.getCalleeNode(), msg

javascript/ql/src/semmle/javascript/Regexp.qll

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,23 @@ class RegExpTerm extends Locatable, @regexpterm {
173173
parent.(StringLiteral).flow() instanceof RegExpPatternSource
174174
)
175175
}
176+
177+
/**
178+
* Gets the single string this regular-expression term matches.
179+
*
180+
* This predicate is only defined for (sequences/groups of) constant regular expressions.
181+
* In particular, terms involving zero-width assertions like `^` or `\b` are not considered
182+
* to have a constant value.
183+
*
184+
* Note that this predicate does not take flags of the enclosing regular-expression literal
185+
* into account.
186+
*/
187+
string getConstantValue() { none() }
188+
189+
/**
190+
* Gets a string that is matched by this regular-expression term.
191+
*/
192+
string getAMatchedString() { result = getConstantValue() }
176193
}
177194

178195
/**
@@ -223,6 +240,8 @@ class RegExpConstant extends RegExpTerm, @regexp_constant {
223240
predicate isCharacter() { any() }
224241

225242
override predicate isNullable() { none() }
243+
244+
override string getConstantValue() { result = getValue() }
226245
}
227246

228247
/**
@@ -266,6 +285,8 @@ class RegExpAlt extends RegExpTerm, @regexp_alt {
266285
int getNumAlternative() { result = getNumChild() }
267286

268287
override predicate isNullable() { getAlternative().isNullable() }
288+
289+
override string getAMatchedString() { result = getAlternative().getAMatchedString() }
269290
}
270291

271292
/**
@@ -289,6 +310,21 @@ class RegExpSequence extends RegExpTerm, @regexp_seq {
289310
override predicate isNullable() {
290311
forall(RegExpTerm child | child = getAChild() | child.isNullable())
291312
}
313+
314+
override string getConstantValue() {
315+
result = getConstantValue(0)
316+
}
317+
318+
/**
319+
* Gets the single string matched by the `i`th child and all following children of
320+
* this sequence, if any.
321+
*/
322+
private string getConstantValue(int i) {
323+
i = getNumChild() and
324+
result = ""
325+
or
326+
result = getChild(i).getConstantValue() + getConstantValue(i+1)
327+
}
292328
}
293329

294330
/**
@@ -549,6 +585,10 @@ class RegExpGroup extends RegExpTerm, @regexp_group {
549585
string getName() { isNamedCapture(this, result) }
550586

551587
override predicate isNullable() { getAChild().isNullable() }
588+
589+
override string getConstantValue() { result = getAChild().getConstantValue() }
590+
591+
override string getAMatchedString() { result = getAChild().getAMatchedString() }
552592
}
553593

554594
/**
@@ -734,6 +774,10 @@ class RegExpCharacterClass extends RegExpTerm, @regexp_char_class {
734774
predicate isInverted() { isInverted(this) }
735775

736776
override predicate isNullable() { none() }
777+
778+
override string getAMatchedString() {
779+
not isInverted() and result = getAChild().getAMatchedString()
780+
}
737781
}
738782

739783
/**

0 commit comments

Comments
 (0)