Skip to content

Commit 1bc73ab

Browse files
author
Esben Sparre Andreasen
committed
JS: address review comments
1 parent 09e7124 commit 1bc73ab

File tree

4 files changed

+34
-23
lines changed

4 files changed

+34
-23
lines changed

javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.qhelp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
Sanitizing untrusted URLs is an important technique for
1010
preventing attacks such as request forgeries and malicious
11-
redirections. Usually, this is done by checking that the host of a URL
11+
redirections. Often, this is done by checking that the host of a URL
1212
is in a set of allowed hosts.
1313

1414
</p>
@@ -56,7 +56,7 @@
5656
an attacker-controlled domain such as <code>wwwXexample.com</code>.
5757

5858
Address this vulnerability by escaping <code>.</code>
59-
appropriately: <code>let regex =/(www|beta|)\.example\.com/</code>.
59+
appropriately: <code>let regex = /(www|beta|)\.example\.com/</code>.
6060

6161
</p>
6262

javascript/ql/src/Security/CWE-020/IncompleteHostnameRegExp.ql

Lines changed: 16 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -12,28 +12,25 @@
1212

1313
import javascript
1414

15-
module IncompleteHostnameRegExpTracking {
16-
17-
/**
18-
* A taint tracking configuration for incomplete hostname regular expressions sources.
19-
*/
20-
class Configuration extends TaintTracking::Configuration {
21-
Configuration() { this = "IncompleteHostnameRegExpTracking" }
22-
23-
override
24-
predicate isSource(DataFlow::Node source) {
25-
isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _)
26-
}
15+
/**
16+
* A taint tracking configuration for incomplete hostname regular expressions sources.
17+
*/
18+
class Configuration extends TaintTracking::Configuration {
19+
Configuration() { this = "IncompleteHostnameRegExpTracking" }
2720

28-
override
29-
predicate isSink(DataFlow::Node sink) {
30-
isInterpretedAsRegExp(sink)
31-
}
21+
override
22+
predicate isSource(DataFlow::Node source) {
23+
isIncompleteHostNameRegExpPattern(source.asExpr().getStringValue(), _)
24+
}
3225

26+
override
27+
predicate isSink(DataFlow::Node sink) {
28+
isInterpretedAsRegExp(sink)
3329
}
3430

3531
}
3632

33+
3734
/**
3835
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
3936
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
@@ -45,23 +42,23 @@ predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
4542
// an unescaped single `.`
4643
"(?<!\\\\)[.]" +
4744
// immediately followed by a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
48-
"([():|?a-z0-9-]+(\\\\)?[.](com|org|edu|gov|uk|net))" +
45+
"([():|?a-z0-9-]+(\\\\)?[.](" + RegExpPatterns::commonTLD() + "))" +
4946
".*", 1)
5047
}
5148

5249
from Expr e, string pattern, string hostPart
5350
where
5451
(
5552
e.(RegExpLiteral).getValue() = pattern or
56-
exists (IncompleteHostnameRegExpTracking::Configuration cfg |
53+
exists (Configuration cfg |
5754
cfg.hasFlow(e.flow(), _) and
5855
e.mayHaveStringValue(pattern)
5956
)
6057
) and
6158
isIncompleteHostNameRegExpPattern(pattern, hostPart)
6259
and
6360
// ignore patterns with capture groups after the TLD
64-
not pattern.regexpMatch("(?i).*[.](com|org|edu|gov|uk|net).*[(][?]:.*[)].*")
61+
not pattern.regexpMatch("(?i).*[.](" + RegExpPatterns::commonTLD() + ").*[(][?]:.*[)].*")
6562

6663

6764
select e, "This regular expression has an unescaped '.' before '" + hostPart + "', so it might match more hosts than expected."

javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.ql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ where
2121
substring.mayHaveStringValue(target) and
2222
(
2323
// target contains a domain on a common TLD, and perhaps some other URL components
24-
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(com|org|edu|gov|uk|net)(:[0-9]+)?/?") or
24+
target.regexpMatch("(?i)([a-z]*:?//)?\\.?([a-z0-9-]+\\.)+(" + RegExpPatterns::commonTLD() + ")(:[0-9]+)?/?") or
2525
// target is a HTTP URL to a domain on any TLD
2626
target.regexpMatch("(?i)https?://([a-z0-9-]+\\.)+([a-z]+)(:[0-9]+)?/?")
2727
) and

javascript/ql/src/semmle/javascript/Regexp.qll

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -504,8 +504,22 @@ predicate isInterpretedAsRegExp(DataFlow::Node source) {
504504
methodName = "search" and
505505
source.asExpr() = mce.getArgument(0) and
506506
mce.getNumArgument() = 1 and
507-
// `String.prototype.search` returns a number, so exclude chained accesses
507+
// "search" is a common method name, and so we exclude chained accesses
508+
// because `String.prototype.search` returns a number
508509
not exists(PropAccess p | p.getBase() = mce)
509510
)
510511
)
511512
}
513+
514+
/**
515+
* Provides regular expression patterns.
516+
*/
517+
module RegExpPatterns {
518+
/**
519+
* Gets a pattern that matches common top-level domain names.
520+
*/
521+
string commonTLD() {
522+
// according to ranking by http://google.com/search?q=site:.<<TLD>>
523+
result = "com|org|edu|gov|uk|net|io"
524+
}
525+
}

0 commit comments

Comments
 (0)