Skip to content

Commit 52ca696

Browse files
author
Esben Sparre Andreasen
committed
JS: add query js/incomplete-url-regexp
1 parent a4b3b1e commit 52ca696

File tree

7 files changed

+221
-0
lines changed

7 files changed

+221
-0
lines changed

javascript/config/suites/javascript/security

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
+ semmlecode-javascript-queries/DOM/TargetBlank.ql: /Security/CWE/CWE-200
22
+ semmlecode-javascript-queries/Electron/EnablingNodeIntegration.ql: /Security/CWE/CWE-094
3+
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlRegExp.ql: /Security/CWE/CWE-020
34
+ semmlecode-javascript-queries/Security/CWE-020/IncompleteUrlSubstringSanitization.ql: /Security/CWE/CWE-020
45
+ semmlecode-javascript-queries/Security/CWE-020/IncorrectSuffixCheck.ql: /Security/CWE/CWE-020
56
+ semmlecode-javascript-queries/Security/CWE-022/TaintedPath.ql: /Security/CWE/CWE-022
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
<!DOCTYPE qhelp PUBLIC
2+
"-//Semmle//qhelp//EN"
3+
"qhelp.dtd">
4+
<qhelp>
5+
6+
<overview>
7+
<p>
8+
9+
Sanitizing untrusted URLs is an important technique for
10+
preventing attacks such as request forgeries and malicious
11+
redirections. Usually, this is done by checking that the host of a URL
12+
is in a set of allowed hosts.
13+
14+
</p>
15+
16+
<p>
17+
18+
If a regular expression implements such a check, it is
19+
easy to accidentally make the check too permissive by not escaping the
20+
<code>.</code> meta-characters appropriately.
21+
22+
Even if the check is not used in a security-critical
23+
context, the incomplete check may still cause undesirable behaviors
24+
when the check succeeds accidentally.
25+
26+
</p>
27+
</overview>
28+
29+
<recommendation>
30+
<p>
31+
32+
Escape all meta-characters appropriately when constructing
33+
regular expressions for security checks, pay special attention to the
34+
<code>.</code> meta-character.
35+
36+
</p>
37+
</recommendation>
38+
39+
<example>
40+
41+
<p>
42+
43+
The following example code checks that a URL redirection
44+
will reach the <code>example.com</code> domain, or one of its
45+
subdomains.
46+
47+
</p>
48+
49+
<sample src="examples/IncompleteUrlRegExp.js"/>
50+
51+
<p>
52+
53+
The check is however easy to bypass because the unescaped
54+
<code>.</code> allows for any character before
55+
<code>example.com</code>, effectively allowing the redirect to go to
56+
an attacker-controlled domain such as <code>wwwXexample.com</code>.
57+
58+
Address this vulnerability by escaping <code>.</code>
59+
appropriately: <code>let regex =/(www|beta|)\.example\.com/</code>.
60+
61+
</p>
62+
63+
</example>
64+
65+
<references>
66+
<li>OWASP: <a href="https://www.owasp.org/index.php/Server_Side_Request_Forgery">SSRF</a></li>
67+
<li>OWASP: <a href="https://www.owasp.org/index.php/Unvalidated_Redirects_and_Forwards_Cheat_Sheet">XSS Unvalidated Redirects and Forwards Cheat Sheet</a>.</li>
68+
</references>
69+
</qhelp>
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
/**
2+
* @name Incomplete URL regular expression
3+
* @description Security checks on URLs using regular expressions are sometimes vulnerable to bypassing.
4+
* @kind problem
5+
* @problem.severity error
6+
* @precision high
7+
* @id js/incomplete-url-regexp
8+
* @tags correctness
9+
* security
10+
* external/cwe/cwe-20
11+
*/
12+
13+
import javascript
14+
import semmle.javascript.security.dataflow.RegExpInjection
15+
16+
module IncompleteUrlRegExpTracking {
17+
18+
/**
19+
* A taint tracking configuration for incomplete URL regular expressions sources.
20+
*/
21+
class Configuration extends TaintTracking::Configuration {
22+
Configuration() { this = "IncompleteUrlRegExpTracking" }
23+
24+
override
25+
predicate isSource(DataFlow::Node source) {
26+
isIncompleteHostNameRegExpPattern(source.asExpr().(ConstantString).getStringValue(), _)
27+
}
28+
29+
override
30+
predicate isSink(DataFlow::Node sink) {
31+
sink instanceof RegExpInjection::Sink
32+
}
33+
34+
}
35+
36+
}
37+
38+
/**
39+
* Holds if `pattern` is a regular expression pattern for URLs with a host matched by `hostPart`,
40+
* and `pattern` contains a subtle mistake that allows it to match unexpected hosts.
41+
*/
42+
bindingset[pattern]
43+
predicate isIncompleteHostNameRegExpPattern(string pattern, string hostPart) {
44+
hostPart = pattern.regexpCapture(
45+
"(?i).*" +
46+
// Either:
47+
// - an unescaped and repeated `.`, followed by anything
48+
// - a unescaped single `.`
49+
"(?:(?<!\\\\)[.][+*].*?|(?<!\\\\)[.])" +
50+
// a sequence of subdomains, perhaps with some regex characters mixed in, followed by a known TLD
51+
"([():|?a-z0-9-]+(\\\\)?[.](com|org|edu|gov|uk|net))" +
52+
".*", 1)
53+
}
54+
55+
from Expr e, string pattern, string intendedHost
56+
where
57+
(
58+
e.(RegExpLiteral).getValue() = pattern or
59+
exists (IncompleteUrlRegExpTracking::Configuration cfg |
60+
cfg.hasFlow(e.flow(), _) and
61+
e.mayHaveStringValue(pattern)
62+
)
63+
) and
64+
isIncompleteHostNameRegExpPattern(pattern, intendedHost)
65+
and
66+
// ignore patterns with capture groups after the TLD
67+
not pattern.regexpMatch("(?i).*[.](com|org|edu|gov|uk|net).*[(][?]:.*[)].*")
68+
69+
70+
select e, "This regular expression has an unescaped '.', which means that '" + intendedHost + "' might not match the intended host of a matched URL."
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
app.get('/some/path', function(req, res) {
2+
let url = req.param('url'),
3+
host = urlLib.parse(url).host;
4+
// BAD: the host of `url` may be controlled by an attacker
5+
let regex = /(www|beta|).example.com/;
6+
if (host.match(regex)) {
7+
res.redirect(url);
8+
}
9+
});
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
| tst-IncompleteUrlRegExp.js:3:2:3:28 | /http:\\ ... le.com/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
2+
| tst-IncompleteUrlRegExp.js:5:2:5:28 | /http:\\ ... le.net/ | This regular expression has an unescaped '.', which means that 'example.net' might not match the intended host of a matched URL. |
3+
| tst-IncompleteUrlRegExp.js:6:2:6:42 | /http:\\ ... b).com/ | This regular expression has an unescaped '.', which means that '(example-a\|example-b).com' might not match the intended host of a matched URL. |
4+
| tst-IncompleteUrlRegExp.js:7:2:7:30 | /http:\\ ... le.com/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
5+
| tst-IncompleteUrlRegExp.js:9:2:9:39 | /http:\\ ... le.com/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
6+
| tst-IncompleteUrlRegExp.js:11:13:11:37 | "http:/ ... le.com" | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
7+
| tst-IncompleteUrlRegExp.js:12:10:12:34 | "http:/ ... le.com" | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
8+
| tst-IncompleteUrlRegExp.js:15:22:15:46 | "http:/ ... le.com" | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
9+
| tst-IncompleteUrlRegExp.js:17:13:17:31 | `test.example.com$` | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
10+
| tst-IncompleteUrlRegExp.js:17:14:17:30 | test.example.com$ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
11+
| tst-IncompleteUrlRegExp.js:19:17:19:34 | 'test.example.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
12+
| tst-IncompleteUrlRegExp.js:22:27:22:44 | 'test.example.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
13+
| tst-IncompleteUrlRegExp.js:28:22:28:39 | 'test.example.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
14+
| tst-IncompleteUrlRegExp.js:36:2:36:37 | /(.+\\.( ... \\.com)/ | This regular expression has an unescaped '.', which means that '(?:example-a\|example-b)\\.com' might not match the intended host of a matched URL. |
15+
| tst-IncompleteUrlRegExp.js:37:2:37:54 | /^(http ... =$\|\\/)/ | This regular expression has an unescaped '.', which means that ')?example.com' might not match the intended host of a matched URL. |
16+
| tst-IncompleteUrlRegExp.js:38:2:38:44 | /^(http ... p\\/f\\// | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
17+
| tst-IncompleteUrlRegExp.js:39:2:39:34 | /\\(http ... m\\/\\)/g | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
18+
| tst-IncompleteUrlRegExp.js:40:2:40:29 | /https? ... le.com/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
19+
| tst-IncompleteUrlRegExp.js:41:13:41:68 | '^http: ... e\\.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
20+
| tst-IncompleteUrlRegExp.js:41:41:41:68 | '^https ... e\\.com' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
21+
| tst-IncompleteUrlRegExp.js:42:13:42:61 | 'http[s ... \\/(.+)' | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
22+
| tst-IncompleteUrlRegExp.js:43:2:43:33 | /^https ... e.com$/ | This regular expression has an unescaped '.', which means that 'example.com' might not match the intended host of a matched URL. |
23+
| tst-IncompleteUrlRegExp.js:44:9:44:100 | 'protos ... ernal)' | This regular expression has an unescaped '.', which means that 'example-b.com' might not match the intended host of a matched URL. |
24+
| tst-IncompleteUrlRegExp.js:46:2:46:26 | /exampl ... le.com/ | This regular expression has an unescaped '.', which means that 'dev\|example.com' might not match the intended host of a matched URL. |
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Security/CWE-020/IncompleteUrlRegExp.ql
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
(function() {
2+
/http:\/\/example.com/; // OK
3+
/http:\/\/test.example.com/; // NOT OK
4+
/http:\/\/test\\.example.com/; // OK
5+
/http:\/\/test.example.net/; // NOT OK
6+
/http:\/\/test.(example-a|example-b).com/; // NOT OK
7+
/http:\/\/(.+)\\.example.com/; // NOT OK
8+
/http:\/\/(\\.+)\\.example.com/; // OK
9+
/http:\/\/(?:.+)\\.test\\.example.com/; // NOT OK
10+
/http:\/\/test.example.com\/(?:.*)/; // OK
11+
new RegExp("http://test.example.com"); // NOT OK
12+
s.match("http://test.example.com"); // NOT OK
13+
14+
function id(e) { return e; }
15+
new RegExp(id(id(id("http://test.example.com")))); // NOT OK
16+
17+
new RegExp(`test.example.com$`); // NOT OK
18+
19+
let hostname = 'test.example.com'; // NOT OK
20+
new RegExp(`${hostname}$`);
21+
22+
let domain = { hostname: 'test.example.com' };
23+
new RegExp(domain.hostname);
24+
25+
function convert(domain) {
26+
return new RegExp(domain.hostname);
27+
}
28+
convert({ hostname: 'test.example.com' }); // NOT OK
29+
30+
let domains = [ { hostname: 'test.example.com' } ]; // NOT OK, but not yet supported
31+
function convert(domain) {
32+
return new RegExp(domain.hostname);
33+
}
34+
domains.map(d => convert(d));
35+
36+
/(.+\.(?:example-a|example-b)\.com)/; // NOT OK
37+
/^(https?:)?\/\/((service|www).)?example.com(?=$|\/)/; // NOT OK
38+
/^(http|https):\/\/www.example.com\/p\/f\//; // NOT OK
39+
/\(http:\/\/sub.example.com\/\)/g; // NOT OK
40+
/https?:\/\/api.example.com/; // NOT OK
41+
new RegExp('^http://localhost:8000|' + '^https?://.+\.example\.com'); // NOT OK
42+
new RegExp('http[s]?:\/\/?sub1\.sub2\.example\.com\/f\/(.+)'); // NOT OK
43+
/^https:\/\/[a-z]*.example.com$/; // NOT OK
44+
RegExp('protos?://(localhost|.+.example.net|.+.example-a.com|.+.example-b.com|.+.example.internal)'); // NOT OK
45+
46+
/example.dev|example.com/; // OK, but still flagged
47+
});

0 commit comments

Comments
 (0)