Skip to content

Commit fd270cc

Browse files
committed
Python: Add basic taint support for urlsplit/urlparse
1 parent 74345b1 commit fd270cc

File tree

8 files changed

+206
-0
lines changed

8 files changed

+206
-0
lines changed

python/ql/src/semmle/python/security/strings/External.qll

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ abstract class ExternalStringKind extends StringKind {
1818
json_load(fromnode, tonode) and result.(ExternalJsonKind).getValue() = this
1919
or
2020
tonode.(DictNode).getAValue() = fromnode and result.(ExternalStringDictKind).getValue() = this
21+
or
22+
urlsplit(fromnode, tonode) and result.(ExternalUrlSplitResult).getItem() = this
23+
or
24+
urlparse(fromnode, tonode) and result.(ExternalUrlParseResult).getItem() = this
2125
}
2226
}
2327

@@ -65,6 +69,65 @@ class ExternalStringSequenceDictKind extends DictKind {
6569
ExternalStringSequenceDictKind() { this.getValue() instanceof ExternalStringSequenceKind }
6670
}
6771

72+
/** TaintKind for the result of `urlsplit(tainted_string)` */
73+
class ExternalUrlSplitResult extends ExternalStringSequenceKind {
74+
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
75+
override TaintKind getTaintOfAttribute(string name) {
76+
result = super.getTaintOfAttribute(name)
77+
or
78+
(
79+
// namedtuple field names
80+
name = "scheme" or
81+
name = "netloc" or
82+
name = "path" or
83+
name = "query" or
84+
name = "fragment" or
85+
// class methods
86+
name = "username" or
87+
name = "password" or
88+
name = "hostname"
89+
) and
90+
result instanceof ExternalStringKind
91+
}
92+
93+
override TaintKind getTaintOfMethodResult(string name) {
94+
result = super.getTaintOfMethodResult(name)
95+
or
96+
name = "geturl" and
97+
result instanceof ExternalStringKind
98+
}
99+
}
100+
101+
/** TaintKind for the result of `urlparse(tainted_string)` */
102+
class ExternalUrlParseResult extends ExternalStringSequenceKind {
103+
// https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
104+
override TaintKind getTaintOfAttribute(string name) {
105+
result = super.getTaintOfAttribute(name)
106+
or
107+
(
108+
// namedtuple field names
109+
name = "scheme" or
110+
name = "netloc" or
111+
name = "path" or
112+
name = "params" or
113+
name = "query" or
114+
name = "fragment" or
115+
// class methods
116+
name = "username" or
117+
name = "password" or
118+
name = "hostname"
119+
) and
120+
result instanceof ExternalStringKind
121+
}
122+
123+
override TaintKind getTaintOfMethodResult(string name) {
124+
result = super.getTaintOfMethodResult(name)
125+
or
126+
name = "geturl" and
127+
result instanceof ExternalStringKind
128+
}
129+
}
130+
68131
/* Helper for getTaintForStep() */
69132
pragma[noinline]
70133
private predicate json_subscript_taint(
@@ -83,6 +146,44 @@ private predicate json_load(ControlFlowNode fromnode, CallNode tonode) {
83146
)
84147
}
85148

149+
private predicate urlsplit(ControlFlowNode fromnode, CallNode tonode) {
150+
// This could be implemented as `exists(FunctionValue` without the explicit six part,
151+
// but then our tests will need to import +100 modules, so for now this slightly
152+
// altered version gets to live on.
153+
exists(Value urlsplit |
154+
(
155+
urlsplit = Value::named("six.moves.urllib.parse.urlsplit")
156+
or
157+
// Python 2
158+
urlsplit = Value::named("urlparse.urlsplit")
159+
or
160+
// Python 3
161+
urlsplit = Value::named("urllib.parse.urlsplit")
162+
) and
163+
tonode = urlsplit.getACall() and
164+
tonode.getArg(0) = fromnode
165+
)
166+
}
167+
168+
private predicate urlparse(ControlFlowNode fromnode, CallNode tonode) {
169+
// This could be implemented as `exists(FunctionValue` without the explicit six part,
170+
// but then our tests will need to import +100 modules, so for now this slightly
171+
// altered version gets to live on.
172+
exists(Value urlparse |
173+
(
174+
urlparse = Value::named("six.moves.urllib.parse.urlparse")
175+
or
176+
// Python 2
177+
urlparse = Value::named("urlparse.urlparse")
178+
or
179+
// Python 3
180+
urlparse = Value::named("urllib.parse.urlparse")
181+
) and
182+
tonode = urlparse.getACall() and
183+
tonode.getArg(0) = fromnode
184+
)
185+
}
186+
86187
/** A kind of "taint", representing an open file-like object from an external source. */
87188
class ExternalFileObject extends TaintKind {
88189
ExternalFileObject() { this = "file[" + any(ExternalStringKind key) + "]" }
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
import python
2+
import semmle.python.security.TaintTracking
3+
import semmle.python.security.strings.Untrusted
4+
5+
class SimpleSource extends TaintSource {
6+
SimpleSource() { this.(NameNode).getId() = "TAINTED_STRING" }
7+
8+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringKind }
9+
10+
override string toString() { result = "taint source" }
11+
}
12+
13+
class ListSource extends TaintSource {
14+
ListSource() { this.(NameNode).getId() = "TAINTED_LIST" }
15+
16+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringSequenceKind }
17+
18+
override string toString() { result = "list taint source" }
19+
}
20+
21+
class DictSource extends TaintSource {
22+
DictSource() { this.(NameNode).getId() = "TAINTED_DICT" }
23+
24+
override predicate isSourceOf(TaintKind kind) { kind instanceof ExternalStringDictKind }
25+
26+
override string toString() { result = "dict taint source" }
27+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
| test.py:13 | test_basic | a | externally controlled string |
2+
| test.py:13 | test_basic | b | externally controlled string |
3+
| test.py:13 | test_basic | c | externally controlled string |
4+
| test.py:13 | test_basic | d | externally controlled string |
5+
| test.py:13 | test_basic | urlsplit_res | [externally controlled string] |
6+
| test.py:20 | test_sanitizer | Attribute | externally controlled string |
7+
| test.py:23 | test_sanitizer | Subscript | externally controlled string |
8+
| test.py:33 | test_namedtuple | a | NO TAINT |
9+
| test.py:33 | test_namedtuple | b | NO TAINT |
10+
| test.py:33 | test_namedtuple | c | NO TAINT |
11+
| test.py:33 | test_namedtuple | d | NO TAINT |
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
import python
2+
import semmle.python.security.TaintTracking
3+
import Taint
4+
5+
from Call call, Expr arg, string taint_string
6+
where
7+
call.getLocation().getFile().getShortName() = "test.py" and
8+
call.getFunc().(Name).getId() = "test" and
9+
arg = call.getAnArg() and
10+
(
11+
not exists(TaintedNode tainted | tainted.getAstNode() = arg) and
12+
taint_string = "NO TAINT"
13+
or
14+
exists(TaintedNode tainted | tainted.getAstNode() = arg |
15+
taint_string = tainted.getTaintKind().toString()
16+
)
17+
)
18+
select arg.getLocation().toString(), call.getScope().(Function).getName(), arg.toString(), taint_string
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from six.moves.urllib.parse import urlsplit
2+
3+
# Currently we don't have support for namedtuples in general, but do have special support
4+
# for `urlsplit` (and `urlparse`)
5+
6+
def test_basic():
7+
tainted_string = TAINTED_STRING
8+
urlsplit_res = urlsplit(tainted_string)
9+
a = urlsplit_res.netloc # field access
10+
b = urlsplit_res.hostname # property
11+
c = urlsplit_res[3] # indexing
12+
_, _, d, _, _ = urlsplit(tainted_string) # unpacking
13+
test(a, b, c, d, urlsplit_res)
14+
15+
def test_sanitizer():
16+
tainted_string = TAINTED_STRING
17+
urlsplit_res = urlsplit(tainted_string)
18+
19+
if urlsplit_res.netloc == "OK":
20+
test(urlsplit_res.netloc) # TODO: FP, should not be tainted here
21+
22+
if urlsplit_res[2] == "OK":
23+
test(urlsplit_res[0]) # TODO: FP, should not be tainted here
24+
25+
def test_namedtuple():
26+
tainted_string = TAINTED_STRING
27+
Point = namedtuple('Point', ['x', 'y'])
28+
p = Point('safe', tainted_string)
29+
a = p.x
30+
b = p.y
31+
c = p[0]
32+
d = p[1]
33+
test(a, b, c, d) # TODO: FN, at least p.y and p[1] should be tainted

python/ql/test/library-tests/taint/strings/TestStep.expected

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
| Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | | --> | Taint [externally controlled string] | test.py:69 | test.py:69:10:69:21 | urlsplit_res | |
2+
| Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | | --> | Taint [externally controlled string] | test.py:69 | test.py:69:24:69:35 | urlparse_res | |
13
| Taint exception.info | test.py:44 | test.py:44:22:44:26 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info |
24
| Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:45 | test.py:45:12:45:22 | func() | p1 = exception.info |
35
| Taint exception.info | test.py:45 | test.py:45:17:45:21 | taint | p1 = exception.info | --> | Taint exception.info | test.py:52 | test.py:52:19:52:21 | arg | p0 = exception.info |
@@ -56,6 +58,10 @@
5658
| Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | | --> | Taint externally controlled string | test.py:58 | test.py:58:10:58:12 | res | |
5759
| Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:44 | test.py:44:22:44:26 | taint | p1 = externally controlled string |
5860
| Taint externally controlled string | test.py:57 | test.py:57:38:57:40 | ext | | --> | Taint externally controlled string | test.py:57 | test.py:57:11:57:41 | cross_over() | |
61+
| Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | |
62+
| Taint externally controlled string | test.py:66 | test.py:66:22:66:35 | TAINTED_STRING | | --> | Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | |
63+
| Taint externally controlled string | test.py:67 | test.py:67:29:67:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:67 | test.py:67:20:67:43 | urlsplit() | |
64+
| Taint externally controlled string | test.py:68 | test.py:68:29:68:42 | tainted_string | | --> | Taint [externally controlled string] | test.py:68 | test.py:68:20:68:43 | urlparse() | |
5965
| Taint json[externally controlled string] | test.py:6 | test.py:6:20:6:45 | Attribute() | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | |
6066
| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint externally controlled string | test.py:7 | test.py:7:9:7:25 | Subscript | |
6167
| Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:20 | tainted_json | | --> | Taint json[externally controlled string] | test.py:7 | test.py:7:9:7:25 | Subscript | |

python/ql/test/library-tests/taint/strings/TestTaint.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,5 @@
2020
| test.py:42 | test_str2 | c | externally controlled string |
2121
| test.py:50 | test_exc_info | res | exception.info |
2222
| test.py:58 | test_untrusted | res | externally controlled string |
23+
| test.py:69 | test_urlsplit_urlparse | urlparse_res | [externally controlled string] |
24+
| test.py:69 | test_urlsplit_urlparse | urlsplit_res | [externally controlled string] |

python/ql/test/library-tests/taint/strings/test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,11 @@ def test_untrusted():
5959

6060
def exc_untrusted_call(arg):
6161
return arg
62+
63+
from six.moves.urllib.parse import urlsplit, urlparse
64+
65+
def test_urlsplit_urlparse():
66+
tainted_string = TAINTED_STRING
67+
urlsplit_res = urlsplit(tainted_string)
68+
urlparse_res = urlparse(tainted_string)
69+
test(urlsplit_res, urlparse_res)

0 commit comments

Comments
 (0)