Skip to content

Commit 39ce50f

Browse files
committed
Python: Fix problems with sinks in pathlib
This must mean that we did not have this flow with the old call-graph, which means the new call-graph is doing a better job (yay).
1 parent edcaff2 commit 39ce50f

File tree

7 files changed

+71
-18
lines changed

7 files changed

+71
-18
lines changed

python/ql/lib/semmle/python/frameworks/Stdlib.qll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1462,7 +1462,19 @@ private module StdlibPrivate {
14621462
t.start() and
14631463
result = openCall and
14641464
(
1465-
openCall instanceof OpenCall
1465+
openCall instanceof OpenCall and
1466+
// don't include the open call inside of Path.open in pathlib.py since
1467+
// the call to `path_obj.open` is covered by `PathLibOpenCall`.
1468+
not exists(Module mod, Class cls, Function func |
1469+
openCall.(OpenCall).asCfgNode().getScope() = func and
1470+
func.getName() = "open" and
1471+
func.getScope() = cls and
1472+
cls.getName() = "Path" and
1473+
cls.getScope() = mod and
1474+
mod.getName() = "pathlib" and
1475+
// do allow this call if we're analyzing pathlib.py as part of CPython though
1476+
not exists(mod.getFile().getRelativePath())
1477+
)
14661478
or
14671479
openCall instanceof PathLibOpenCall
14681480
)

python/ql/lib/semmle/python/security/dataflow/CleartextStorageCustomizations.qll

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,34 @@ module CleartextStorage {
5050

5151
/** The data written to a file, considered as a flow sink. */
5252
class FileWriteDataAsSink extends Sink {
53-
FileWriteDataAsSink() { this = any(FileSystemWriteAccess write).getADataNode() }
53+
FileWriteDataAsSink() {
54+
this = any(FileSystemWriteAccess write).getADataNode() and
55+
// since implementation of Path.write_bytes in pathlib.py is like
56+
// ```py
57+
// def write_bytes(self, data):
58+
// with self.open(mode='wb') as f:
59+
// return f.write(data)
60+
// ```
61+
// any time we would report flow to the `Path.write_bytes` sink, we can ALSO report
62+
// the flow from the `data` parameter to the `f.write` sink -- obviously we
63+
// don't want that.
64+
//
65+
// However, simply removing taint edges out of a sink is not a good enough solution,
66+
// since we would only flag one of the `p.write` calls in the following example
67+
// due to use-use flow
68+
// ```py
69+
// p.write(user_controlled)
70+
// p.write(user_controlled)
71+
// ```
72+
//
73+
// The same approach is used in the command injection query.
74+
not exists(Module pathlib |
75+
pathlib.getName() = "pathlib" and
76+
this.getScope().getEnclosingModule() = pathlib and
77+
// do allow this call if we're analyzing pathlib.py as part of CPython though
78+
not exists(pathlib.getFile().getRelativePath())
79+
)
80+
}
5481
}
5582

5683
/** The data written to a cookie on a HTTP response, considered as a flow sink. */

python/ql/lib/semmle/python/security/dataflow/CommandInjectionCustomizations.qll

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ module CommandInjection {
7676
// `subprocess`. See:
7777
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/os.py#L974
7878
// https://github.com/python/cpython/blob/fa7ce080175f65d678a7d5756c94f82887fc9803/Lib/subprocess.py#L341
79+
//
80+
// The same approach is used in the path-injection and cleartext-storage queries.
7981
not this.getScope().getEnclosingModule().getName() in [
8082
"os", "subprocess", "platform", "popen2"
8183
]

python/ql/lib/semmle/python/security/dataflow/PathInjectionCustomizations.qll

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,33 @@ module PathInjection {
5858
* A file system access, considered as a flow sink.
5959
*/
6060
class FileSystemAccessAsSink extends Sink {
61-
FileSystemAccessAsSink() { this = any(FileSystemAccess e).getAPathArgument() }
61+
FileSystemAccessAsSink() {
62+
this = any(FileSystemAccess e).getAPathArgument() and
63+
// since implementation of Path.open in pathlib.py is like
64+
// ```py
65+
// def open(self, ...):
66+
// return io.open(self, ...)
67+
// ```
68+
// any time we would report flow to the `path_obj.open` sink, we can ALSO report
69+
// the flow from the `self` parameter to the `io.open` sink -- obviously we
70+
// don't want that.
71+
//
72+
// However, simply removing taint edges out of a sink is not a good enough solution,
73+
// since we would only flag one of the `p.open` calls in the following example
74+
// due to use-use flow
75+
// ```py
76+
// p.open()
77+
// p.open()
78+
// ```
79+
//
80+
// The same approach is used in the command injection query.
81+
not exists(Module pathlib |
82+
pathlib.getName() = "pathlib" and
83+
this.getScope().getEnclosingModule() = pathlib and
84+
// do allow this call if we're analyzing pathlib.py as part of CPython though
85+
not exists(pathlib.getFile().getRelativePath())
86+
)
87+
}
6288
}
6389

6490
private import semmle.python.frameworks.data.ModelsAsData

python/ql/test/library-tests/frameworks/stdlib-py3/FileSystemAccess.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
p.write_bytes(b"hello") # $ getAPathArgument=p fileWriteData=b"hello"
1515
p.write_text("hello") # $ getAPathArgument=p fileWriteData="hello"
16-
p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello" SPURIOUS: getAPathArgument=self
16+
p.open("wt").write("hello") # $ getAPathArgument=p fileWriteData="hello"
1717

1818
name = windows.parent.name
1919
o = open

python/ql/test/query-tests/Security/CWE-022-PathInjection/PathInjection.expected

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
edges
2-
| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self |
32
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | flask_path_injection.py:19:15:19:21 | ControlFlowNode for request |
43
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:1:26:1:32 | GSSA Variable request |
54
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request |
@@ -56,8 +55,6 @@ edges
5655
| pathlib_use.py:12:16:12:22 | ControlFlowNode for request | pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute |
5756
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:14:5:14:5 | ControlFlowNode for p |
5857
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
59-
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 |
60-
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self |
6158
| test.py:0:0:0:0 | ModuleVariableNode for test.request | test.py:9:12:9:18 | ControlFlowNode for request |
6259
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | test.py:3:26:3:32 | GSSA Variable request |
6360
| test.py:3:26:3:32 | GSSA Variable request | test.py:0:0:0:0 | ModuleVariableNode for test.request |
@@ -80,8 +77,6 @@ edges
8077
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x |
8178
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:48:13:48:24 | ControlFlowNode for normalize() |
8279
nodes
83-
| file:///usr/lib/python3.8/pathlib.py:1214:14:1214:17 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
84-
| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | semmle.label | ControlFlowNode for self |
8580
| flask_path_injection.py:0:0:0:0 | ModuleVariableNode for flask_path_injection.request | semmle.label | ModuleVariableNode for flask_path_injection.request |
8681
| flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
8782
| flask_path_injection.py:1:26:1:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -143,7 +138,6 @@ nodes
143138
| pathlib_use.py:12:16:12:27 | ControlFlowNode for Attribute | semmle.label | ControlFlowNode for Attribute |
144139
| pathlib_use.py:14:5:14:5 | ControlFlowNode for p | semmle.label | ControlFlowNode for p |
145140
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
146-
| pathlib_use.py:17:5:17:6 | ControlFlowNode for p2 | semmle.label | ControlFlowNode for p2 |
147141
| test.py:0:0:0:0 | ModuleVariableNode for test.request | semmle.label | ModuleVariableNode for test.request |
148142
| test.py:3:26:3:32 | ControlFlowNode for ImportMember | semmle.label | ControlFlowNode for ImportMember |
149143
| test.py:3:26:3:32 | GSSA Variable request | semmle.label | GSSA Variable request |
@@ -169,7 +163,6 @@ subpaths
169163
| test.py:25:19:25:19 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:25:9:25:20 | ControlFlowNode for normalize() |
170164
| test.py:48:23:48:23 | ControlFlowNode for x | test.py:12:15:12:15 | ControlFlowNode for x | test.py:13:12:13:30 | ControlFlowNode for Attribute() | test.py:48:13:48:24 | ControlFlowNode for normalize() |
171165
#select
172-
| file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | file:///usr/lib/python3.8/pathlib.py:1222:24:1222:27 | ControlFlowNode for self | This path depends on a $@. | pathlib_use.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
173166
| flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | flask_path_injection.py:21:32:21:38 | ControlFlowNode for dirname | This path depends on a $@. | flask_path_injection.py:1:26:1:32 | ControlFlowNode for ImportMember | user-provided value |
174167
| path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:13:14:13:47 | ControlFlowNode for Attribute() | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
175168
| path_injection.py:21:14:21:18 | ControlFlowNode for npath | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | path_injection.py:21:14:21:18 | ControlFlowNode for npath | This path depends on a $@. | path_injection.py:3:26:3:32 | ControlFlowNode for ImportMember | user-provided value |
Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,14 @@
11
edges
2-
| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data |
3-
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
42
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert |
53
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() |
64
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert |
7-
| test.py:12:21:12:24 | ControlFlowNode for cert | file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data |
85
nodes
9-
| file:///usr/lib/python3.8/pathlib.py:1248:26:1248:29 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
10-
| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | semmle.label | ControlFlowNode for data |
116
| test.py:9:12:9:21 | ControlFlowNode for get_cert() | semmle.label | ControlFlowNode for get_cert() |
127
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
13-
| test.py:12:21:12:24 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
148
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
159
| test.py:15:26:15:29 | ControlFlowNode for cert | semmle.label | ControlFlowNode for cert |
1610
subpaths
1711
#select
18-
| file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | test.py:9:12:9:21 | ControlFlowNode for get_cert() | file:///usr/lib/python3.8/pathlib.py:1256:28:1256:31 | ControlFlowNode for data | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
1912
| test.py:12:21:12:24 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:12:21:12:24 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
2013
| test.py:13:22:13:41 | ControlFlowNode for Attribute() | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:13:22:13:41 | ControlFlowNode for Attribute() | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |
2114
| test.py:15:26:15:29 | ControlFlowNode for cert | test.py:9:12:9:21 | ControlFlowNode for get_cert() | test.py:15:26:15:29 | ControlFlowNode for cert | This expression stores $@ as clear text. | test.py:9:12:9:21 | ControlFlowNode for get_cert() | sensitive data (certificate) |

0 commit comments

Comments
 (0)