@@ -2,45 +2,40 @@ import python
22import Basic
33private import Common
44
5- /** An extensible kind of taint representing an externally controlled string.
5+ /**
6+ * An extensible kind of taint representing an externally controlled string.
67 */
78abstract class ExternalStringKind extends StringKind {
8-
99 bindingset [ this ]
10- ExternalStringKind ( ) {
11- this = this
12- }
10+ ExternalStringKind ( ) { this = this }
1311
1412 override TaintKind getTaintForFlowStep ( ControlFlowNode fromnode , ControlFlowNode tonode ) {
1513 result = StringKind .super .getTaintForFlowStep ( fromnode , tonode )
1614 or
17- tonode .( SequenceNode ) .getElement ( _) = fromnode and result .( ExternalStringSequenceKind ) .getItem ( ) = this
15+ tonode .( SequenceNode ) .getElement ( _) = fromnode and
16+ result .( ExternalStringSequenceKind ) .getItem ( ) = this
1817 or
1918 json_load ( fromnode , tonode ) and result .( ExternalJsonKind ) .getValue ( ) = this
2019 or
2120 tonode .( DictNode ) .getAValue ( ) = fromnode and result .( ExternalStringDictKind ) .getValue ( ) = this
21+ or
22+ urlsplit ( fromnode , tonode ) and result .( ExternalUrlSplitResult ) .getItem ( ) = this
23+ or
24+ urlparse ( fromnode , tonode ) and result .( ExternalUrlParseResult ) .getItem ( ) = this
2225 }
23-
2426}
2527
2628/** A kind of "taint", representing a sequence, with a "taint" member */
2729class ExternalStringSequenceKind extends SequenceKind {
28-
29- ExternalStringSequenceKind ( ) {
30- this .getItem ( ) instanceof ExternalStringKind
31- }
32-
30+ ExternalStringSequenceKind ( ) { this .getItem ( ) instanceof ExternalStringKind }
3331}
3432
35- /** An hierachical dictionary or list where the entire structure is externally controlled
33+ /**
34+ * An hierachical dictionary or list where the entire structure is externally controlled
3635 * This is typically a parsed JSON object.
3736 */
3837class ExternalJsonKind extends TaintKind {
39-
40- ExternalJsonKind ( ) {
41- this = "json[" + any ( ExternalStringKind key ) + "]"
42- }
43-
38+ ExternalJsonKind ( ) { this = "json[" + any ( ExternalStringKind key ) + "]" }
4439
4540 /** Gets the taint kind for item in this sequence */
4641 TaintKind getValue ( ) {
@@ -54,65 +49,225 @@ class ExternalJsonKind extends TaintKind {
5449 json_subscript_taint ( tonode , fromnode , this , result )
5550 or
5651 result = this and copy_call ( fromnode , tonode )
57- }
52+ }
5853
5954 override TaintKind getTaintOfMethodResult ( string name ) {
6055 name = "get" and result = this .getValue ( )
61- }
62-
56+ }
6357}
6458
6559/** A kind of "taint", representing a dictionary mapping str->"taint" */
6660class ExternalStringDictKind extends DictKind {
61+ ExternalStringDictKind ( ) { this .getValue ( ) instanceof ExternalStringKind }
62+ }
6763
68- ExternalStringDictKind ( ) {
69- this .getValue ( ) instanceof ExternalStringKind
64+ /**
65+ * A kind of "taint", representing a dictionary mapping strings to sequences of
66+ * tainted strings
67+ */
68+ class ExternalStringSequenceDictKind extends DictKind {
69+ ExternalStringSequenceDictKind ( ) { this .getValue ( ) instanceof ExternalStringSequenceKind }
70+ }
71+
72+ /** TaintKind for the result of `urlsplit(tainted_string)` */
73+ class ExternalUrlSplitResult extends ExternalStringSequenceKind {
74+ // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlsplit
75+ override TaintKind getTaintOfAttribute ( string name ) {
76+ result = super .getTaintOfAttribute ( name )
77+ or
78+ (
79+ // namedtuple field names
80+ name = "scheme" or
81+ name = "netloc" or
82+ name = "path" or
83+ name = "query" or
84+ name = "fragment" or
85+ // class methods
86+ name = "username" or
87+ name = "password" or
88+ name = "hostname"
89+ ) and
90+ result instanceof ExternalStringKind
7091 }
7192
93+ override TaintKind getTaintOfMethodResult ( string name ) {
94+ result = super .getTaintOfMethodResult ( name )
95+ or
96+ name = "geturl" and
97+ result instanceof ExternalStringKind
98+ }
7299}
73100
74- /** A kind of "taint", representing a dictionary mapping strings to sequences of
75- * tainted strings */
101+ /** TaintKind for the result of `urlparse(tainted_string)` */
102+ class ExternalUrlParseResult extends ExternalStringSequenceKind {
103+ // https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlparse
104+ override TaintKind getTaintOfAttribute ( string name ) {
105+ result = super .getTaintOfAttribute ( name )
106+ or
107+ (
108+ // namedtuple field names
109+ name = "scheme" or
110+ name = "netloc" or
111+ name = "path" or
112+ name = "params" or
113+ name = "query" or
114+ name = "fragment" or
115+ // class methods
116+ name = "username" or
117+ name = "password" or
118+ name = "hostname"
119+ ) and
120+ result instanceof ExternalStringKind
121+ }
76122
77- class ExternalStringSequenceDictKind extends DictKind {
78- ExternalStringSequenceDictKind ( ) {
79- this .getValue ( ) instanceof ExternalStringSequenceKind
123+ override TaintKind getTaintOfMethodResult ( string name ) {
124+ result = super .getTaintOfMethodResult ( name )
125+ or
126+ name = "geturl" and
127+ result instanceof ExternalStringKind
80128 }
81129}
82130
83131/* Helper for getTaintForStep() */
84- pragma [ noinline]
85- private predicate json_subscript_taint ( SubscriptNode sub , ControlFlowNode obj , ExternalJsonKind seq , TaintKind key ) {
132+ pragma [ noinline]
133+ private predicate json_subscript_taint (
134+ SubscriptNode sub , ControlFlowNode obj , ExternalJsonKind seq , TaintKind key
135+ ) {
86136 sub .isLoad ( ) and
87137 sub .getValue ( ) = obj and
88138 key = seq .getValue ( )
89139}
90140
91-
92141private predicate json_load ( ControlFlowNode fromnode , CallNode tonode ) {
93142 exists ( FunctionObject json_loads |
94143 ModuleObject:: named ( "json" ) .attr ( "loads" ) = json_loads and
95- json_loads .getACall ( ) = tonode and tonode .getArg ( 0 ) = fromnode
144+ json_loads .getACall ( ) = tonode and
145+ tonode .getArg ( 0 ) = fromnode
96146 )
97147}
98148
99- /** A kind of "taint", representing an open file-like object from an external source. */
100- class ExternalFileObject extends TaintKind {
149+ private predicate urlsplit ( ControlFlowNode fromnode , CallNode tonode ) {
150+ // This could be implemented as `exists(FunctionValue` without the explicit six part,
151+ // but then our tests will need to import +100 modules, so for now this slightly
152+ // altered version gets to live on.
153+ exists ( Value urlsplit |
154+ (
155+ urlsplit = Value:: named ( "six.moves.urllib.parse.urlsplit" )
156+ or
157+ // Python 2
158+ urlsplit = Value:: named ( "urlparse.urlsplit" )
159+ or
160+ // Python 3
161+ urlsplit = Value:: named ( "urllib.parse.urlsplit" )
162+ ) and
163+ tonode = urlsplit .getACall ( ) and
164+ tonode .getArg ( 0 ) = fromnode
165+ )
166+ }
101167
102- ExternalFileObject ( ) {
103- this = "file[" + any ( ExternalStringKind key ) + "]"
104- }
168+ private predicate urlparse ( ControlFlowNode fromnode , CallNode tonode ) {
169+ // This could be implemented as `exists(FunctionValue` without the explicit six part,
170+ // but then our tests will need to import +100 modules, so for now this slightly
171+ // altered version gets to live on.
172+ exists ( Value urlparse |
173+ (
174+ urlparse = Value:: named ( "six.moves.urllib.parse.urlparse" )
175+ or
176+ // Python 2
177+ urlparse = Value:: named ( "urlparse.urlparse" )
178+ or
179+ // Python 3
180+ urlparse = Value:: named ( "urllib.parse.urlparse" )
181+ ) and
182+ tonode = urlparse .getACall ( ) and
183+ tonode .getArg ( 0 ) = fromnode
184+ )
185+ }
105186
187+ /** A kind of "taint", representing an open file-like object from an external source. */
188+ class ExternalFileObject extends TaintKind {
189+ ExternalFileObject ( ) { this = "file[" + any ( ExternalStringKind key ) + "]" }
106190
107191 /** Gets the taint kind for the contents of this file */
108- TaintKind getValue ( ) {
109- this = "file[" + result + "]"
110- }
192+ TaintKind getValue ( ) { this = "file[" + result + "]" }
111193
112194 override TaintKind getTaintOfMethodResult ( string name ) {
113195 name = "read" and result = this .getValue ( )
114196 }
115-
116197}
117198
199+ /**
200+ * Temporary sanitizer for the tainted result from `urlsplit` and `urlparse`. Can be used to reduce FPs until
201+ * we have better support for namedtuples.
202+ *
203+ * Will clear **all** taint on a test of the kind. That is, on the true edge of any matching test,
204+ * all fields/indexes will be cleared of taint.
205+ *
206+ * Handles:
207+ * - `if splitres.netloc == "KNOWN_VALUE"`
208+ * - `if splitres[0] == "KNOWN_VALUE"`
209+ */
210+ class UrlsplitUrlparseTempSanitizer extends Sanitizer {
211+ // TODO: remove this once we have better support for named tuples
212+
213+ UrlsplitUrlparseTempSanitizer ( ) { this = "UrlsplitUrlparseTempSanitizer" }
118214
215+ override predicate sanitizingEdge ( TaintKind taint , PyEdgeRefinement test ) {
216+ (
217+ taint instanceof ExternalUrlSplitResult
218+ or
219+ taint instanceof ExternalUrlParseResult
220+ ) and
221+ exists ( ControlFlowNode full_use |
222+ full_use .( SubscriptNode ) .getObject ( ) = test .getInput ( ) .getAUse ( )
223+ or
224+ full_use .( AttrNode ) .getObject ( ) = test .getInput ( ) .getAUse ( )
225+ |
226+ clears_taint ( _, full_use , test .getTest ( ) , test .getSense ( ) )
227+ )
228+ }
229+
230+ private predicate clears_taint ( ControlFlowNode final_test , ControlFlowNode tainted , ControlFlowNode test , boolean sense ) {
231+ test_equality_with_const ( final_test , tainted , sense )
232+ or
233+ test_in_const_seq ( final_test , tainted , sense )
234+ or
235+ test .( UnaryExprNode ) .getNode ( ) .getOp ( ) instanceof Not and
236+ exists ( ControlFlowNode nested_test |
237+ nested_test = test .( UnaryExprNode ) .getOperand ( ) and
238+ clears_taint ( final_test , tainted , nested_test , sense .booleanNot ( ) )
239+ )
240+ }
241+
242+ /** holds for `== "KNOWN_VALUE"` on `true` edge, and `!= "KNOWN_VALUE"` on `false` edge */
243+ private predicate test_equality_with_const ( CompareNode cmp , ControlFlowNode tainted , boolean sense ) {
244+ exists ( ControlFlowNode const , Cmpop op |
245+ const .getNode ( ) instanceof StrConst
246+ |
247+ (
248+ cmp .operands ( const , op , tainted )
249+ or
250+ cmp .operands ( tainted , op , const )
251+ ) and
252+ (
253+ op instanceof Eq and sense = true
254+ or
255+ op instanceof NotEq and sense = false
256+ )
257+ )
258+ }
259+
260+ /** holds for `in ["KNOWN_VALUE", ...]` on `true` edge, and `not in ["KNOWN_VALUE", ...]` on `false` edge */
261+ private predicate test_in_const_seq ( CompareNode cmp , ControlFlowNode tainted , boolean sense ) {
262+ exists ( SequenceNode const_seq , Cmpop op |
263+ forall ( ControlFlowNode elem | elem = const_seq .getAnElement ( ) | elem .getNode ( ) instanceof StrConst )
264+ |
265+ cmp .operands ( tainted , op , const_seq ) and
266+ (
267+ op instanceof In and sense = true
268+ or
269+ op instanceof NotIn and sense = false
270+ )
271+ )
272+ }
273+ }
0 commit comments