@@ -1,9 +1,10 @@
+import copy
 import functools
 import sys
 import unicodedata
 import unittest
 import urllib.parse
-from urllib.parse import urlparse, urlsplit, urlunparse, urlunsplit
+from urllib.parse import urldefrag, urlparse, urlsplit, urlunparse, urlunsplit

 RFC1808_BASE = "http://a/b/c/d;p?q#f"
 RFC2396_BASE = "http://a/b/c/d;p?q"
@@ -391,14 +392,14 @@ def checkJoin(self, base, relurl, expected, *, relroundtrip=True):
         self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)

         if relroundtrip:
-            relurl2 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
+            relurl2 = urlunsplit(urlsplit(relurl))
             self.assertEqual(urllib.parse.urljoin(base, relurl2), expected)
-            relurlb2 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
+            relurlb2 = urlunsplit(urlsplit(relurlb))
             self.assertEqual(urllib.parse.urljoin(baseb, relurlb2), expectedb)

-            relurl3 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl, allow_none=True))
+            relurl3 = urlunsplit(urlsplit(relurl, allow_none=True))
             self.assertEqual(urllib.parse.urljoin(base, relurl3), expected)
-            relurlb3 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb, allow_none=True))
+            relurlb3 = urlunsplit(urlsplit(relurlb, allow_none=True))
             self.assertEqual(urllib.parse.urljoin(baseb, relurlb3), expectedb)

     def test_unparse_parse(self):
@@ -458,9 +459,9 @@ def test_RFC1808(self):

     def test_RFC2368(self):
         # Issue 11467: path that starts with a number is not parsed correctly
-        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
+        self.assertEqual(urlparse('mailto:1337@example.org'),
                          ('mailto', '', '1337@example.org', '', '', ''))
-        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org', allow_none=True),
+        self.assertEqual(urlparse('mailto:1337@example.org', allow_none=True),
                          ('mailto', None, '1337@example.org', None, None, None))

     def test_RFC2396(self):
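Note: the allow_none keyword exercised above is introduced by this change set and is not part of the released urllib.parse. A minimal sketch of the behaviour the test asserts, assuming the patched module is importable:

    from urllib.parse import urlparse  # patched urllib.parse from this change set

    default = urlparse('mailto:1337@example.org')
    relaxed = urlparse('mailto:1337@example.org', allow_none=True)  # new keyword

    # By default missing components come back as empty strings; with
    # allow_none=True they come back as None, matching the assertions above.
    assert default.netloc == '' and default.fragment == ''
    assert relaxed.netloc is None and relaxed.fragment is None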
@@ -1119,50 +1120,50 @@ def test_withoutscheme(self, allow_none):
         # RFC 1808 specifies that netloc should start with //, urlparse expects
         # the same, otherwise it classifies the portion of url as path.
         none = None if allow_none else ''
-        self.assertEqual(urllib.parse.urlparse("path", allow_none=allow_none),
+        self.assertEqual(urlparse("path", allow_none=allow_none),
                          (none, none, 'path', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("//www.python.org:80", allow_none=allow_none),
+        self.assertEqual(urlparse("//www.python.org:80", allow_none=allow_none),
                          (none, 'www.python.org:80', '', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80", allow_none=allow_none),
+        self.assertEqual(urlparse("http://www.python.org:80", allow_none=allow_none),
                          ('http', 'www.python.org:80', '', none, none, none))
         # Repeat for bytes input
         none = None if allow_none else b''
-        self.assertEqual(urllib.parse.urlparse(b"path", allow_none=allow_none),
+        self.assertEqual(urlparse(b"path", allow_none=allow_none),
                          (none, none, b'path', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80", allow_none=allow_none),
+        self.assertEqual(urlparse(b"//www.python.org:80", allow_none=allow_none),
                          (none, b'www.python.org:80', b'', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80", allow_none=allow_none),
+        self.assertEqual(urlparse(b"http://www.python.org:80", allow_none=allow_none),
                          (b'http', b'www.python.org:80', b'', none, none, none))

     @parametrise_allow_none
     def test_portseparator(self, allow_none):
         # Issue 754016 makes changes for port separator ':' from scheme separator
         none = None if allow_none else ''
-        self.assertEqual(urllib.parse.urlparse("http:80", allow_none=allow_none),
+        self.assertEqual(urlparse("http:80", allow_none=allow_none),
                          ('http', none, '80', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("https:80", allow_none=allow_none),
+        self.assertEqual(urlparse("https:80", allow_none=allow_none),
                          ('https', none, '80', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("path:80", allow_none=allow_none),
+        self.assertEqual(urlparse("path:80", allow_none=allow_none),
                          ('path', none, '80', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("http:", allow_none=allow_none),
+        self.assertEqual(urlparse("http:", allow_none=allow_none),
                          ('http', none, '', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("https:", allow_none=allow_none),
+        self.assertEqual(urlparse("https:", allow_none=allow_none),
                          ('https', none, '', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("http://www.python.org:80", allow_none=allow_none),
+        self.assertEqual(urlparse("http://www.python.org:80", allow_none=allow_none),
                          ('http', 'www.python.org:80', '', none, none, none))
         # As usual, need to check bytes input as well
         none = None if allow_none else b''
-        self.assertEqual(urllib.parse.urlparse(b"http:80", allow_none=allow_none),
+        self.assertEqual(urlparse(b"http:80", allow_none=allow_none),
                          (b'http', none, b'80', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"https:80", allow_none=allow_none),
+        self.assertEqual(urlparse(b"https:80", allow_none=allow_none),
                          (b'https', none, b'80', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"path:80", allow_none=allow_none),
+        self.assertEqual(urlparse(b"path:80", allow_none=allow_none),
                          (b'path', none, b'80', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"http:", allow_none=allow_none),
+        self.assertEqual(urlparse(b"http:", allow_none=allow_none),
                          (b'http', none, b'', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"https:", allow_none=allow_none),
+        self.assertEqual(urlparse(b"https:", allow_none=allow_none),
                          (b'https', none, b'', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80", allow_none=allow_none),
+        self.assertEqual(urlparse(b"http://www.python.org:80", allow_none=allow_none),
                          (b'http', b'www.python.org:80', b'', none, none, none))

     def test_usingsys(self):
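Note: parametrise_allow_none is a test helper defined elsewhere in this file and not shown in these hunks. Assuming it simply reruns each decorated test for both values of allow_none, a sketch of such a decorator (the name comes from the diff; the body is an assumption) could be:

    import functools

    def parametrise_allow_none(test):
        # Assumed shape: run the wrapped test with allow_none=False and again
        # with allow_none=True, labelling each run via subTest.
        @functools.wraps(test)
        def wrapper(self):
            for allow_none in (False, True):
                with self.subTest(allow_none=allow_none):
                    test(self, allow_none=allow_none)
        return wrapper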
@@ -1173,24 +1174,24 @@ def test_usingsys(self):
     def test_anyscheme(self, allow_none):
         # Issue 7904: s3://foo.com/stuff has netloc "foo.com".
         none = None if allow_none else ''
-        self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff", allow_none=allow_none),
+        self.assertEqual(urlparse("s3://foo.com/stuff", allow_none=allow_none),
                          ('s3', 'foo.com', '/stuff', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff", allow_none=allow_none),
+        self.assertEqual(urlparse("x-newscheme://foo.com/stuff", allow_none=allow_none),
                          ('x-newscheme', 'foo.com', '/stuff', none, none, none))
-        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
+        self.assertEqual(urlparse("x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
                          ('x-newscheme', 'foo.com', '/stuff', none, 'query', 'fragment'))
-        self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query", allow_none=allow_none),
+        self.assertEqual(urlparse("x-newscheme://foo.com/stuff?query", allow_none=allow_none),
                          ('x-newscheme', 'foo.com', '/stuff', none, 'query', none))

         # And for bytes...
         none = None if allow_none else b''
-        self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff", allow_none=allow_none),
+        self.assertEqual(urlparse(b"s3://foo.com/stuff", allow_none=allow_none),
                          (b's3', b'foo.com', b'/stuff', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff", allow_none=allow_none),
+        self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff", allow_none=allow_none),
                          (b'x-newscheme', b'foo.com', b'/stuff', none, none, none))
-        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
+        self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
                          (b'x-newscheme', b'foo.com', b'/stuff', none, b'query', b'fragment'))
-        self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query", allow_none=allow_none),
+        self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff?query", allow_none=allow_none),
                          (b'x-newscheme', b'foo.com', b'/stuff', none, b'query', none))

     def test_default_scheme(self):
@@ -1274,12 +1275,10 @@ def test_mixed_types_rejected(self):
         with self.assertRaisesRegex(TypeError, "Cannot mix str"):
             urllib.parse.urljoin(b"http://python.org", "http://python.org")

-    def _check_result_type(self, str_type):
-        num_args = len(str_type._fields)
+    def _check_result_type(self, str_type, str_args):
         bytes_type = str_type._encoded_counterpart
         self.assertIs(bytes_type._decoded_counterpart, str_type)
-        str_args = ('',) * num_args
-        bytes_args = (b'',) * num_args
+        bytes_args = tuple(self._encode(s) for s in str_args)
        str_result = str_type(*str_args)
         bytes_result = bytes_type(*bytes_args)
         encoding = 'ascii'
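Note: self._encode is not defined in the hunks shown here. Since str_args may now mix '' and None, it presumably encodes string components and passes None through, roughly (an assumption about the helper, not its actual body):

    def _encode(self, s):
        # Assumed helper on the test case: None stays None, str becomes bytes.
        return s if s is None else s.encode('ascii')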
@@ -1298,16 +1297,52 @@ def _check_result_type(self, str_type):
         self.assertEqual(str_result.encode(encoding), bytes_result)
         self.assertEqual(str_result.encode(encoding, errors), bytes_args)
         self.assertEqual(str_result.encode(encoding, errors), bytes_result)
+        for result in str_result, bytes_result:
+            self.assertEqual(copy.copy(result), result)
+            self.assertEqual(copy.deepcopy(result), result)
+            self.assertEqual(copy.replace(result), result)
+            self.assertEqual(result._replace(), result)

     def test_result_pairs(self):
         # Check encoding and decoding between result pairs
-        result_types = [
-            urllib.parse.DefragResult,
-            urllib.parse.SplitResult,
-            urllib.parse.ParseResult,
-        ]
-        for result_type in result_types:
-            self._check_result_type(result_type)
+        self._check_result_type(urllib.parse.DefragResult, ('', ''))
+        self._check_result_type(urllib.parse.DefragResult, ('', None))
+        self._check_result_type(urllib.parse.SplitResult, ('', '', '', '', ''))
+        self._check_result_type(urllib.parse.SplitResult, (None, None, '', None, None))
+        self._check_result_type(urllib.parse.ParseResult, ('', '', '', '', '', ''))
+        self._check_result_type(urllib.parse.ParseResult, (None, None, '', None, None, None))
+
+    def test_result_encoding_decoding(self):
+        def check(str_result, bytes_result):
+            self.assertEqual(str_result.encode(), bytes_result)
+            self.assertEqual(str_result.encode().geturl(), bytes_result.geturl())
+            self.assertEqual(bytes_result.decode(), str_result)
+            self.assertEqual(bytes_result.decode().geturl(), str_result.geturl())
+
+        url = 'http://example.com/?#'
+        burl = url.encode()
+        for func in urldefrag, urlsplit, urlparse:
+            check(func(url, allow_none=True), func(burl, allow_none=True))
+            check(func(url), func(burl))
+
+    def test_result_copying(self):
+        def check(result):
+            self.assertEqual(copy.copy(result), result)
+            self.assertEqual(copy.copy(result).geturl(), result.geturl())
+            self.assertEqual(copy.deepcopy(result), result)
+            self.assertEqual(copy.deepcopy(result).geturl(), result.geturl())
+            self.assertEqual(copy.replace(result), result)
+            self.assertEqual(copy.replace(result).geturl(), result.geturl())
+            self.assertEqual(result._replace(), result)
+            self.assertEqual(result._replace().geturl(), result.geturl())
+
+        url = 'http://example.com/?#'
+        burl = url.encode()
+        for func in urldefrag, urlsplit, urlparse:
+            check(func(url))
+            check(func(url, allow_none=True))
+            check(func(burl))
+            check(func(burl, allow_none=True))

     def test_parse_qs_encoding(self):
         result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")
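Note: the new copying tests lean on copy.replace(), which exists in Python 3.13+ and delegates to the result types' namedtuple __replace__/_replace machinery. A quick sketch of what they exercise (requires 3.13 or newer; no patched module needed for this part):

    import copy
    from urllib.parse import urlsplit

    parts = urlsplit('http://example.com/?#')
    # A no-change replace must round-trip the result object ...
    assert copy.replace(parts) == parts
    assert copy.deepcopy(parts).geturl() == parts.geturl()
    # ... and field replacement behaves like namedtuple._replace().
    assert copy.replace(parts, path='/index').path == '/index'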