Skip to content

Commit e5c31dd

Browse files
Preserve _keep_empty in copying and encoding.
1 parent eaa9ce6 commit e5c31dd

File tree

2 files changed, with 118 additions and 49 deletions.

Lib/test/test_urlparse.py

Lines changed: 79 additions & 44 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
1+
import copy
12
import functools
23
import sys
34
import unicodedata
45
import unittest
56
import urllib.parse
6-
from urllib.parse import urlparse, urlsplit, urlunparse, urlunsplit
7+
from urllib.parse import urldefrag, urlparse, urlsplit, urlunparse, urlunsplit
78

89
RFC1808_BASE = "http://a/b/c/d;p?q#f"
910
RFC2396_BASE = "http://a/b/c/d;p?q"
@@ -391,14 +392,14 @@ def checkJoin(self, base, relurl, expected, *, relroundtrip=True):
391392
self.assertEqual(urllib.parse.urljoin(baseb, relurlb), expectedb)
392393

393394
if relroundtrip:
394-
relurl2 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl))
395+
relurl2 = urlunsplit(urlsplit(relurl))
395396
self.assertEqual(urllib.parse.urljoin(base, relurl2), expected)
396-
relurlb2 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb))
397+
relurlb2 = urlunsplit(urlsplit(relurlb))
397398
self.assertEqual(urllib.parse.urljoin(baseb, relurlb2), expectedb)
398399

399-
relurl3 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurl, allow_none=True))
400+
relurl3 = urlunsplit(urlsplit(relurl, allow_none=True))
400401
self.assertEqual(urllib.parse.urljoin(base, relurl3), expected)
401-
relurlb3 = urllib.parse.urlunsplit(urllib.parse.urlsplit(relurlb, allow_none=True))
402+
relurlb3 = urlunsplit(urlsplit(relurlb, allow_none=True))
402403
self.assertEqual(urllib.parse.urljoin(baseb, relurlb3), expectedb)
403404

404405
def test_unparse_parse(self):
@@ -458,9 +459,9 @@ def test_RFC1808(self):
458459

459460
def test_RFC2368(self):
460461
# Issue 11467: path that starts with a number is not parsed correctly
461-
self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
462+
self.assertEqual(urlparse('mailto:1337@example.org'),
462463
('mailto', '', '1337@example.org', '', '', ''))
463-
self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org', allow_none=True),
464+
self.assertEqual(urlparse('mailto:1337@example.org', allow_none=True),
464465
('mailto', None, '1337@example.org', None, None, None))
465466

466467
def test_RFC2396(self):
@@ -1119,50 +1120,50 @@ def test_withoutscheme(self, allow_none):
11191120
# RFC 1808 specifies that netloc should start with //, urlparse expects
11201121
# the same, otherwise it classifies the portion of url as path.
11211122
none = None if allow_none else ''
1122-
self.assertEqual(urllib.parse.urlparse("path", allow_none=allow_none),
1123+
self.assertEqual(urlparse("path", allow_none=allow_none),
11231124
(none, none, 'path', none, none, none))
1124-
self.assertEqual(urllib.parse.urlparse("//www.python.org:80", allow_none=allow_none),
1125+
self.assertEqual(urlparse("//www.python.org:80", allow_none=allow_none),
11251126
(none, 'www.python.org:80', '', none, none, none))
1126-
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80", allow_none=allow_none),
1127+
self.assertEqual(urlparse("http://www.python.org:80", allow_none=allow_none),
11271128
('http', 'www.python.org:80', '', none, none, none))
11281129
# Repeat for bytes input
11291130
none = None if allow_none else b''
1130-
self.assertEqual(urllib.parse.urlparse(b"path", allow_none=allow_none),
1131+
self.assertEqual(urlparse(b"path", allow_none=allow_none),
11311132
(none, none, b'path', none, none, none))
1132-
self.assertEqual(urllib.parse.urlparse(b"//www.python.org:80", allow_none=allow_none),
1133+
self.assertEqual(urlparse(b"//www.python.org:80", allow_none=allow_none),
11331134
(none, b'www.python.org:80', b'', none, none, none))
1134-
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80", allow_none=allow_none),
1135+
self.assertEqual(urlparse(b"http://www.python.org:80", allow_none=allow_none),
11351136
(b'http', b'www.python.org:80', b'', none, none, none))
11361137

11371138
@parametrise_allow_none
11381139
def test_portseparator(self, allow_none):
11391140
# Issue 754016 makes changes for port separator ':' from scheme separator
11401141
none = None if allow_none else ''
1141-
self.assertEqual(urllib.parse.urlparse("http:80", allow_none=allow_none),
1142+
self.assertEqual(urlparse("http:80", allow_none=allow_none),
11421143
('http', none, '80', none, none, none))
1143-
self.assertEqual(urllib.parse.urlparse("https:80", allow_none=allow_none),
1144+
self.assertEqual(urlparse("https:80", allow_none=allow_none),
11441145
('https', none, '80', none, none, none))
1145-
self.assertEqual(urllib.parse.urlparse("path:80", allow_none=allow_none),
1146+
self.assertEqual(urlparse("path:80", allow_none=allow_none),
11461147
('path', none, '80', none, none, none))
1147-
self.assertEqual(urllib.parse.urlparse("http:", allow_none=allow_none),
1148+
self.assertEqual(urlparse("http:", allow_none=allow_none),
11481149
('http', none, '', none, none, none))
1149-
self.assertEqual(urllib.parse.urlparse("https:", allow_none=allow_none),
1150+
self.assertEqual(urlparse("https:", allow_none=allow_none),
11501151
('https', none, '', none, none, none))
1151-
self.assertEqual(urllib.parse.urlparse("http://www.python.org:80", allow_none=allow_none),
1152+
self.assertEqual(urlparse("http://www.python.org:80", allow_none=allow_none),
11521153
('http', 'www.python.org:80', '', none, none, none))
11531154
# As usual, need to check bytes input as well
11541155
none = None if allow_none else b''
1155-
self.assertEqual(urllib.parse.urlparse(b"http:80", allow_none=allow_none),
1156+
self.assertEqual(urlparse(b"http:80", allow_none=allow_none),
11561157
(b'http', none, b'80', none, none, none))
1157-
self.assertEqual(urllib.parse.urlparse(b"https:80", allow_none=allow_none),
1158+
self.assertEqual(urlparse(b"https:80", allow_none=allow_none),
11581159
(b'https', none, b'80', none, none, none))
1159-
self.assertEqual(urllib.parse.urlparse(b"path:80", allow_none=allow_none),
1160+
self.assertEqual(urlparse(b"path:80", allow_none=allow_none),
11601161
(b'path', none, b'80', none, none, none))
1161-
self.assertEqual(urllib.parse.urlparse(b"http:", allow_none=allow_none),
1162+
self.assertEqual(urlparse(b"http:", allow_none=allow_none),
11621163
(b'http', none, b'', none, none, none))
1163-
self.assertEqual(urllib.parse.urlparse(b"https:", allow_none=allow_none),
1164+
self.assertEqual(urlparse(b"https:", allow_none=allow_none),
11641165
(b'https', none, b'', none, none, none))
1165-
self.assertEqual(urllib.parse.urlparse(b"http://www.python.org:80", allow_none=allow_none),
1166+
self.assertEqual(urlparse(b"http://www.python.org:80", allow_none=allow_none),
11661167
(b'http', b'www.python.org:80', b'', none, none, none))
11671168

11681169
def test_usingsys(self):
@@ -1173,24 +1174,24 @@ def test_usingsys(self):
11731174
def test_anyscheme(self, allow_none):
11741175
# Issue 7904: s3://foo.com/stuff has netloc "foo.com".
11751176
none = None if allow_none else ''
1176-
self.assertEqual(urllib.parse.urlparse("s3://foo.com/stuff", allow_none=allow_none),
1177+
self.assertEqual(urlparse("s3://foo.com/stuff", allow_none=allow_none),
11771178
('s3', 'foo.com', '/stuff', none, none, none))
1178-
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff", allow_none=allow_none),
1179+
self.assertEqual(urlparse("x-newscheme://foo.com/stuff", allow_none=allow_none),
11791180
('x-newscheme', 'foo.com', '/stuff', none, none, none))
1180-
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
1181+
self.assertEqual(urlparse("x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
11811182
('x-newscheme', 'foo.com', '/stuff', none, 'query', 'fragment'))
1182-
self.assertEqual(urllib.parse.urlparse("x-newscheme://foo.com/stuff?query", allow_none=allow_none),
1183+
self.assertEqual(urlparse("x-newscheme://foo.com/stuff?query", allow_none=allow_none),
11831184
('x-newscheme', 'foo.com', '/stuff', none, 'query', none))
11841185

11851186
# And for bytes...
11861187
none = None if allow_none else b''
1187-
self.assertEqual(urllib.parse.urlparse(b"s3://foo.com/stuff", allow_none=allow_none),
1188+
self.assertEqual(urlparse(b"s3://foo.com/stuff", allow_none=allow_none),
11881189
(b's3', b'foo.com', b'/stuff', none, none, none))
1189-
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff", allow_none=allow_none),
1190+
self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff", allow_none=allow_none),
11901191
(b'x-newscheme', b'foo.com', b'/stuff', none, none, none))
1191-
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
1192+
self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff?query#fragment", allow_none=allow_none),
11921193
(b'x-newscheme', b'foo.com', b'/stuff', none, b'query', b'fragment'))
1193-
self.assertEqual(urllib.parse.urlparse(b"x-newscheme://foo.com/stuff?query", allow_none=allow_none),
1194+
self.assertEqual(urlparse(b"x-newscheme://foo.com/stuff?query", allow_none=allow_none),
11941195
(b'x-newscheme', b'foo.com', b'/stuff', none, b'query', none))
11951196

11961197
def test_default_scheme(self):
@@ -1274,12 +1275,10 @@ def test_mixed_types_rejected(self):
12741275
with self.assertRaisesRegex(TypeError, "Cannot mix str"):
12751276
urllib.parse.urljoin(b"http://python.org", "http://python.org")
12761277

1277-
def _check_result_type(self, str_type):
1278-
num_args = len(str_type._fields)
1278+
def _check_result_type(self, str_type, str_args):
12791279
bytes_type = str_type._encoded_counterpart
12801280
self.assertIs(bytes_type._decoded_counterpart, str_type)
1281-
str_args = ('',) * num_args
1282-
bytes_args = (b'',) * num_args
1281+
bytes_args = tuple(self._encode(s) for s in str_args)
12831282
str_result = str_type(*str_args)
12841283
bytes_result = bytes_type(*bytes_args)
12851284
encoding = 'ascii'
@@ -1298,16 +1297,52 @@ def _check_result_type(self, str_type):
12981297
self.assertEqual(str_result.encode(encoding), bytes_result)
12991298
self.assertEqual(str_result.encode(encoding, errors), bytes_args)
13001299
self.assertEqual(str_result.encode(encoding, errors), bytes_result)
1300+
for result in str_result, bytes_result:
1301+
self.assertEqual(copy.copy(result), result)
1302+
self.assertEqual(copy.deepcopy(result), result)
1303+
self.assertEqual(copy.replace(result), result)
1304+
self.assertEqual(result._replace(), result)
13011305

13021306
def test_result_pairs(self):
13031307
# Check encoding and decoding between result pairs
1304-
result_types = [
1305-
urllib.parse.DefragResult,
1306-
urllib.parse.SplitResult,
1307-
urllib.parse.ParseResult,
1308-
]
1309-
for result_type in result_types:
1310-
self._check_result_type(result_type)
1308+
self._check_result_type(urllib.parse.DefragResult, ('', ''))
1309+
self._check_result_type(urllib.parse.DefragResult, ('', None))
1310+
self._check_result_type(urllib.parse.SplitResult, ('', '', '', '', ''))
1311+
self._check_result_type(urllib.parse.SplitResult, (None, None, '', None, None))
1312+
self._check_result_type(urllib.parse.ParseResult, ('', '', '', '', '', ''))
1313+
self._check_result_type(urllib.parse.ParseResult, (None, None, '', None, None, None))
1314+
1315+
def test_result_encoding_decoding(self):
1316+
def check(str_result, bytes_result):
1317+
self.assertEqual(str_result.encode(), bytes_result)
1318+
self.assertEqual(str_result.encode().geturl(), bytes_result.geturl())
1319+
self.assertEqual(bytes_result.decode(), str_result)
1320+
self.assertEqual(bytes_result.decode().geturl(), str_result.geturl())
1321+
1322+
url = 'http://example.com/?#'
1323+
burl = url.encode()
1324+
for func in urldefrag, urlsplit, urlparse:
1325+
check(func(url, allow_none=True), func(burl, allow_none=True))
1326+
check(func(url), func(burl))
1327+
1328+
def test_result_copying(self):
1329+
def check(result):
1330+
self.assertEqual(copy.copy(result), result)
1331+
self.assertEqual(copy.copy(result).geturl(), result.geturl())
1332+
self.assertEqual(copy.deepcopy(result), result)
1333+
self.assertEqual(copy.deepcopy(result).geturl(), result.geturl())
1334+
self.assertEqual(copy.replace(result), result)
1335+
self.assertEqual(copy.replace(result).geturl(), result.geturl())
1336+
self.assertEqual(result._replace(), result)
1337+
self.assertEqual(result._replace().geturl(), result.geturl())
1338+
1339+
url = 'http://example.com/?#'
1340+
burl = url.encode()
1341+
for func in urldefrag, urlsplit, urlparse:
1342+
check(func(url))
1343+
check(func(url, allow_none=True))
1344+
check(func(burl))
1345+
check(func(burl, allow_none=True))
13111346

13121347
def test_parse_qs_encoding(self):
13131348
result = urllib.parse.parse_qs("key=\u0141%E9", encoding="latin-1")

Lib/urllib/parse.py

Lines changed: 39 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -146,19 +146,29 @@ class _ResultMixinStr(object):
146146
__slots__ = ()
147147

148148
def encode(self, encoding='ascii', errors='strict'):
149-
return self._encoded_counterpart(*(x.encode(encoding, errors)
149+
result = self._encoded_counterpart(*(x.encode(encoding, errors)
150150
if x is not None else None
151151
for x in self))
152+
try:
153+
result._keep_empty = self._keep_empty
154+
except AttributeError:
155+
pass
156+
return result
152157

153158

154159
class _ResultMixinBytes(object):
155160
"""Standard approach to decoding parsed results from bytes to str"""
156161
__slots__ = ()
157162

158163
def decode(self, encoding='ascii', errors='strict'):
159-
return self._decoded_counterpart(*(x.decode(encoding, errors)
164+
result = self._decoded_counterpart(*(x.decode(encoding, errors)
160165
if x is not None else None
161166
for x in self))
167+
try:
168+
result._keep_empty = self._keep_empty
169+
except AttributeError:
170+
pass
171+
return result
162172

163173

164174
class _NetlocResultMixinBase(object):
@@ -270,20 +280,44 @@ def _hostinfo(self):
270280
_UNSPECIFIED = ['not specified']
271281
_ALLOW_NONE_DEFAULT = False
272282

273-
class _DefragResultBase(namedtuple('_DefragResultBase', 'url fragment')):
283+
class _ResultBase:
284+
def __replace__(self, /, **kwargs):
285+
result = super().__replace__(**kwargs)
286+
try:
287+
result._keep_empty = self._keep_empty
288+
except AttributeError:
289+
pass
290+
return result
291+
292+
def _replace(self, /, **kwargs):
293+
result = super()._replace(**kwargs)
294+
try:
295+
result._keep_empty = self._keep_empty
296+
except AttributeError:
297+
pass
298+
return result
299+
300+
def __copy__(self):
301+
return self
302+
303+
def __deepcopy__(self, memo):
304+
return self
305+
306+
307+
class _DefragResultBase(_ResultBase, namedtuple('_DefragResultBase', 'url fragment')):
274308
def geturl(self):
275309
if self.fragment or (self.fragment is not None and
276310
getattr(self, '_keep_empty', _ALLOW_NONE_DEFAULT)):
277311
return self.url + self._HASH + self.fragment
278312
else:
279313
return self.url
280314

281-
class _SplitResultBase(namedtuple(
315+
class _SplitResultBase(_ResultBase, namedtuple(
282316
'_SplitResultBase', 'scheme netloc path query fragment')):
283317
def geturl(self):
284318
return urlunsplit(self)
285319

286-
class _ParseResultBase(namedtuple(
320+
class _ParseResultBase(_ResultBase, namedtuple(
287321
'_ParseResultBase', 'scheme netloc path params query fragment')):
288322
def geturl(self):
289323
return urlunparse(self)

0 commit comments

Comments
 (0)