2929
3030import re
3131import sys
32+ import types
3233import collections
3334import warnings
3435
@@ -179,6 +180,8 @@ def port(self):
179180 raise ValueError ("Port out of range 0-65535" )
180181 return port
181182
183+ __class_getitem__ = classmethod (types .GenericAlias )
184+
182185
183186class _NetlocResultMixinStr (_NetlocResultMixinBase , _ResultMixinStr ):
184187 __slots__ = ()
@@ -369,9 +372,23 @@ def _fix_result_transcoding():
369372def urlparse (url , scheme = '' , allow_fragments = True ):
370373 """Parse a URL into 6 components:
371374 <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
372- Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
373- Note that we don't break the components up in smaller bits
374- (e.g. netloc is a single string) and we don't expand % escapes."""
375+
376+ The result is a named 6-tuple with fields corresponding to the
377+ above. It is either a ParseResult or ParseResultBytes object,
378+ depending on the type of the url parameter.
379+
380+ The username, password, hostname, and port sub-components of netloc
381+ can also be accessed as attributes of the returned object.
382+
383+ The scheme argument provides the default value of the scheme
384+ component when no scheme is found in url.
385+
386+ If allow_fragments is False, no attempt is made to separate the
387+ fragment component from the previous component, which can be either
388+ path or query.
389+
390+ Note that % escapes are not expanded.
391+ """
375392 url , scheme , _coerce_result = _coerce_args (url , scheme )
376393 splitresult = urlsplit (url , scheme , allow_fragments )
377394 scheme , netloc , url , query , fragment = splitresult
@@ -417,20 +434,33 @@ def _checknetloc(netloc):
417434 raise ValueError ("netloc '" + netloc + "' contains invalid " +
418435 "characters under NFKC normalization" )
419436
420- def _remove_unsafe_bytes_from_url (url ):
421- for b in _UNSAFE_URL_BYTES_TO_REMOVE :
422- url = url .replace (b , "" )
423- return url
424-
425437def urlsplit (url , scheme = '' , allow_fragments = True ):
426438 """Parse a URL into 5 components:
427439 <scheme>://<netloc>/<path>?<query>#<fragment>
428- Return a 5-tuple: (scheme, netloc, path, query, fragment).
429- Note that we don't break the components up in smaller bits
430- (e.g. netloc is a single string) and we don't expand % escapes."""
440+
441+ The result is a named 5-tuple with fields corresponding to the
442+ above. It is either a SplitResult or SplitResultBytes object,
443+ depending on the type of the url parameter.
444+
445+ The username, password, hostname, and port sub-components of netloc
446+ can also be accessed as attributes of the returned object.
447+
448+ The scheme argument provides the default value of the scheme
449+ component when no scheme is found in url.
450+
451+ If allow_fragments is False, no attempt is made to separate the
452+ fragment component from the previous component, which can be either
453+ path or query.
454+
455+ Note that % escapes are not expanded.
456+ """
457+
431458 url , scheme , _coerce_result = _coerce_args (url , scheme )
432- url = _remove_unsafe_bytes_from_url (url )
433- scheme = _remove_unsafe_bytes_from_url (scheme )
459+
460+ for b in _UNSAFE_URL_BYTES_TO_REMOVE :
461+ url = url .replace (b , "" )
462+ scheme = scheme .replace (b , "" )
463+
434464 allow_fragments = bool (allow_fragments )
435465 key = url , scheme , allow_fragments , type (url ), type (scheme )
436466 cached = _parse_cache .get (key , None )
@@ -441,31 +471,11 @@ def urlsplit(url, scheme='', allow_fragments=True):
441471 netloc = query = fragment = ''
442472 i = url .find (':' )
443473 if i > 0 :
444- if url [:i ] == 'http' : # optimize the common case
445- url = url [i + 1 :]
446- if url [:2 ] == '//' :
447- netloc , url = _splitnetloc (url , 2 )
448- if (('[' in netloc and ']' not in netloc ) or
449- (']' in netloc and '[' not in netloc )):
450- raise ValueError ("Invalid IPv6 URL" )
451- if allow_fragments and '#' in url :
452- url , fragment = url .split ('#' , 1 )
453- if '?' in url :
454- url , query = url .split ('?' , 1 )
455- _checknetloc (netloc )
456- v = SplitResult ('http' , netloc , url , query , fragment )
457- _parse_cache [key ] = v
458- return _coerce_result (v )
459474 for c in url [:i ]:
460475 if c not in scheme_chars :
461476 break
462477 else :
463- # make sure "url" is not actually a port number (in which case
464- # "scheme" is really part of the path)
465- rest = url [i + 1 :]
466- if not rest or any (c not in '0123456789' for c in rest ):
467- # not a port number
468- scheme , url = url [:i ].lower (), rest
478+ scheme , url = url [:i ].lower (), url [i + 1 :]
469479
470480 if url [:2 ] == '//' :
471481 netloc , url = _splitnetloc (url , 2 )
@@ -642,7 +652,7 @@ def unquote(string, encoding='utf-8', errors='replace'):
642652 unquote('abc%20def') -> 'abc def'.
643653 """
644654 if isinstance (string , bytes ):
645- raise TypeError ( 'Expected str, got bytes' )
655+ return unquote_to_bytes ( string ). decode ( encoding , errors )
646656 if '%' not in string :
647657 string .split
648658 return string
@@ -744,9 +754,8 @@ def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
744754 if max_num_fields < num_fields :
745755 raise ValueError ('Max number of fields exceeded' )
746756
747- pairs = [s1 for s1 in qs .split (separator )]
748757 r = []
749- for name_value in pairs :
758+ for name_value in qs . split ( separator ) :
750759 if not name_value and not strict_parsing :
751760 continue
752761 nv = name_value .split ('=' , 1 )
0 commit comments