@@ -183,7 +183,7 @@ def __nonzero__(self):
# Derived character sets: every *_DELIMS set below is ``_ALL_DELIMS`` minus
# the corresponding *_SAFE set.
_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?")
_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
# NOTE: "-" binds tighter than "|" in Python, so the subtraction applies to
# the right-hand operand only; the parentheses spell out that grouping.
# Query values take the fragment-safe set with "&" removed.
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | (_FRAGMENT_SAFE - set(u"&"))
_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
# Query keys take the query-value-safe set with "=" removed as well.
_QUERY_KEY_SAFE = _UNRESERVED_CHARS | (_QUERY_VALUE_SAFE - set(u"="))
_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467467)
468468# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469
470+ NO_QUERY_PLUS_SCHEMES = set ()
470471
471- def register_scheme (text , uses_netloc = True , default_port = None ):
472- # type: (Text, bool, Optional[int]) -> None
472+
473+ def register_scheme (
474+ text , uses_netloc = True , default_port = None , query_plus_is_space = True
475+ ):
476+ # type: (Text, bool, Optional[int], bool) -> None
473477 """Registers new scheme information, resulting in correct port and
474478 slash behavior from the URL object. There are dozens of standard
475479 schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485489 not. Defaults to True.
486490 default_port: The default port, if any, for
487491 netloc-using schemes.
492+ query_plus_is_space: If true, a "+" in the query string should be
493+ decoded as a space by DecodedURL.
488494
489495 .. _file an issue: https://github.com/mahmoud/hyperlink/issues
490496 """
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510516 else :
511517 raise ValueError ("uses_netloc expected bool, not: %r" % uses_netloc )
512518
519+ if not query_plus_is_space :
520+ NO_QUERY_PLUS_SCHEMES .add (text )
521+
513522 return
514523
515524
@@ -922,9 +931,9 @@ class URL(object):
922931 https://example.com/hello/world
923932
924933 The constructor runs basic type checks. All strings are expected
925- to be decoded (:class:`unicode` in Python 2). All arguments are
926- optional, defaulting to appropriately empty values. A full list of
927- constructor arguments is below.
934+ to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All
935+ arguments are optional, defaulting to appropriately empty values. A full
936+ list of constructor arguments is below.
928937
929938 Args:
930939 scheme: The text name of the scheme.
@@ -934,9 +943,9 @@ class URL(object):
934943 it is known. See the ``SCHEME_PORT_MAP`` and
935944 :func:`register_default_port` for more info.
936945 path: A tuple of strings representing the slash-separated parts of the
937- path.
946+ path, each percent-encoded.
938947 query: The query parameters, as a dictionary or as a sequence of
939- key-value pairs.
948+ percent-encoded key-value pairs.
940949 fragment: The fragment part of the URL.
941950 rooted: A rooted URL is one which indicates an absolute path.
942951 This is True on any URL that includes a host, or any relative URL
@@ -1969,6 +1978,16 @@ def remove(
19691978_EMPTY_URL = URL ()
19701979
19711980
1981+ def _replace_plus (text ):
1982+ # type: (Text) -> Text
1983+ return text .replace ("+" , "%20" )
1984+
1985+
1986+ def _no_op (text ):
1987+ # type: (Text) -> Text
1988+ return text
1989+
1990+
19721991class DecodedURL (object ):
19731992 """
19741993 :class:`DecodedURL` is a type designed to act as a higher-level
@@ -1998,6 +2017,9 @@ class DecodedURL(object):
19982017 lazy: Set to True to avoid pre-decoding all parts of the URL to check for
19992018 validity.
20002019 Defaults to False.
2020+ query_plus_is_space: Whether "+" characters in the query string are treated
2021+ as spaces when decoding. If unspecified (None), the default is taken from
2022+ the scheme.
20012023
20022024 .. note::
20032025
@@ -2012,18 +2034,21 @@ class DecodedURL(object):
20122034 .. versionadded:: 18.0.0
20132035 """
20142036
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
    # type: (URL, bool, Optional[bool]) -> None
    # Wrap *url*. ``query_plus_is_space`` controls whether "+" in the query
    # is read as a space; ``None`` defers the choice to the URL's scheme.
    self._url = url
    if query_plus_is_space is None:
        # No explicit choice: "+" means space unless the scheme was
        # recorded in NO_QUERY_PLUS_SCHEMES.
        query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
    self._query_plus_is_space = query_plus_is_space
    if not lazy:
        # cache the following, while triggering any decoding
        # issues with decodable fields
        self.host, self.userinfo, self.path, self.query, self.fragment
    return
20232048
20242049 @classmethod
2025- def from_text (cls , text , lazy = False ):
2026- # type: (Text, bool) -> DecodedURL
2050+ def from_text (cls , text , lazy = False , query_plus_is_space = None ):
2051+ # type: (Text, bool, Optional[bool] ) -> DecodedURL
20272052 """\
20282053 Make a `DecodedURL` instance from any text string containing a URL.
20292054
@@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
20342059 Defaults to True.
20352060 """
20362061 _url = URL .from_text (text )
2037- return cls (_url , lazy = lazy )
2062+ return cls (_url , lazy = lazy , query_plus_is_space = query_plus_is_space )
20382063
20392064 @property
20402065 def encoded_url (self ):
@@ -2059,22 +2084,34 @@ def to_iri(self):
20592084 "Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602085 return self ._url .to_iri ()
20612086
2087+ def _clone (self , url ):
2088+ # type: (URL) -> DecodedURL
2089+ return self .__class__ (
2090+ url ,
2091+ # TODO: propagate laziness?
2092+ query_plus_is_space = self ._query_plus_is_space ,
2093+ )
2094+
def click(self, href=u""):
    # type: (Union[Text, URL, DecodedURL]) -> DecodedURL
    """Return a new DecodedURL wrapping the result of
    :meth:`~hyperlink.URL.click()`

    A *href* given as a DecodedURL is unwrapped to its underlying URL
    first. The result is built with ``_clone`` so it keeps this object's
    ``query_plus_is_space`` setting.
    """
    if isinstance(href, DecodedURL):
        href = href._url
    return self._clone(self._url.click(href=href))
20702105
def sibling(self, segment):
    # type: (Text) -> DecodedURL
    """Automatically encode any reserved characters in *segment* and
    return a new `DecodedURL` wrapping the result of
    :meth:`~hyperlink.URL.sibling()`

    The result is built with ``_clone`` so it keeps this object's
    ``query_plus_is_space`` setting.
    """
    encoded_segment = _encode_reserved(segment)
    return self._clone(self._url.sibling(encoded_segment))
20782115
20792116 def child (self , * segments ):
20802117 # type: (Text) -> DecodedURL
@@ -2085,7 +2122,7 @@ def child(self, *segments):
20852122 if not segments :
20862123 return self
20872124 new_segs = [_encode_reserved (s ) for s in segments ]
2088- return self .__class__ (self ._url .child (* new_segs ))
2125+ return self ._clone (self ._url .child (* new_segs ))
20892126
20902127 def normalize (
20912128 self ,
@@ -2101,7 +2138,7 @@ def normalize(
21012138 """Return a new `DecodedURL` wrapping the result of
21022139 :meth:`~hyperlink.URL.normalize()`
21032140 """
2104- return self .__class__ (
2141+ return self ._clone (
21052142 self ._url .normalize (
21062143 scheme , host , path , query , fragment , userinfo , percents
21072144 )
@@ -2148,11 +2185,18 @@ def path(self):
21482185 def query (self ):
21492186 # type: () -> QueryPairs
21502187 if not hasattr (self , "_query" ):
2188+ if self ._query_plus_is_space :
2189+ predecode = _replace_plus
2190+ else :
2191+ predecode = _no_op
2192+
21512193 self ._query = cast (
21522194 QueryPairs ,
21532195 tuple (
21542196 tuple (
2155- _percent_decode (x , raise_subencoding_exc = True )
2197+ _percent_decode (
2198+ predecode (x ), raise_subencoding_exc = True
2199+ )
21562200 if x is not None
21572201 else None
21582202 for x in (k , v )
@@ -2248,7 +2292,7 @@ def replace(
22482292 userinfo = userinfo_text ,
22492293 uses_netloc = uses_netloc ,
22502294 )
2251- return self .__class__ (url = new_url )
2295+ return self ._clone (url = new_url )
22522296
22532297 def get (self , name ):
22542298 # type: (Text) -> List[Optional[Text]]
0 commit comments