Skip to content

Commit cf23cd5

Browse files
authored
Merge branch 'master' into gh-actions
2 parents f62cf49 + 512b464 commit cf23cd5

File tree

8 files changed

+116
-47
lines changed

8 files changed

+116
-47
lines changed

src/hyperlink/_url.py

Lines changed: 63 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def __nonzero__(self):
183183
_SCHEMELESS_PATH_DELIMS = _ALL_DELIMS - _SCHEMELESS_PATH_SAFE
184184
_FRAGMENT_SAFE = _UNRESERVED_CHARS | _PATH_SAFE | set(u"/?")
185185
_FRAGMENT_DELIMS = _ALL_DELIMS - _FRAGMENT_SAFE
186-
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&+")
186+
_QUERY_VALUE_SAFE = _UNRESERVED_CHARS | _FRAGMENT_SAFE - set(u"&")
187187
_QUERY_VALUE_DELIMS = _ALL_DELIMS - _QUERY_VALUE_SAFE
188188
_QUERY_KEY_SAFE = _UNRESERVED_CHARS | _QUERY_VALUE_SAFE - set(u"=")
189189
_QUERY_KEY_DELIMS = _ALL_DELIMS - _QUERY_KEY_SAFE
@@ -467,9 +467,13 @@ def _encode_userinfo_part(text, maximal=True):
467467
)
468468
# As of Mar 11, 2017, there were 44 netloc schemes, and 13 non-netloc
469469

470+
NO_QUERY_PLUS_SCHEMES = set()
470471

471-
def register_scheme(text, uses_netloc=True, default_port=None):
472-
# type: (Text, bool, Optional[int]) -> None
472+
473+
def register_scheme(
474+
text, uses_netloc=True, default_port=None, query_plus_is_space=True
475+
):
476+
# type: (Text, bool, Optional[int], bool) -> None
473477
"""Registers new scheme information, resulting in correct port and
474478
slash behavior from the URL object. There are dozens of standard
475479
schemes preregistered, so this function is mostly meant for
@@ -485,6 +489,8 @@ def register_scheme(text, uses_netloc=True, default_port=None):
485489
not. Defaults to True.
486490
default_port: The default port, if any, for
487491
netloc-using schemes.
492+
query_plus_is_space: If true, a "+" in the query string should be
493+
decoded as a space by DecodedURL.
488494
489495
.. _file an issue: https://github.com/mahmoud/hyperlink/issues
490496
"""
@@ -510,6 +516,9 @@ def register_scheme(text, uses_netloc=True, default_port=None):
510516
else:
511517
raise ValueError("uses_netloc expected bool, not: %r" % uses_netloc)
512518

519+
if not query_plus_is_space:
520+
NO_QUERY_PLUS_SCHEMES.add(text)
521+
513522
return
514523

515524

@@ -922,9 +931,9 @@ class URL(object):
922931
https://example.com/hello/world
923932
924933
The constructor runs basic type checks. All strings are expected
925-
to be decoded (:class:`unicode` in Python 2). All arguments are
926-
optional, defaulting to appropriately empty values. A full list of
927-
constructor arguments is below.
934+
to be text (:class:`str` in Python 3, :class:`unicode` in Python 2). All
935+
arguments are optional, defaulting to appropriately empty values. A full
936+
list of constructor arguments is below.
928937
929938
Args:
930939
scheme: The text name of the scheme.
@@ -934,9 +943,9 @@ class URL(object):
934943
it is known. See the ``SCHEME_PORT_MAP`` and
935944
:func:`register_default_port` for more info.
936945
path: A tuple of strings representing the slash-separated parts of the
937-
path.
946+
path, each percent-encoded.
938947
query: The query parameters, as a dictionary or as a sequence of
939-
key-value pairs.
948+
percent-encoded key-value pairs.
940949
fragment: The fragment part of the URL.
941950
rooted: A rooted URL is one which indicates an absolute path.
942951
This is True on any URL that includes a host, or any relative URL
@@ -1969,6 +1978,16 @@ def remove(
19691978
_EMPTY_URL = URL()
19701979

19711980

1981+
def _replace_plus(text):
1982+
# type: (Text) -> Text
1983+
return text.replace("+", "%20")
1984+
1985+
1986+
def _no_op(text):
1987+
# type: (Text) -> Text
1988+
return text
1989+
1990+
19721991
class DecodedURL(object):
19731992
"""
19741993
:class:`DecodedURL` is a type designed to act as a higher-level
@@ -1998,6 +2017,9 @@ class DecodedURL(object):
19982017
lazy: Set to True to avoid pre-decoding all parts of the URL to check for
19992018
validity.
20002019
Defaults to False.
2020+
query_plus_is_space: Whether "+" characters in the query string are treated
2021+
as spaces when decoding. If unspecified, the default is taken from
2022+
the scheme.
20012023
20022024
.. note::
20032025
@@ -2012,18 +2034,21 @@ class DecodedURL(object):
20122034
.. versionadded:: 18.0.0
20132035
"""
20142036

2015-
def __init__(self, url=_EMPTY_URL, lazy=False):
2016-
# type: (URL, bool) -> None
2037+
def __init__(self, url=_EMPTY_URL, lazy=False, query_plus_is_space=None):
2038+
# type: (URL, bool, Optional[bool]) -> None
20172039
self._url = url
2040+
if query_plus_is_space is None:
2041+
query_plus_is_space = url.scheme not in NO_QUERY_PLUS_SCHEMES
2042+
self._query_plus_is_space = query_plus_is_space
20182043
if not lazy:
20192044
# cache the following, while triggering any decoding
20202045
# issues with decodable fields
20212046
self.host, self.userinfo, self.path, self.query, self.fragment
20222047
return
20232048

20242049
@classmethod
2025-
def from_text(cls, text, lazy=False):
2026-
# type: (Text, bool) -> DecodedURL
2050+
def from_text(cls, text, lazy=False, query_plus_is_space=None):
2051+
# type: (Text, bool, Optional[bool]) -> DecodedURL
20272052
"""\
20282053
Make a `DecodedURL` instance from any text string containing a URL.
20292054
@@ -2034,7 +2059,7 @@ def from_text(cls, text, lazy=False):
20342059
Defaults to True.
20352060
"""
20362061
_url = URL.from_text(text)
2037-
return cls(_url, lazy=lazy)
2062+
return cls(_url, lazy=lazy, query_plus_is_space=query_plus_is_space)
20382063

20392064
@property
20402065
def encoded_url(self):
@@ -2059,22 +2084,34 @@ def to_iri(self):
20592084
"Passthrough to :meth:`~hyperlink.URL.to_iri()`"
20602085
return self._url.to_iri()
20612086

2087+
def _clone(self, url):
2088+
# type: (URL) -> DecodedURL
2089+
return self.__class__(
2090+
url,
2091+
# TODO: propagate laziness?
2092+
query_plus_is_space=self._query_plus_is_space,
2093+
)
2094+
20622095
def click(self, href=u""):
20632096
# type: (Union[Text, URL, DecodedURL]) -> DecodedURL
20642097
"""Return a new DecodedURL wrapping the result of
20652098
:meth:`~hyperlink.URL.click()`
20662099
"""
20672100
if isinstance(href, DecodedURL):
20682101
href = href._url
2069-
return self.__class__(self._url.click(href=href))
2102+
return self._clone(
2103+
self._url.click(href=href),
2104+
)
20702105

20712106
def sibling(self, segment):
20722107
# type: (Text) -> DecodedURL
20732108
"""Automatically encode any reserved characters in *segment* and
20742109
return a new `DecodedURL` wrapping the result of
20752110
:meth:`~hyperlink.URL.sibling()`
20762111
"""
2077-
return self.__class__(self._url.sibling(_encode_reserved(segment)))
2112+
return self._clone(
2113+
self._url.sibling(_encode_reserved(segment)),
2114+
)
20782115

20792116
def child(self, *segments):
20802117
# type: (Text) -> DecodedURL
@@ -2085,7 +2122,7 @@ def child(self, *segments):
20852122
if not segments:
20862123
return self
20872124
new_segs = [_encode_reserved(s) for s in segments]
2088-
return self.__class__(self._url.child(*new_segs))
2125+
return self._clone(self._url.child(*new_segs))
20892126

20902127
def normalize(
20912128
self,
@@ -2101,7 +2138,7 @@ def normalize(
21012138
"""Return a new `DecodedURL` wrapping the result of
21022139
:meth:`~hyperlink.URL.normalize()`
21032140
"""
2104-
return self.__class__(
2141+
return self._clone(
21052142
self._url.normalize(
21062143
scheme, host, path, query, fragment, userinfo, percents
21072144
)
@@ -2148,11 +2185,18 @@ def path(self):
21482185
def query(self):
21492186
# type: () -> QueryPairs
21502187
if not hasattr(self, "_query"):
2188+
if self._query_plus_is_space:
2189+
predecode = _replace_plus
2190+
else:
2191+
predecode = _no_op
2192+
21512193
self._query = cast(
21522194
QueryPairs,
21532195
tuple(
21542196
tuple(
2155-
_percent_decode(x, raise_subencoding_exc=True)
2197+
_percent_decode(
2198+
predecode(x), raise_subencoding_exc=True
2199+
)
21562200
if x is not None
21572201
else None
21582202
for x in (k, v)
@@ -2248,7 +2292,7 @@ def replace(
22482292
userinfo=userinfo_text,
22492293
uses_netloc=uses_netloc,
22502294
)
2251-
return self.__class__(url=new_url)
2295+
return self._clone(url=new_url)
22522296

22532297
def get(self, name):
22542298
# type: (Text) -> List[Optional[Text]]

src/hyperlink/hypothesis.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,8 @@ def idna_characters():
7878
)
7979
with open_gzip(dataFileName) as dataFile:
8080
reader = csv_reader(
81-
(line.decode("utf-8") for line in dataFile), delimiter=",",
81+
(line.decode("utf-8") for line in dataFile),
82+
delimiter=",",
8283
)
8384
next(reader) # Skip header row
8485
for row in reader:

src/hyperlink/test/common.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,26 +16,26 @@ def assertRaises( # type: ignore[override]
1616
):
1717
# type: (...) -> Any
1818
"""Fail unless an exception of class expected_exception is raised
19-
by callableObj when invoked with arguments args and keyword
20-
arguments kwargs. If a different type of exception is
21-
raised, it will not be caught, and the test case will be
22-
deemed to have suffered an error, exactly as for an
23-
unexpected exception.
19+
by callableObj when invoked with arguments args and keyword
20+
arguments kwargs. If a different type of exception is
21+
raised, it will not be caught, and the test case will be
22+
deemed to have suffered an error, exactly as for an
23+
unexpected exception.
2424
25-
If called with callableObj omitted or None, will return a
26-
context object used like this::
25+
If called with callableObj omitted or None, will return a
26+
context object used like this::
2727
28-
with self.assertRaises(SomeException):
29-
do_something()
28+
with self.assertRaises(SomeException):
29+
do_something()
3030
31-
The context manager keeps a reference to the exception as
32-
the 'exception' attribute. This allows you to inspect the
33-
exception after the assertion::
31+
The context manager keeps a reference to the exception as
32+
the 'exception' attribute. This allows you to inspect the
33+
exception after the assertion::
3434
35-
with self.assertRaises(SomeException) as cm:
36-
do_something()
37-
the_exception = cm.exception
38-
self.assertEqual(the_exception.error_code, 3)
35+
with self.assertRaises(SomeException) as cm:
36+
do_something()
37+
the_exception = cm.exception
38+
self.assertEqual(the_exception.error_code, 3)
3939
"""
4040
context = _AssertRaisesContext(expected_exception, self)
4141
if callableObj is None:

src/hyperlink/test/test_common.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,11 @@
77

88

99
class _ExpectedException(Exception):
10-
"""An exception used to test HyperlinkTestCase.assertRaises.
11-
12-
"""
10+
"""An exception used to test HyperlinkTestCase.assertRaises."""
1311

1412

1513
class _UnexpectedException(Exception):
16-
"""An exception used to test HyperlinkTestCase.assertRaises.
17-
18-
"""
14+
"""An exception used to test HyperlinkTestCase.assertRaises."""
1915

2016

2117
class TestHyperlink(TestCase):

src/hyperlink/test/test_decoded_url.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,3 +210,19 @@ def test_click_decoded_url(self):
210210
assert clicked.host == durl.host
211211
assert clicked.path == durl_dest.path
212212
assert clicked.path == ("tëst",)
213+
214+
def test_decode_plus(self):
215+
# type: () -> None
216+
durl = DecodedURL.from_text("/x+y%2B?a=b+c%2B")
217+
assert durl.path == ("x+y+",)
218+
assert durl.get("a") == ["b c+"]
219+
assert durl.query == (("a", "b c+"),)
220+
221+
def test_decode_nonplussed(self):
222+
# type: () -> None
223+
durl = DecodedURL.from_text(
224+
"/x+y%2B?a=b+c%2B", query_plus_is_space=False
225+
)
226+
assert durl.path == ("x+y+",)
227+
assert durl.get("a") == ["b+c+"]
228+
assert durl.query == (("a", "b+c+"),)

src/hyperlink/test/test_scheme_registration.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
from .. import _url
77
from .common import HyperlinkTestCase
8-
from .._url import register_scheme, URL
8+
from .._url import register_scheme, URL, DecodedURL
99

1010

1111
class TestSchemeRegistration(HyperlinkTestCase):
@@ -70,3 +70,13 @@ def test_register_invalid_port(self):
7070
# type: () -> None
7171
with self.assertRaises(ValueError):
7272
register_scheme("nope", default_port=cast(bool, object()))
73+
74+
def test_register_no_quote_plus_scheme(self):
75+
# type: () -> None
76+
register_scheme("keepplus", query_plus_is_space=False)
77+
plus_is_not_space = DecodedURL.from_text(
78+
"keepplus://example.com/?q=a+b"
79+
)
80+
plus_is_space = DecodedURL.from_text("https://example.com/?q=a+b")
81+
assert plus_is_not_space.get("q") == ["a+b"]
82+
assert plus_is_space.get("q") == ["a b"]

src/hyperlink/test/test_url.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@
133133
"https://example.com/?a=%23", # hash in query param value
134134
"https://example.com/?a=%26", # ampersand in query param value
135135
"https://example.com/?a=%3D", # equals in query param value
136+
"https://example.com/?foo+bar=baz", # plus in query param name
137+
"https://example.com/?foo=bar+baz", # plus in query param value
136138
# double-encoded percent sign in all percent-encodable positions:
137139
"http://(%2525):(%2525)@example.com/(%2525)/?(%2525)=(%2525)#(%2525)",
138140
# colon in first part of schemeless relative url

tox.ini

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,8 @@ description = build documentation
318318
basepython = {[default]basepython}
319319

320320
deps =
321-
Sphinx==3.3.1
322-
sphinx-rtd-theme==0.5.0
321+
Sphinx==3.4.2
322+
sphinx-rtd-theme==0.5.1
323323

324324
commands =
325325
sphinx-build \
@@ -359,9 +359,9 @@ basepython = {[default]basepython}
359359
skip_install = True
360360

361361
deps =
362-
check-manifest==0.45
362+
check-manifest==0.46
363363
readme-renderer==28.0
364-
twine==3.2.0
364+
twine==3.3.0
365365

366366
commands =
367367
check-manifest

0 commit comments

Comments
 (0)