Skip to content

Commit 7ec84f3

Browse files
committed
Don't normalise or double-escape urls
`requote_uri` was added in 2013 in psf#1361 to resolve psf#1360, but in 2019 the same problem was also solved in `urllib3` in urllib3/urllib3#1647. Not only does the `urllib3` implementation more closely match what all other clients are doing, but keeping `requote_uri` also means that we are double-encoding invalid URIs. If the redirect is invalid we're in uncharted territory, but the path that `requests` sends has no chance of decoding to the same path that any other client would send, even on the most lenient server. Simply drop `requote_uri`, as `urllib3`'s `parse_url` will always handle invalid URLs.
1 parent 1764cc9 commit 7ec84f3

File tree

4 files changed

+1
-104
lines changed

4 files changed

+1
-104
lines changed

src/requests/models.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,6 @@
6060
guess_json_utf,
6161
iter_slices,
6262
parse_header_links,
63-
requote_uri,
6463
stream_decode_response_unicode,
6564
super_len,
6665
to_key_val_list,
@@ -477,7 +476,7 @@ def prepare_url(self, url, params):
477476
else:
478477
query = enc_params
479478

480-
url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
479+
url = urlunparse([scheme, netloc, path, None, query, fragment])
481480
self.url = url
482481

483482
def prepare_headers(self, headers):

src/requests/sessions.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,6 @@
4444
get_auth_from_url,
4545
get_environ_proxies,
4646
get_netrc_auth,
47-
requote_uri,
4847
resolve_proxies,
4948
rewind_body,
5049
should_bypass_proxies,
@@ -208,14 +207,6 @@ def resolve_redirects(
208207
previous_fragment = parsed.fragment
209208
url = parsed.geturl()
210209

211-
# Facilitate relative 'location' headers, as allowed by RFC 7231.
212-
# (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
213-
# Compliant with RFC3986, we percent encode the url.
214-
if not parsed.netloc:
215-
url = urljoin(resp.url, requote_uri(url))
216-
else:
217-
url = requote_uri(url)
218-
219210
prepared_request.url = to_native_string(url)
220211

221212
self.rebuild_method(prepared_request, resp)

src/requests/utils.py

Lines changed: 0 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -630,58 +630,6 @@ def get_unicode_from_response(r):
630630
return r.content
631631

632632

633-
# The unreserved URI characters (RFC 3986)
634-
UNRESERVED_SET = frozenset(
635-
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~"
636-
)
637-
638-
639-
def unquote_unreserved(uri):
640-
"""Un-escape any percent-escape sequences in a URI that are unreserved
641-
characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
642-
643-
:rtype: str
644-
"""
645-
parts = uri.split("%")
646-
for i in range(1, len(parts)):
647-
h = parts[i][0:2]
648-
if len(h) == 2 and h.isalnum():
649-
try:
650-
c = chr(int(h, 16))
651-
except ValueError:
652-
raise InvalidURL(f"Invalid percent-escape sequence: '{h}'")
653-
654-
if c in UNRESERVED_SET:
655-
parts[i] = c + parts[i][2:]
656-
else:
657-
parts[i] = f"%{parts[i]}"
658-
else:
659-
parts[i] = f"%{parts[i]}"
660-
return "".join(parts)
661-
662-
663-
def requote_uri(uri):
664-
"""Re-quote the given URI.
665-
666-
This function passes the given URI through an unquote/quote cycle to
667-
ensure that it is fully and consistently quoted.
668-
669-
:rtype: str
670-
"""
671-
safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
672-
safe_without_percent = "!#$&'()*+,/:;=?@[]~"
673-
try:
674-
# Unquote only the unreserved characters
675-
# Then quote only illegal characters (do not quote reserved,
676-
# unreserved, or '%')
677-
return quote(unquote_unreserved(uri), safe=safe_with_percent)
678-
except InvalidURL:
679-
# We couldn't unquote the given URI, so let's try quoting it, but
680-
# there may be unquoted '%'s in the URI. We need to make sure they're
681-
# properly quoted so they do not cause issues elsewhere.
682-
return quote(uri, safe=safe_without_percent)
683-
684-
685633
def address_in_network(ip, net):
686634
"""This function allows you to check if an IP belongs to a network subnet
687635

tests/test_utils.py

Lines changed: 0 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,13 @@
3131
parse_dict_header,
3232
parse_header_links,
3333
prepend_scheme_if_needed,
34-
requote_uri,
3534
select_proxy,
3635
set_environ,
3736
should_bypass_proxies,
3837
super_len,
3938
to_key_val_list,
4039
to_native_string,
4140
unquote_header_value,
42-
unquote_unreserved,
4341
urldefragauth,
4442
)
4543

@@ -441,45 +439,6 @@ def test_get_auth_from_url(url, auth):
441439
assert get_auth_from_url(url) == auth
442440

443441

444-
@pytest.mark.parametrize(
445-
"uri, expected",
446-
(
447-
(
448-
# Ensure requoting doesn't break expectations
449-
"http://example.com/fiz?buz=%25ppicture",
450-
"http://example.com/fiz?buz=%25ppicture",
451-
),
452-
(
453-
# Ensure we handle unquoted percent signs in redirects
454-
"http://example.com/fiz?buz=%ppicture",
455-
"http://example.com/fiz?buz=%25ppicture",
456-
),
457-
),
458-
)
459-
def test_requote_uri_with_unquoted_percents(uri, expected):
460-
"""See: https://github.com/psf/requests/issues/2356"""
461-
assert requote_uri(uri) == expected
462-
463-
464-
@pytest.mark.parametrize(
465-
"uri, expected",
466-
(
467-
(
468-
# Illegal bytes
469-
"http://example.com/?a=%--",
470-
"http://example.com/?a=%--",
471-
),
472-
(
473-
# Reserved characters
474-
"http://example.com/?a=%300",
475-
"http://example.com/?a=00",
476-
),
477-
),
478-
)
479-
def test_unquote_unreserved(uri, expected):
480-
assert unquote_unreserved(uri) == expected
481-
482-
483442
@pytest.mark.parametrize(
484443
"mask, expected",
485444
(

0 commit comments

Comments
 (0)