Don't normalise or double-escape URLs

moben · moben · commit 7ec84f386a4d · 2025-03-29T00:50:29.000+01:00
`requote_uri` was added in 2013 in psf#1361 to resolve psf#1360. But in 2019 this was also solved in `urllib3` in urllib3/urllib3#1647. Not only does the `urllib3` implementation more closely match what all other clients are doing, but running both also means that we are double-encoding invalid URIs. If the redirect is invalid we're in uncharted territory, but the path that `requests` produces has no chance of decoding to the same path that any other client uses, even on the most lenient server. Simply drop `requote_uri`, as `urllib3`'s `parse_url` will always handle invalid URLs.
diff --git a/src/requests/models.py b/src/requests/models.py
@@ -60,7 +60,6 @@
     guess_json_utf,
     iter_slices,
     parse_header_links,
-    requote_uri,
     stream_decode_response_unicode,
     super_len,
     to_key_val_list,
@@ -477,7 +476,7 @@ def prepare_url(self, url, params):
             else:
                 query = enc_params
 
-        url = requote_uri(urlunparse([scheme, netloc, path, None, query, fragment]))
+        url = urlunparse([scheme, netloc, path, None, query, fragment])
         self.url = url
 
     def prepare_headers(self, headers):
diff --git a/src/requests/sessions.py b/src/requests/sessions.py
@@ -44,7 +44,6 @@
     get_auth_from_url,
     get_environ_proxies,
     get_netrc_auth,
-    requote_uri,
     resolve_proxies,
     rewind_body,
     should_bypass_proxies,
@@ -208,14 +207,6 @@ def resolve_redirects(
                 previous_fragment = parsed.fragment
             url = parsed.geturl()
 
-            # Facilitate relative 'location' headers, as allowed by RFC 7231.
-            # (e.g. '/path/to/resource' instead of 'http://domain.tld/path/to/resource')
-            # Compliant with RFC3986, we percent encode the url.
-            if not parsed.netloc:
-                url = urljoin(resp.url, requote_uri(url))
-            else:
-                url = requote_uri(url)
-
             prepared_request.url = to_native_string(url)
 
             self.rebuild_method(prepared_request, resp)
diff --git a/src/requests/utils.py b/src/requests/utils.py
@@ -630,58 +630,6 @@ def get_unicode_from_response(r):
         return r.content
 
 
-# The unreserved URI characters (RFC 3986)
-UNRESERVED_SET = frozenset(
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~"
-)
-
-
-def unquote_unreserved(uri):
-    """Un-escape any percent-escape sequences in a URI that are unreserved
-    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.
-
-    :rtype: str
-    """
-    parts = uri.split("%")
-    for i in range(1, len(parts)):
-        h = parts[i][0:2]
-        if len(h) == 2 and h.isalnum():
-            try:
-                c = chr(int(h, 16))
-            except ValueError:
-                raise InvalidURL(f"Invalid percent-escape sequence: '{h}'")
-
-            if c in UNRESERVED_SET:
-                parts[i] = c + parts[i][2:]
-            else:
-                parts[i] = f"%{parts[i]}"
-        else:
-            parts[i] = f"%{parts[i]}"
-    return "".join(parts)
-
-
-def requote_uri(uri):
-    """Re-quote the given URI.
-
-    This function passes the given URI through an unquote/quote cycle to
-    ensure that it is fully and consistently quoted.
-
-    :rtype: str
-    """
-    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
-    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
-    try:
-        # Unquote only the unreserved characters
-        # Then quote only illegal characters (do not quote reserved,
-        # unreserved, or '%')
-        return quote(unquote_unreserved(uri), safe=safe_with_percent)
-    except InvalidURL:
-        # We couldn't unquote the given URI, so let's try quoting it, but
-        # there may be unquoted '%'s in the URI. We need to make sure they're
-        # properly quoted so they do not cause issues elsewhere.
-        return quote(uri, safe=safe_without_percent)
-
-
 def address_in_network(ip, net):
     """This function allows you to check if an IP belongs to a network subnet
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -31,15 +31,13 @@
     parse_dict_header,
     parse_header_links,
     prepend_scheme_if_needed,
-    requote_uri,
     select_proxy,
     set_environ,
     should_bypass_proxies,
     super_len,
     to_key_val_list,
     to_native_string,
     unquote_header_value,
-    unquote_unreserved,
     urldefragauth,
 )
 
@@ -441,45 +439,6 @@ def test_get_auth_from_url(url, auth):
     assert get_auth_from_url(url) == auth
 
 
-@pytest.mark.parametrize(
-    "uri, expected",
-    (
-        (
-            # Ensure requoting doesn't break expectations
-            "http://example.com/fiz?buz=%25ppicture",
-            "http://example.com/fiz?buz=%25ppicture",
-        ),
-        (
-            # Ensure we handle unquoted percent signs in redirects
-            "http://example.com/fiz?buz=%ppicture",
-            "http://example.com/fiz?buz=%25ppicture",
-        ),
-    ),
-)
-def test_requote_uri_with_unquoted_percents(uri, expected):
-    """See: https://github.com/psf/requests/issues/2356"""
-    assert requote_uri(uri) == expected
-
-
-@pytest.mark.parametrize(
-    "uri, expected",
-    (
-        (
-            # Illegal bytes
-            "http://example.com/?a=%--",
-            "http://example.com/?a=%--",
-        ),
-        (
-            # Reserved characters
-            "http://example.com/?a=%300",
-            "http://example.com/?a=00",
-        ),
-    ),
-)
-def test_unquote_unreserved(uri, expected):
-    assert unquote_unreserved(uri) == expected
-
-
 @pytest.mark.parametrize(
     "mask, expected",
     (