Skip to content

Commit 378a07b

Browse files
kenballus authored and mcepl committed
[CVE-2023-24329] blocklist bypass via the urllib.parse component
Blocklist bypass via the urllib.parse component when supplying a URL starting with non-alphabetic characters. The code is from gh#python/cpython!99421, which was released in 3.11.1. Fixes: bsc#1208471 Fixes: gh#99418 Patch: CVE-2023-24329-blank-URL-bypass.patch
1 parent 3c864b7 commit 378a07b

3 files changed

Lines changed: 26 additions & 1 deletion

File tree

Lib/test/test_urlparse.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,24 @@ def test_attributes_bad_port(self):
676676
with self.assertRaises(ValueError):
677677
p.port
678678

679+
def test_attributes_bad_scheme(self):
    """Check handling of invalid schemes.

    Every candidate scheme below is malformed (it starts with a
    non-letter, contains an illegal character, or starts with a
    non-ASCII letter), so both parsers are expected to report an empty
    scheme for it.  Each URL is tried as ``str`` and, when it can be
    encoded as ASCII, as ``bytes``.
    """
    # `as_bytes` instead of `bytes`: avoid shadowing the builtin type.
    for as_bytes in (False, True):
        for parse in (urllib.parse.urlsplit, urllib.parse.urlparse):
            for scheme in (".", "+", "-", "0", "http&", "६http"):
                # The subTest keyword stays `bytes` so failure reports
                # keep the same parameter names.
                with self.subTest(bytes=as_bytes, parse=parse, scheme=scheme):
                    url = scheme + "://www.example.net"
                    if as_bytes:
                        if not urllib.parse.isascii(url):
                            # A non-ASCII URL has no ASCII bytes form;
                            # there is nothing to test for this case.
                            continue
                        url = url.encode("ascii")
                    p = parse(url)
                    # The result type mirrors the input type.
                    self.assertEqual(p.scheme, b"" if as_bytes else "")
696+
679697
def test_attributes_without_netloc(self):
680698
# This example is straight from RFC 3261. It looks like it
681699
# should allow the username, hostname, and port to be filled

Lib/urllib/parse.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
"urlsplit", "urlunsplit", "urlencode", "parse_qs",
3636
"parse_qsl", "quote", "quote_plus", "quote_from_bytes",
3737
"unquote", "unquote_plus", "unquote_to_bytes",
38+
"isascii",
3839
"DefragResult", "ParseResult", "SplitResult",
3940
"DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]
4041

@@ -79,6 +80,10 @@
7980
# Unsafe bytes to be removed per WHATWG spec
8081
_UNSAFE_URL_BYTES_TO_REMOVE = ['\t', '\r', '\n']
8182

83+
# Shim for str.isascii(), which is only built in on Python >= 3.7
84+
def isascii(word):
    """Return True if every item of *word* lies in the ASCII range.

    Stand-in for ``str.isascii()``, which is only built in on
    Python >= 3.7.

    *word* may be a ``str`` or a ``bytes``/``bytearray`` object; the
    items of the latter are already integers and are compared directly.
    An empty *word* yields True.
    """
    # A generator (rather than a list) lets all() stop at the first
    # non-ASCII item instead of examining the whole input first.
    return all(
        (item if isinstance(item, int) else ord(item)) < 128
        for item in word
    )
86+
8287
# XXX: Consider replacing with functools.lru_cache
8388
MAX_CACHE_SIZE = 20
8489
_parse_cache = {}
@@ -435,7 +440,7 @@ def urlsplit(url, scheme='', allow_fragments=True):
435440
clear_cache()
436441
netloc = query = fragment = ''
437442
i = url.find(':')
438-
if i > 0:
443+
if i > 0 and isascii(url[0]) and url[0].isalpha():
439444
if url[:i] == 'http': # optimize the common case
440445
scheme = url[:i].lower()
441446
url = url[i+1:]
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix bug in :func:`urllib.parse.urlparse` that causes URL schemes that begin
2+
with a digit, a plus sign, or a minus sign to be parsed incorrectly.

0 commit comments

Comments
 (0)