at 25.11-pre 12 kB view raw
1From 6204ab9f989be3841c8c47e1e2cfe6a658fe16d5 Mon Sep 17 00:00:00 2001 2From: Seth Michael Larson <seth@python.org> 3Date: Tue, 28 Jan 2025 14:09:00 -0600 4Subject: [PATCH 1/4] gh-105704: Disallow square brackets (`[` and `]`) in domain 5 names for parsed URLs 6 7--- 8 Lib/test/test_urlparse.py | 14 +++++++++++++ 9 Lib/urllib/parse.py | 20 +++++++++++++++++-- 10 ...-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 4 ++++ 11 3 files changed, 36 insertions(+), 2 deletions(-) 12 create mode 100644 Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 13 14diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py 15index 4516bdea6adb19..0f15a0998ff2ea 100644 16--- a/Lib/test/test_urlparse.py 17+++ b/Lib/test/test_urlparse.py 18@@ -1412,6 +1412,20 @@ def test_invalid_bracketed_hosts(self): 19 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af::2309::fae7:1234]/Path?Query') 20 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@[0439:23af:2309::fae7:1234:2342:438e:192.0.2.146]/Path?Query') 21 self.assertRaises(ValueError, urllib.parse.urlsplit, 'Scheme://user@]v6a.ip[/Path') 22+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]') 23+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix') 24+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]/') 25+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix/') 26+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip]?') 27+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip].suffix?') 28+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]') 29+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix') 30+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]/') 31+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/') 32+ 
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?') 33+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?') 34+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]') 35+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix') 36 37 def test_splitting_bracketed_hosts(self): 38 p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') 39diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py 40index c412c729852272..9d51f4c6812b57 100644 41--- a/Lib/urllib/parse.py 42+++ b/Lib/urllib/parse.py 43@@ -439,6 +439,23 @@ def _checknetloc(netloc): 44 raise ValueError("netloc '" + netloc + "' contains invalid " + 45 "characters under NFKC normalization") 46 47+def _check_bracketed_netloc(netloc): 48+ # Note that this function must mirror the splitting 49+ # done in NetlocResultMixins._hostinfo(). 50+ hostname_and_port = netloc.rpartition('@')[2] 51+ before_bracket, have_open_br, bracketed = hostname_and_port.partition('[') 52+ if have_open_br: 53+ # No data is allowed before a bracket. 54+ if before_bracket: 55+ raise ValueError("Invalid IPv6 URL") 56+ hostname, _, port = bracketed.partition(']') 57+ # No data is allowed after the bracket but before the port delimiter. 
58+ if port and not port.startswith(":"): 59+ raise ValueError("Invalid IPv6 URL") 60+ else: 61+ hostname, _, port = hostname_and_port.partition(':') 62+ _check_bracketed_host(hostname) 63+ 64 # Valid bracketed hosts are defined in 65 # https://www.rfc-editor.org/rfc/rfc3986#page-49 and https://url.spec.whatwg.org/ 66 def _check_bracketed_host(hostname): 67@@ -505,8 +522,7 @@ def _urlsplit(url, scheme=None, allow_fragments=True): 68 (']' in netloc and '[' not in netloc)): 69 raise ValueError("Invalid IPv6 URL") 70 if '[' in netloc and ']' in netloc: 71- bracketed_host = netloc.partition('[')[2].partition(']')[0] 72- _check_bracketed_host(bracketed_host) 73+ _check_bracketed_netloc(netloc) 74 if allow_fragments and '#' in url: 75 url, fragment = url.split('#', 1) 76 if '?' in url: 77diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 78new file mode 100644 79index 00000000000000..aaeac71678de87 80--- /dev/null 81+++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 82@@ -0,0 +1,4 @@ 83+When using ``urllib.parse.urlsplit()`` and ``urlparse()`` host parsing would 84+not reject domain names containing square brackets (``[`` and ``]``). Square 85+brackets are only valid for IPv6 and IPvFuture hosts according to `RFC 3986 86+Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__. 
87 88From 3ab35e8d890e2c5d4e6b0c0299f94775a3ded9ae Mon Sep 17 00:00:00 2001 89From: Seth Michael Larson <sethmichaellarson@gmail.com> 90Date: Thu, 30 Jan 2025 09:50:14 -0600 91Subject: [PATCH 2/4] Use Sphinx references 92 93Co-authored-by: Peter Bierma <zintensitydev@gmail.com> 94--- 95 .../Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 2 +- 96 1 file changed, 1 insertion(+), 1 deletion(-) 97 98diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 99index aaeac71678de87..fb8674f558db59 100644 100--- a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 101+++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 102@@ -1,4 +1,4 @@ 103-When using ``urllib.parse.urlsplit()`` and ``urlparse()`` host parsing would 104+When using :func:`urllib.parse.urlsplit()` and :func:`urllib.parse.urlparse()` host parsing would 105 not reject domain names containing square brackets (``[`` and ``]``). Square 106 brackets are only valid for IPv6 and IPvFuture hosts according to `RFC 3986 107 Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__. 
108 109From ebf92bb4d323d41778e5de6df177b26f18ecf7f9 Mon Sep 17 00:00:00 2001 110From: Seth Michael Larson <seth@python.org> 111Date: Thu, 30 Jan 2025 11:10:35 -0600 112Subject: [PATCH 3/4] Add mismatched bracket test cases, fix news format 113 114--- 115 Lib/test/test_urlparse.py | 10 ++++++++++ 116 .../2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst | 8 ++++---- 117 2 files changed, 14 insertions(+), 4 deletions(-) 118 119diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py 120index 0f15a0998ff2ea..f8ce61b2b49621 100644 121--- a/Lib/test/test_urlparse.py 122+++ b/Lib/test/test_urlparse.py 123@@ -1426,6 +1426,16 @@ def test_invalid_bracketed_hosts(self): 124 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?') 125 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]') 126 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix') 127+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip') 128+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip]') 129+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip[') 130+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://]v6a.ip') 131+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[') 132+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[v6a.ip') 133+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip].suffix') 134+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip[suffix') 135+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix]v6a.ip') 136+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') 137 138 def test_splitting_bracketed_hosts(self): 139 p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') 140diff --git a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 
b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 141index fb8674f558db59..bff1bc6b0d609c 100644 142--- a/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 143+++ b/Misc/NEWS.d/next/Security/2025-01-28-14-08-03.gh-issue-105704.EnhHxu.rst 144@@ -1,4 +1,4 @@ 145-When using :func:`urllib.parse.urlsplit()` and :func:`urllib.parse.urlparse()` host parsing would 146-not reject domain names containing square brackets (``[`` and ``]``). Square 147-brackets are only valid for IPv6 and IPvFuture hosts according to `RFC 3986 148-Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__. 149+When using :func:`urllib.parse.urlsplit` and :func:`urllib.parse.urlparse` host 150+parsing would not reject domain names containing square brackets (``[`` and 151+``]``). Square brackets are only valid for IPv6 and IPvFuture hosts according to 152+`RFC 3986 Section 3.2.2 <https://www.rfc-editor.org/rfc/rfc3986#section-3.2.2>`__. 153 154From 2817b2e29c8b28a24f9eb97abce1e1b60b1162fa Mon Sep 17 00:00:00 2001 155From: Seth Michael Larson <seth@python.org> 156Date: Thu, 30 Jan 2025 13:01:19 -0600 157Subject: [PATCH 4/4] Add more test coverage for ports 158 159--- 160 Lib/test/test_urlparse.py | 13 ++++++++++++- 161 1 file changed, 12 insertions(+), 1 deletion(-) 162 163diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py 164index f8ce61b2b49621..b51cc006b73280 100644 165--- a/Lib/test/test_urlparse.py 166+++ b/Lib/test/test_urlparse.py 167@@ -1424,6 +1424,15 @@ def test_invalid_bracketed_hosts(self): 168 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix/') 169 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]?') 170 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix?') 171+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a') 172+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a') 173+ 
self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:a1') 174+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:a1') 175+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:1a') 176+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:1a') 177+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:') 178+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[::1].suffix:/') 179+ self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://prefix.[::1]:?') 180 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@prefix.[v6a.ip]') 181 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://user@[v6a.ip].suffix') 182 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://[v6a.ip') 183@@ -1438,14 +1447,16 @@ def test_invalid_bracketed_hosts(self): 184 self.assertRaises(ValueError, urllib.parse.urlsplit, 'scheme://v6a.ip[suffix') 185 186 def test_splitting_bracketed_hosts(self): 187- p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]/path?query') 188+ p1 = urllib.parse.urlsplit('scheme://user@[v6a.ip]:1234/path?query') 189 self.assertEqual(p1.hostname, 'v6a.ip') 190 self.assertEqual(p1.username, 'user') 191 self.assertEqual(p1.path, '/path') 192+ self.assertEqual(p1.port, 1234) 193 p2 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7%test]/path?query') 194 self.assertEqual(p2.hostname, '0439:23af:2309::fae7%test') 195 self.assertEqual(p2.username, 'user') 196 self.assertEqual(p2.path, '/path') 197+ self.assertIs(p2.port, None) 198 p3 = urllib.parse.urlsplit('scheme://user@[0439:23af:2309::fae7:1234:192.0.2.146%test]/path?query') 199 self.assertEqual(p3.hostname, '0439:23af:2309::fae7:1234:192.0.2.146%test') 200 self.assertEqual(p3.username, 'user')