at 22.05-pre 17 kB view raw
1From e7b005c05dbdbce967a409abd71641281a8604bf Mon Sep 17 00:00:00 2001 2From: Senthil Kumaran <senthil@uthcode.com> 3Date: Mon, 15 Feb 2021 11:16:43 -0800 4Subject: [PATCH 24/26] [3.6] bpo-42967: only use '&' as a query string 5 separator (GH-24297) (GH-24532) 6MIME-Version: 1.0 7Content-Type: text/plain; charset=UTF-8 8Content-Transfer-Encoding: 8bit 9 10bpo-42967: [security] Address a web cache-poisoning issue reported in 11urllib.parse.parse_qsl(). 12 13urllib.parse will only us "&" as query string separator by default 14instead of both ";" and "&" as allowed in earlier versions. An optional 15argument seperator with default value "&" is added to specify the 16separator. 17 18Co-authored-by: Éric Araujo <merwok@netwok.org> 19Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com> 20Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com> 21 22Rebased for Python 2.7 by Michał Górny 23--- 24 Doc/library/cgi.rst | 7 +++- 25 Doc/library/urlparse.rst | 23 ++++++++++- 26 Lib/cgi.py | 20 +++++++--- 27 Lib/test/test_cgi.py | 29 +++++++++++--- 28 Lib/test/test_urlparse.py | 38 +++++++++---------- 29 Lib/urlparse.py | 22 ++++++++--- 30 .../2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1 + 31 7 files changed, 100 insertions(+), 40 deletions(-) 32 create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst 33 34diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst 35index ecd62c8c01..b85cdd8b61 100644 36--- a/Doc/library/cgi.rst 37+++ b/Doc/library/cgi.rst 38@@ -285,10 +285,10 @@ These are useful if you want more control, or if you want to employ some of the 39 algorithms implemented in this module in other circumstances. 40 41 42-.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]]) 43+.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]], separator="&") 44 45 Parse a query in the environment or from a file (the file defaults to 46- ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values* and *strict_parsing* parameters are 47+ ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are 48 passed to :func:`urlparse.parse_qs` unchanged. 49 50 51@@ -316,6 +316,9 @@ algorithms implemented in this module in other circumstances. 52 Note that this does not parse nested multipart parts --- use 53 :class:`FieldStorage` for that. 54 55+ .. versionchanged:: 3.6.13 56+ Added the *separator* parameter. 57+ 58 59 .. function:: parse_header(string) 60 61diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst 62index 0989c88c30..2f8e4c5a44 100644 63--- a/Doc/library/urlparse.rst 64+++ b/Doc/library/urlparse.rst 65@@ -136,7 +136,7 @@ The :mod:`urlparse` module defines the following functions: 66 now raise :exc:`ValueError`. 67 68 69-.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) 70+.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]], separator='&') 71 72 Parse a query string given as a string argument (data of type 73 :mimetype:`application/x-www-form-urlencoded`). Data are returned as a 74@@ -157,6 +157,9 @@ The :mod:`urlparse` module defines the following functions: 75 read. If set, then throws a :exc:`ValueError` if there are more than 76 *max_num_fields* fields read. 77 78+ The optional argument *separator* is the symbol to use for separating the 79+ query arguments. It defaults to ``&``. 80+ 81 Use the :func:`urllib.urlencode` function to convert such dictionaries into 82 query strings. 83 84@@ -166,7 +169,14 @@ The :mod:`urlparse` module defines the following functions: 85 .. versionchanged:: 2.7.16 86 Added *max_num_fields* parameter. 87 88-.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]]) 89+ .. versionchanged:: 2.7.18-gentoo 90+ Added *separator* parameter with the default value of ``&``. Earlier 91+ Python versions allowed using both ``;`` and ``&`` as query parameter 92+ separator. This has been changed to allow only a single separator key, 93+ with ``&`` as the default separator. 94+ 95+ 96+.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]], separator='&') 97 98 Parse a query string given as a string argument (data of type 99 :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of 100@@ -186,6 +196,9 @@ The :mod:`urlparse` module defines the following functions: 101 read. If set, then throws a :exc:`ValueError` if there are more than 102 *max_num_fields* fields read. 103 104+ The optional argument *separator* is the symbol to use for separating the 105+ query arguments. It defaults to ``&``. 106+ 107 Use the :func:`urllib.urlencode` function to convert such lists of pairs into 108 query strings. 109 110@@ -195,6 +208,12 @@ The :mod:`urlparse` module defines the following functions: 111 .. versionchanged:: 2.7.16 112 Added *max_num_fields* parameter. 113 114+ .. versionchanged:: 2.7.18-gentoo 115+ Added *separator* parameter with the default value of ``&``. Earlier 116+ Python versions allowed using both ``;`` and ``&`` as query parameter 117+ separator. This has been changed to allow only a single separator key, 118+ with ``&`` as the default separator. 119+ 120 .. function:: urlunparse(parts) 121 122 Construct a URL from a tuple as returned by ``urlparse()``. The *parts* argument 123diff --git a/Lib/cgi.py b/Lib/cgi.py 124index 5b903e0347..9d0848b6b1 100755 125--- a/Lib/cgi.py 126+++ b/Lib/cgi.py 127@@ -121,7 +121,8 @@ log = initlog # The current logging function 128 # 0 ==> unlimited input 129 maxlen = 0 130 131-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): 132+def parse(fp=None, environ=os.environ, keep_blank_values=0, 133+ strict_parsing=0, separator='&'): 134 """Parse a query in the environment or from a file (default stdin) 135 136 Arguments, all optional: 137@@ -140,6 +141,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): 138 strict_parsing: flag indicating what to do with parsing errors. 139 If false (the default), errors are silently ignored. 140 If true, errors raise a ValueError exception. 141+ 142+ separator: str. The symbol to use for separating the query arguments. 143+ Defaults to &. 144 """ 145 if fp is None: 146 fp = sys.stdin 147@@ -171,7 +175,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0): 148 else: 149 qs = "" 150 environ['QUERY_STRING'] = qs # XXX Shouldn't, really 151- return urlparse.parse_qs(qs, keep_blank_values, strict_parsing) 152+ return urlparse.parse_qs(qs, keep_blank_values, strict_parsing, 153+ separator=separator) 154 155 156 # parse query string function called from urlparse, 157@@ -395,7 +400,7 @@ class FieldStorage: 158 159 def __init__(self, fp=None, headers=None, outerboundary="", 160 environ=os.environ, keep_blank_values=0, strict_parsing=0, 161- max_num_fields=None): 162+ max_num_fields=None, separator='&'): 163 """Constructor. Read multipart/* until last part. 164 165 Arguments, all optional: 166@@ -430,6 +435,7 @@ class FieldStorage: 167 self.keep_blank_values = keep_blank_values 168 self.strict_parsing = strict_parsing 169 self.max_num_fields = max_num_fields 170+ self.separator = separator 171 if 'REQUEST_METHOD' in environ: 172 method = environ['REQUEST_METHOD'].upper() 173 self.qs_on_post = None 174@@ -613,7 +619,8 @@ class FieldStorage: 175 if self.qs_on_post: 176 qs += '&' + self.qs_on_post 177 query = urlparse.parse_qsl(qs, self.keep_blank_values, 178- self.strict_parsing, self.max_num_fields) 179+ self.strict_parsing, self.max_num_fields, 180+ separator=self.separator) 181 self.list = [MiniFieldStorage(key, value) for key, value in query] 182 self.skip_lines() 183 184@@ -629,7 +636,8 @@ class FieldStorage: 185 query = urlparse.parse_qsl(self.qs_on_post, 186 self.keep_blank_values, 187 self.strict_parsing, 188- self.max_num_fields) 189+ self.max_num_fields, 190+ separator=self.separator) 191 self.list.extend(MiniFieldStorage(key, value) 192 for key, value in query) 193 FieldStorageClass = None 194@@ -649,7 +657,7 @@ class FieldStorage: 195 headers = rfc822.Message(self.fp) 196 part = klass(self.fp, headers, ib, 197 environ, keep_blank_values, strict_parsing, 198- max_num_fields) 199+ max_num_fields, separator=self.separator) 200 201 if max_num_fields is not None: 202 max_num_fields -= 1 203diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py 204index 743c2afbd4..f414faa23b 100644 205--- a/Lib/test/test_cgi.py 206+++ b/Lib/test/test_cgi.py 207@@ -61,12 +61,9 @@ parse_strict_test_cases = [ 208 ("", ValueError("bad query field: ''")), 209 ("&", ValueError("bad query field: ''")), 210 ("&&", ValueError("bad query field: ''")), 211- (";", ValueError("bad query field: ''")), 212- (";&;", ValueError("bad query field: ''")), 213 # Should the next few really be valid? 214 ("=", {}), 215 ("=&=", {}), 216- ("=;=", {}), 217 # This rest seem to make sense 218 ("=a", {'': ['a']}), 219 ("&=a", ValueError("bad query field: ''")), 220@@ -81,8 +78,6 @@ parse_strict_test_cases = [ 221 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}), 222 ("a=a+b&a=b+a", {'a': ['a b', 'b a']}), 223 ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 224- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 225- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 226 ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env", 227 {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'], 228 'cuyer': ['r'], 229@@ -188,6 +183,30 @@ class CgiTests(unittest.TestCase): 230 self.assertEqual(expect[k], v) 231 self.assertItemsEqual(expect.values(), d.values()) 232 233+ def test_separator(self): 234+ parse_semicolon = [ 235+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}), 236+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}), 237+ (";", ValueError("bad query field: ''")), 238+ (";;", ValueError("bad query field: ''")), 239+ ("=;a", ValueError("bad query field: 'a'")), 240+ (";b=a", ValueError("bad query field: ''")), 241+ ("b;=a", ValueError("bad query field: 'b'")), 242+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), 243+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}), 244+ ] 245+ for orig, expect in parse_semicolon: 246+ env = {'QUERY_STRING': orig} 247+ fs = cgi.FieldStorage(separator=';', environ=env) 248+ if isinstance(expect, dict): 249+ for key in expect.keys(): 250+ expect_val = expect[key] 251+ self.assertIn(key, fs) 252+ if len(expect_val) > 1: 253+ self.assertEqual(fs.getvalue(key), expect_val) 254+ else: 255+ self.assertEqual(fs.getvalue(key), expect_val[0]) 256+ 257 def test_log(self): 258 cgi.log("Testing") 259 260diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py 261index 86c4a0595c..0b2107339a 100644 262--- a/Lib/test/test_urlparse.py 263+++ b/Lib/test/test_urlparse.py 264@@ -24,16 +24,20 @@ parse_qsl_test_cases = [ 265 ("&a=b", [('a', 'b')]), 266 ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]), 267 ("a=1&a=2", [('a', '1'), ('a', '2')]), 268- (";", []), 269- (";;", []), 270- (";a=b", [('a', 'b')]), 271- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]), 272- ("a=1;a=2", [('a', '1'), ('a', '2')]), 273- (b";", []), 274- (b";;", []), 275- (b";a=b", [(b'a', b'b')]), 276- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), 277- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]), 278+ (b"", []), 279+ (b"&", []), 280+ (b"&&", []), 281+ (b"=", [(b'', b'')]), 282+ (b"=a", [(b'', b'a')]), 283+ (b"a", [(b'a', b'')]), 284+ (b"a=", [(b'a', b'')]), 285+ (b"&a=b", [(b'a', b'b')]), 286+ (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]), 287+ (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]), 288+ (";a=b", [(';a', 'b')]), 289+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]), 290+ (b";a=b", [(b';a', b'b')]), 291+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]), 292 ] 293 294 parse_qs_test_cases = [ 295@@ -57,16 +61,10 @@ parse_qs_test_cases = [ 296 (b"&a=b", {b'a': [b'b']}), 297 (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), 298 (b"a=1&a=2", {b'a': [b'1', b'2']}), 299- (";", {}), 300- (";;", {}), 301- (";a=b", {'a': ['b']}), 302- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}), 303- ("a=1;a=2", {'a': ['1', '2']}), 304- (b";", {}), 305- (b";;", {}), 306- (b";a=b", {b'a': [b'b']}), 307- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}), 308- (b"a=1;a=2", {b'a': [b'1', b'2']}), 309+ (";a=b", {';a': ['b']}), 310+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}), 311+ (b";a=b", {b';a': [b'b']}), 312+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}), 313 ] 314 315 class UrlParseTestCase(unittest.TestCase): 316diff --git a/Lib/urlparse.py b/Lib/urlparse.py 317index 798b467b60..6c32727fce 100644 318--- a/Lib/urlparse.py 319+++ b/Lib/urlparse.py 320@@ -382,7 +382,8 @@ def unquote(s): 321 append(item) 322 return ''.join(res) 323 324-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): 325+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, 326+ separator='&'): 327 """Parse a query given as a string argument. 328 329 Arguments: 330@@ -402,17 +403,22 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): 331 332 max_num_fields: int. If set, then throws a ValueError if there 333 are more than n fields read by parse_qsl(). 334+ 335+ separator: str. The symbol to use for separating the query arguments. 336+ Defaults to &. 337+ 338 """ 339 dict = {} 340 for name, value in parse_qsl(qs, keep_blank_values, strict_parsing, 341- max_num_fields): 342+ max_num_fields, separator=separator): 343 if name in dict: 344 dict[name].append(value) 345 else: 346 dict[name] = [value] 347 return dict 348 349-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): 350+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None, 351+ separator='&'): 352 """Parse a query given as a string argument. 353 354 Arguments: 355@@ -432,17 +438,23 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None): 356 max_num_fields: int. If set, then throws a ValueError if there 357 are more than n fields read by parse_qsl(). 358 359+ separator: str. The symbol to use for separating the query arguments. 360+ Defaults to &. 361+ 362 Returns a list, as G-d intended. 363 """ 364+ if not separator or (not isinstance(separator, (str, bytes))): 365+ raise ValueError("Separator must be of type string or bytes.") 366+ 367 # If max_num_fields is defined then check that the number of fields 368 # is less than max_num_fields. This prevents a memory exhaustion DOS 369 # attack via post bodies with many fields. 370 if max_num_fields is not None: 371- num_fields = 1 + qs.count('&') + qs.count(';') 372+ num_fields = 1 + qs.count(separator) 373 if max_num_fields < num_fields: 374 raise ValueError('Max number of fields exceeded') 375 376- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')] 377+ pairs = [s1 for s1 in qs.split(separator)] 378 r = [] 379 for name_value in pairs: 380 if not name_value and not strict_parsing: 381diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst 382new file mode 100644 383index 0000000000..f08489b414 384--- /dev/null 385+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst 386@@ -0,0 +1 @@ 387+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator. 388-- 3892.31.1 390