1From e7b005c05dbdbce967a409abd71641281a8604bf Mon Sep 17 00:00:00 2001
2From: Senthil Kumaran <senthil@uthcode.com>
3Date: Mon, 15 Feb 2021 11:16:43 -0800
4Subject: [PATCH 24/26] [3.6] bpo-42967: only use '&' as a query string
5 separator (GH-24297) (GH-24532)
6MIME-Version: 1.0
7Content-Type: text/plain; charset=UTF-8
8Content-Transfer-Encoding: 8bit
9
10bpo-42967: [security] Address a web cache-poisoning issue reported in
11urllib.parse.parse_qsl().
12
13urllib.parse will only us "&" as query string separator by default
14instead of both ";" and "&" as allowed in earlier versions. An optional
15argument seperator with default value "&" is added to specify the
16separator.
17
18Co-authored-by: Éric Araujo <merwok@netwok.org>
19Co-authored-by: Ken Jin <28750310+Fidget-Spinner@users.noreply.github.com>
20Co-authored-by: Adam Goldschmidt <adamgold7@gmail.com>
21
22Rebased for Python 2.7 by Michał Górny
23---
24 Doc/library/cgi.rst | 7 +++-
25 Doc/library/urlparse.rst | 23 ++++++++++-
26 Lib/cgi.py | 20 +++++++---
27 Lib/test/test_cgi.py | 29 +++++++++++---
28 Lib/test/test_urlparse.py | 38 +++++++++----------
29 Lib/urlparse.py | 22 ++++++++---
30 .../2021-02-14-15-59-16.bpo-42967.YApqDS.rst | 1 +
31 7 files changed, 100 insertions(+), 40 deletions(-)
32 create mode 100644 Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
33
34diff --git a/Doc/library/cgi.rst b/Doc/library/cgi.rst
35index ecd62c8c01..b85cdd8b61 100644
36--- a/Doc/library/cgi.rst
37+++ b/Doc/library/cgi.rst
38@@ -285,10 +285,10 @@ These are useful if you want more control, or if you want to employ some of the
39 algorithms implemented in this module in other circumstances.
40
41
42-.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]])
43+.. function:: parse(fp[, environ[, keep_blank_values[, strict_parsing]]], separator="&")
44
45 Parse a query in the environment or from a file (the file defaults to
46- ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values* and *strict_parsing* parameters are
47+ ``sys.stdin`` and environment defaults to ``os.environ``). The *keep_blank_values*, *strict_parsing* and *separator* parameters are
48 passed to :func:`urlparse.parse_qs` unchanged.
49
50
51@@ -316,6 +316,9 @@ algorithms implemented in this module in other circumstances.
52 Note that this does not parse nested multipart parts --- use
53 :class:`FieldStorage` for that.
54
55+ .. versionchanged:: 3.6.13
56+ Added the *separator* parameter.
57+
58
59 .. function:: parse_header(string)
60
61diff --git a/Doc/library/urlparse.rst b/Doc/library/urlparse.rst
62index 0989c88c30..2f8e4c5a44 100644
63--- a/Doc/library/urlparse.rst
64+++ b/Doc/library/urlparse.rst
65@@ -136,7 +136,7 @@ The :mod:`urlparse` module defines the following functions:
66 now raise :exc:`ValueError`.
67
68
69-.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
70+.. function:: parse_qs(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]], separator='&')
71
72 Parse a query string given as a string argument (data of type
73 :mimetype:`application/x-www-form-urlencoded`). Data are returned as a
74@@ -157,6 +157,9 @@ The :mod:`urlparse` module defines the following functions:
75 read. If set, then throws a :exc:`ValueError` if there are more than
76 *max_num_fields* fields read.
77
78+ The optional argument *separator* is the symbol to use for separating the
79+ query arguments. It defaults to ``&``.
80+
81 Use the :func:`urllib.urlencode` function to convert such dictionaries into
82 query strings.
83
84@@ -166,7 +169,14 @@ The :mod:`urlparse` module defines the following functions:
85 .. versionchanged:: 2.7.16
86 Added *max_num_fields* parameter.
87
88-.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]])
89+ .. versionchanged:: 2.7.18-gentoo
90+ Added *separator* parameter with the default value of ``&``. Earlier
91+ Python versions allowed using both ``;`` and ``&`` as query parameter
92+ separator. This has been changed to allow only a single separator key,
93+ with ``&`` as the default separator.
94+
95+
96+.. function:: parse_qsl(qs[, keep_blank_values[, strict_parsing[, max_num_fields]]], separator='&')
97
98 Parse a query string given as a string argument (data of type
99 :mimetype:`application/x-www-form-urlencoded`). Data are returned as a list of
100@@ -186,6 +196,9 @@ The :mod:`urlparse` module defines the following functions:
101 read. If set, then throws a :exc:`ValueError` if there are more than
102 *max_num_fields* fields read.
103
104+ The optional argument *separator* is the symbol to use for separating the
105+ query arguments. It defaults to ``&``.
106+
107 Use the :func:`urllib.urlencode` function to convert such lists of pairs into
108 query strings.
109
110@@ -195,6 +208,12 @@ The :mod:`urlparse` module defines the following functions:
111 .. versionchanged:: 2.7.16
112 Added *max_num_fields* parameter.
113
114+ .. versionchanged:: 2.7.18-gentoo
115+ Added *separator* parameter with the default value of ``&``. Earlier
116+ Python versions allowed using both ``;`` and ``&`` as query parameter
117+ separator. This has been changed to allow only a single separator key,
118+ with ``&`` as the default separator.
119+
120 .. function:: urlunparse(parts)
121
122 Construct a URL from a tuple as returned by ``urlparse()``. The *parts* argument
123diff --git a/Lib/cgi.py b/Lib/cgi.py
124index 5b903e0347..9d0848b6b1 100755
125--- a/Lib/cgi.py
126+++ b/Lib/cgi.py
127@@ -121,7 +121,8 @@ log = initlog # The current logging function
128 # 0 ==> unlimited input
129 maxlen = 0
130
131-def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
132+def parse(fp=None, environ=os.environ, keep_blank_values=0,
133+ strict_parsing=0, separator='&'):
134 """Parse a query in the environment or from a file (default stdin)
135
136 Arguments, all optional:
137@@ -140,6 +141,9 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
138 strict_parsing: flag indicating what to do with parsing errors.
139 If false (the default), errors are silently ignored.
140 If true, errors raise a ValueError exception.
141+
142+ separator: str. The symbol to use for separating the query arguments.
143+ Defaults to &.
144 """
145 if fp is None:
146 fp = sys.stdin
147@@ -171,7 +175,8 @@ def parse(fp=None, environ=os.environ, keep_blank_values=0, strict_parsing=0):
148 else:
149 qs = ""
150 environ['QUERY_STRING'] = qs # XXX Shouldn't, really
151- return urlparse.parse_qs(qs, keep_blank_values, strict_parsing)
152+ return urlparse.parse_qs(qs, keep_blank_values, strict_parsing,
153+ separator=separator)
154
155
156 # parse query string function called from urlparse,
157@@ -395,7 +400,7 @@ class FieldStorage:
158
159 def __init__(self, fp=None, headers=None, outerboundary="",
160 environ=os.environ, keep_blank_values=0, strict_parsing=0,
161- max_num_fields=None):
162+ max_num_fields=None, separator='&'):
163 """Constructor. Read multipart/* until last part.
164
165 Arguments, all optional:
166@@ -430,6 +435,7 @@ class FieldStorage:
167 self.keep_blank_values = keep_blank_values
168 self.strict_parsing = strict_parsing
169 self.max_num_fields = max_num_fields
170+ self.separator = separator
171 if 'REQUEST_METHOD' in environ:
172 method = environ['REQUEST_METHOD'].upper()
173 self.qs_on_post = None
174@@ -613,7 +619,8 @@ class FieldStorage:
175 if self.qs_on_post:
176 qs += '&' + self.qs_on_post
177 query = urlparse.parse_qsl(qs, self.keep_blank_values,
178- self.strict_parsing, self.max_num_fields)
179+ self.strict_parsing, self.max_num_fields,
180+ separator=self.separator)
181 self.list = [MiniFieldStorage(key, value) for key, value in query]
182 self.skip_lines()
183
184@@ -629,7 +636,8 @@ class FieldStorage:
185 query = urlparse.parse_qsl(self.qs_on_post,
186 self.keep_blank_values,
187 self.strict_parsing,
188- self.max_num_fields)
189+ self.max_num_fields,
190+ separator=self.separator)
191 self.list.extend(MiniFieldStorage(key, value)
192 for key, value in query)
193 FieldStorageClass = None
194@@ -649,7 +657,7 @@ class FieldStorage:
195 headers = rfc822.Message(self.fp)
196 part = klass(self.fp, headers, ib,
197 environ, keep_blank_values, strict_parsing,
198- max_num_fields)
199+ max_num_fields, separator=self.separator)
200
201 if max_num_fields is not None:
202 max_num_fields -= 1
203diff --git a/Lib/test/test_cgi.py b/Lib/test/test_cgi.py
204index 743c2afbd4..f414faa23b 100644
205--- a/Lib/test/test_cgi.py
206+++ b/Lib/test/test_cgi.py
207@@ -61,12 +61,9 @@ parse_strict_test_cases = [
208 ("", ValueError("bad query field: ''")),
209 ("&", ValueError("bad query field: ''")),
210 ("&&", ValueError("bad query field: ''")),
211- (";", ValueError("bad query field: ''")),
212- (";&;", ValueError("bad query field: ''")),
213 # Should the next few really be valid?
214 ("=", {}),
215 ("=&=", {}),
216- ("=;=", {}),
217 # This rest seem to make sense
218 ("=a", {'': ['a']}),
219 ("&=a", ValueError("bad query field: ''")),
220@@ -81,8 +78,6 @@ parse_strict_test_cases = [
221 ("a=a+b&b=b+c", {'a': ['a b'], 'b': ['b c']}),
222 ("a=a+b&a=b+a", {'a': ['a b', 'b a']}),
223 ("x=1&y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
224- ("x=1;y=2.0&z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
225- ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
226 ("Hbc5161168c542333633315dee1182227:key_store_seqid=400006&cuyer=r&view=bustomer&order_id=0bb2e248638833d48cb7fed300000f1b&expire=964546263&lobale=en-US&kid=130003.300038&ss=env",
227 {'Hbc5161168c542333633315dee1182227:key_store_seqid': ['400006'],
228 'cuyer': ['r'],
229@@ -188,6 +183,30 @@ class CgiTests(unittest.TestCase):
230 self.assertEqual(expect[k], v)
231 self.assertItemsEqual(expect.values(), d.values())
232
233+ def test_separator(self):
234+ parse_semicolon = [
235+ ("x=1;y=2.0", {'x': ['1'], 'y': ['2.0']}),
236+ ("x=1;y=2.0;z=2-3.%2b0", {'x': ['1'], 'y': ['2.0'], 'z': ['2-3.+0']}),
237+ (";", ValueError("bad query field: ''")),
238+ (";;", ValueError("bad query field: ''")),
239+ ("=;a", ValueError("bad query field: 'a'")),
240+ (";b=a", ValueError("bad query field: ''")),
241+ ("b;=a", ValueError("bad query field: 'b'")),
242+ ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
243+ ("a=a+b;a=b+a", {'a': ['a b', 'b a']}),
244+ ]
245+ for orig, expect in parse_semicolon:
246+ env = {'QUERY_STRING': orig}
247+ fs = cgi.FieldStorage(separator=';', environ=env)
248+ if isinstance(expect, dict):
249+ for key in expect.keys():
250+ expect_val = expect[key]
251+ self.assertIn(key, fs)
252+ if len(expect_val) > 1:
253+ self.assertEqual(fs.getvalue(key), expect_val)
254+ else:
255+ self.assertEqual(fs.getvalue(key), expect_val[0])
256+
257 def test_log(self):
258 cgi.log("Testing")
259
260diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
261index 86c4a0595c..0b2107339a 100644
262--- a/Lib/test/test_urlparse.py
263+++ b/Lib/test/test_urlparse.py
264@@ -24,16 +24,20 @@ parse_qsl_test_cases = [
265 ("&a=b", [('a', 'b')]),
266 ("a=a+b&b=b+c", [('a', 'a b'), ('b', 'b c')]),
267 ("a=1&a=2", [('a', '1'), ('a', '2')]),
268- (";", []),
269- (";;", []),
270- (";a=b", [('a', 'b')]),
271- ("a=a+b;b=b+c", [('a', 'a b'), ('b', 'b c')]),
272- ("a=1;a=2", [('a', '1'), ('a', '2')]),
273- (b";", []),
274- (b";;", []),
275- (b";a=b", [(b'a', b'b')]),
276- (b"a=a+b;b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
277- (b"a=1;a=2", [(b'a', b'1'), (b'a', b'2')]),
278+ (b"", []),
279+ (b"&", []),
280+ (b"&&", []),
281+ (b"=", [(b'', b'')]),
282+ (b"=a", [(b'', b'a')]),
283+ (b"a", [(b'a', b'')]),
284+ (b"a=", [(b'a', b'')]),
285+ (b"&a=b", [(b'a', b'b')]),
286+ (b"a=a+b&b=b+c", [(b'a', b'a b'), (b'b', b'b c')]),
287+ (b"a=1&a=2", [(b'a', b'1'), (b'a', b'2')]),
288+ (";a=b", [(';a', 'b')]),
289+ ("a=a+b;b=b+c", [('a', 'a b;b=b c')]),
290+ (b";a=b", [(b';a', b'b')]),
291+ (b"a=a+b;b=b+c", [(b'a', b'a b;b=b c')]),
292 ]
293
294 parse_qs_test_cases = [
295@@ -57,16 +61,10 @@ parse_qs_test_cases = [
296 (b"&a=b", {b'a': [b'b']}),
297 (b"a=a+b&b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
298 (b"a=1&a=2", {b'a': [b'1', b'2']}),
299- (";", {}),
300- (";;", {}),
301- (";a=b", {'a': ['b']}),
302- ("a=a+b;b=b+c", {'a': ['a b'], 'b': ['b c']}),
303- ("a=1;a=2", {'a': ['1', '2']}),
304- (b";", {}),
305- (b";;", {}),
306- (b";a=b", {b'a': [b'b']}),
307- (b"a=a+b;b=b+c", {b'a': [b'a b'], b'b': [b'b c']}),
308- (b"a=1;a=2", {b'a': [b'1', b'2']}),
309+ (";a=b", {';a': ['b']}),
310+ ("a=a+b;b=b+c", {'a': ['a b;b=b c']}),
311+ (b";a=b", {b';a': [b'b']}),
312+ (b"a=a+b;b=b+c", {b'a':[ b'a b;b=b c']}),
313 ]
314
315 class UrlParseTestCase(unittest.TestCase):
316diff --git a/Lib/urlparse.py b/Lib/urlparse.py
317index 798b467b60..6c32727fce 100644
318--- a/Lib/urlparse.py
319+++ b/Lib/urlparse.py
320@@ -382,7 +382,8 @@ def unquote(s):
321 append(item)
322 return ''.join(res)
323
324-def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
325+def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
326+ separator='&'):
327 """Parse a query given as a string argument.
328
329 Arguments:
330@@ -402,17 +403,22 @@ def parse_qs(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
331
332 max_num_fields: int. If set, then throws a ValueError if there
333 are more than n fields read by parse_qsl().
334+
335+ separator: str. The symbol to use for separating the query arguments.
336+ Defaults to &.
337+
338 """
339 dict = {}
340 for name, value in parse_qsl(qs, keep_blank_values, strict_parsing,
341- max_num_fields):
342+ max_num_fields, separator=separator):
343 if name in dict:
344 dict[name].append(value)
345 else:
346 dict[name] = [value]
347 return dict
348
349-def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
350+def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None,
351+ separator='&'):
352 """Parse a query given as a string argument.
353
354 Arguments:
355@@ -432,17 +438,23 @@ def parse_qsl(qs, keep_blank_values=0, strict_parsing=0, max_num_fields=None):
356 max_num_fields: int. If set, then throws a ValueError if there
357 are more than n fields read by parse_qsl().
358
359+ separator: str. The symbol to use for separating the query arguments.
360+ Defaults to &.
361+
362 Returns a list, as G-d intended.
363 """
364+ if not separator or (not isinstance(separator, (str, bytes))):
365+ raise ValueError("Separator must be of type string or bytes.")
366+
367 # If max_num_fields is defined then check that the number of fields
368 # is less than max_num_fields. This prevents a memory exhaustion DOS
369 # attack via post bodies with many fields.
370 if max_num_fields is not None:
371- num_fields = 1 + qs.count('&') + qs.count(';')
372+ num_fields = 1 + qs.count(separator)
373 if max_num_fields < num_fields:
374 raise ValueError('Max number of fields exceeded')
375
376- pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
377+ pairs = [s1 for s1 in qs.split(separator)]
378 r = []
379 for name_value in pairs:
380 if not name_value and not strict_parsing:
381diff --git a/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
382new file mode 100644
383index 0000000000..f08489b414
384--- /dev/null
385+++ b/Misc/NEWS.d/next/Security/2021-02-14-15-59-16.bpo-42967.YApqDS.rst
386@@ -0,0 +1 @@
387+Fix web cache poisoning vulnerability by defaulting the query args separator to ``&``, and allowing the user to choose a custom separator.
388--
3892.31.1
390