summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/python/python3
diff options
context:
space:
mode:
authorHitendra Prajapati <hprajapati@mvista.com>2024-09-09 11:00:04 +0530
committerSteve Sakoman <steve@sakoman.com>2024-09-16 06:09:56 -0700
commitbfbf6d481d4d9d21fd624528c8a61ccc64fadc4c (patch)
tree5a7ecfc16d1ea5222af51ed870fef45827464721 /meta/recipes-devtools/python/python3
parent85134c7690551835f1ad2295533cf1c6e81b4ab6 (diff)
downloadpoky-bfbf6d481d4d9d21fd624528c8a61ccc64fadc4c.tar.gz
python3: fix CVE-2023-27043
Upstream-Status: Backport from https://github.com/python/cpython/commit/2a9273a0e4466e2f057f9ce6fe98cd8ce570331b (From OE-Core rev: 793c22623e8b3da2ca8e28fe662d8428b0f805a7) Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com> Signed-off-by: Steve Sakoman <steve@sakoman.com>
Diffstat (limited to 'meta/recipes-devtools/python/python3')
-rw-r--r--meta/recipes-devtools/python/python3/CVE-2023-27043.patch510
1 files changed, 510 insertions, 0 deletions
diff --git a/meta/recipes-devtools/python/python3/CVE-2023-27043.patch b/meta/recipes-devtools/python/python3/CVE-2023-27043.patch
new file mode 100644
index 0000000000..d27afc41a9
--- /dev/null
+++ b/meta/recipes-devtools/python/python3/CVE-2023-27043.patch
@@ -0,0 +1,510 @@
1From 2a9273a0e4466e2f057f9ce6fe98cd8ce570331b Mon Sep 17 00:00:00 2001
2From: Petr Viktorin <encukou@gmail.com>
3Date: Fri, 6 Sep 2024 13:14:22 +0200
4Subject: [PATCH] [3.10] [CVE-2023-27043] gh-102988: Reject malformed addresses
5 in email.parseaddr() (GH-111116) (#123768)
6
7Detect email address parsing errors and return empty tuple to
8indicate the parsing error (old API). Add an optional 'strict'
9parameter to getaddresses() and parseaddr() functions. Patch by
10Thomas Dwyer.
11
12(cherry picked from commit 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19)
13
14Co-authored-by: Victor Stinner <vstinner@python.org>
15Co-Authored-By: Thomas Dwyer <github@tomd.tel>
16
17Upstream-Status: Backport [https://github.com/python/cpython/commit/2a9273a0e4466e2f057f9ce6fe98cd8ce570331b]
18CVE: CVE-2023-27043
19Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
20---
21 Doc/library/email.utils.rst | 19 +-
22 Lib/email/utils.py | 151 ++++++++++++-
23 Lib/test/test_email/test_email.py | 204 +++++++++++++++++-
24 ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 +
25 4 files changed, 361 insertions(+), 21 deletions(-)
26 create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
27
28diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
29index 0e266b6..65948fb 100644
30--- a/Doc/library/email.utils.rst
31+++ b/Doc/library/email.utils.rst
32@@ -60,13 +60,18 @@ of the new API.
33 begins with angle brackets, they are stripped off.
34
35
36-.. function:: parseaddr(address)
37+.. function:: parseaddr(address, *, strict=True)
38
39 Parse address -- which should be the value of some address-containing field such
40 as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
41 *email address* parts. Returns a tuple of that information, unless the parse
42 fails, in which case a 2-tuple of ``('', '')`` is returned.
43
44+ If *strict* is true, use a strict parser which rejects malformed inputs.
45+
46+ .. versionchanged:: 3.10.15
47+ Add *strict* optional parameter and reject malformed inputs by default.
48+
49
50 .. function:: formataddr(pair, charset='utf-8')
51
52@@ -84,12 +89,15 @@ of the new API.
53 Added the *charset* option.
54
55
56-.. function:: getaddresses(fieldvalues)
57+.. function:: getaddresses(fieldvalues, *, strict=True)
58
59 This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
60 *fieldvalues* is a sequence of header field values as might be returned by
61- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
62- example that gets all the recipients of a message::
63+ :meth:`Message.get_all <email.message.Message.get_all>`.
64+
65+ If *strict* is true, use a strict parser which rejects malformed inputs.
66+
67+ Here's a simple example that gets all the recipients of a message::
68
69 from email.utils import getaddresses
70
71@@ -99,6 +107,9 @@ of the new API.
72 resent_ccs = msg.get_all('resent-cc', [])
73 all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
74
75+ .. versionchanged:: 3.10.15
76+ Add *strict* optional parameter and reject malformed inputs by default.
77+
78
79 .. function:: parsedate(date)
80
81diff --git a/Lib/email/utils.py b/Lib/email/utils.py
82index cfdfeb3..9522341 100644
83--- a/Lib/email/utils.py
84+++ b/Lib/email/utils.py
85@@ -48,6 +48,7 @@ TICK = "'"
86 specialsre = re.compile(r'[][\\()<>@,:;".]')
87 escapesre = re.compile(r'[\\"]')
88
89+
90 def _has_surrogates(s):
91 """Return True if s contains surrogate-escaped binary data."""
92 # This check is based on the fact that unless there are surrogates, utf8
93@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
94 return address
95
96
97+def _iter_escaped_chars(addr):
98+ pos = 0
99+ escape = False
100+ for pos, ch in enumerate(addr):
101+ if escape:
102+ yield (pos, '\\' + ch)
103+ escape = False
104+ elif ch == '\\':
105+ escape = True
106+ else:
107+ yield (pos, ch)
108+ if escape:
109+ yield (pos, '\\')
110+
111+
112+def _strip_quoted_realnames(addr):
113+ """Strip real names between quotes."""
114+ if '"' not in addr:
115+ # Fast path
116+ return addr
117+
118+ start = 0
119+ open_pos = None
120+ result = []
121+ for pos, ch in _iter_escaped_chars(addr):
122+ if ch == '"':
123+ if open_pos is None:
124+ open_pos = pos
125+ else:
126+ if start != open_pos:
127+ result.append(addr[start:open_pos])
128+ start = pos + 1
129+ open_pos = None
130+
131+ if start < len(addr):
132+ result.append(addr[start:])
133+
134+ return ''.join(result)
135
136-def getaddresses(fieldvalues):
137- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
138- all = COMMASPACE.join(str(v) for v in fieldvalues)
139- a = _AddressList(all)
140- return a.addresslist
141+
142+supports_strict_parsing = True
143+
144+def getaddresses(fieldvalues, *, strict=True):
145+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
146+
147+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
148+ its place.
149+
150+ If strict is true, use a strict parser which rejects malformed inputs.
151+ """
152+
153+ # If strict is true, if the resulting list of parsed addresses is greater
154+ # than the number of fieldvalues in the input list, a parsing error has
155+ # occurred and consequently a list containing a single empty 2-tuple [('',
156+ # '')] is returned in its place. This is done to avoid invalid output.
157+ #
158+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
159+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
160+ # Safe output: [('', '')]
161+
162+ if not strict:
163+ all = COMMASPACE.join(str(v) for v in fieldvalues)
164+ a = _AddressList(all)
165+ return a.addresslist
166+
167+ fieldvalues = [str(v) for v in fieldvalues]
168+ fieldvalues = _pre_parse_validation(fieldvalues)
169+ addr = COMMASPACE.join(fieldvalues)
170+ a = _AddressList(addr)
171+ result = _post_parse_validation(a.addresslist)
172+
173+ # Treat output as invalid if the number of addresses is not equal to the
174+ # expected number of addresses.
175+ n = 0
176+ for v in fieldvalues:
177+ # When a comma is used in the Real Name part it is not a delimiter.
178+ # So strip those out before counting the commas.
179+ v = _strip_quoted_realnames(v)
180+ # Expected number of addresses: 1 + number of commas
181+ n += 1 + v.count(',')
182+ if len(result) != n:
183+ return [('', '')]
184+
185+ return result
186+
187+
188+def _check_parenthesis(addr):
189+ # Ignore parenthesis in quoted real names.
190+ addr = _strip_quoted_realnames(addr)
191+
192+ opens = 0
193+ for pos, ch in _iter_escaped_chars(addr):
194+ if ch == '(':
195+ opens += 1
196+ elif ch == ')':
197+ opens -= 1
198+ if opens < 0:
199+ return False
200+ return (opens == 0)
201+
202+
203+def _pre_parse_validation(email_header_fields):
204+ accepted_values = []
205+ for v in email_header_fields:
206+ if not _check_parenthesis(v):
207+ v = "('', '')"
208+ accepted_values.append(v)
209+
210+ return accepted_values
211+
212+
213+def _post_parse_validation(parsed_email_header_tuples):
214+ accepted_values = []
215+ # The parser would have parsed a correctly formatted domain-literal.
216+ # The existence of a '[' after parsing indicates a parsing failure.
217+ for v in parsed_email_header_tuples:
218+ if '[' in v[1]:
219+ v = ('', '')
220+ accepted_values.append(v)
221+
222+ return accepted_values
223
224
225 def _format_timetuple_and_zone(timetuple, zone):
226@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
227 tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
228
229
230-def parseaddr(addr):
231+def parseaddr(addr, *, strict=True):
232 """
233 Parse addr into its constituent realname and email address parts.
234
235 Return a tuple of realname and email address, unless the parse fails, in
236 which case return a 2-tuple of ('', '').
237+
238+ If strict is True, use a strict parser which rejects malformed inputs.
239 """
240- addrs = _AddressList(addr).addresslist
241- if not addrs:
242- return '', ''
243+ if not strict:
244+ addrs = _AddressList(addr).addresslist
245+ if not addrs:
246+ return ('', '')
247+ return addrs[0]
248+
249+ if isinstance(addr, list):
250+ addr = addr[0]
251+
252+ if not isinstance(addr, str):
253+ return ('', '')
254+
255+ addr = _pre_parse_validation([addr])[0]
256+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
257+
258+ if not addrs or len(addrs) > 1:
259+ return ('', '')
260+
261 return addrs[0]
262
263
264diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
265index 8b16cca..5b19bb3 100644
266--- a/Lib/test/test_email/test_email.py
267+++ b/Lib/test/test_email/test_email.py
268@@ -16,6 +16,7 @@ from unittest.mock import patch
269
270 import email
271 import email.policy
272+import email.utils
273
274 from email.charset import Charset
275 from email.generator import Generator, DecodedGenerator, BytesGenerator
276@@ -3288,15 +3289,154 @@ Foo
277 [('Al Person', 'aperson@dom.ain'),
278 ('Bud Person', 'bperson@dom.ain')])
279
280+ def test_getaddresses_comma_in_name(self):
281+ """GH-106669 regression test."""
282+ self.assertEqual(
283+ utils.getaddresses(
284+ [
285+ '"Bud, Person" <bperson@dom.ain>',
286+ 'aperson@dom.ain (Al Person)',
287+ '"Mariusz Felisiak" <to@example.com>',
288+ ]
289+ ),
290+ [
291+ ('Bud, Person', 'bperson@dom.ain'),
292+ ('Al Person', 'aperson@dom.ain'),
293+ ('Mariusz Felisiak', 'to@example.com'),
294+ ],
295+ )
296+
297+ def test_parsing_errors(self):
298+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
299+ alice = 'alice@example.org'
300+ bob = 'bob@example.com'
301+ empty = ('', '')
302+
303+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
304+ # addresses: default behavior (strict=True) rejects malformed address,
305+ # and strict=False which tolerates malformed address.
306+ for invalid_separator, expected_non_strict in (
307+ ('(', [(f'<{bob}>', alice)]),
308+ (')', [('', alice), empty, ('', bob)]),
309+ ('<', [('', alice), empty, ('', bob), empty]),
310+ ('>', [('', alice), empty, ('', bob)]),
311+ ('[', [('', f'{alice}[<{bob}>]')]),
312+ (']', [('', alice), empty, ('', bob)]),
313+ ('@', [empty, empty, ('', bob)]),
314+ (';', [('', alice), empty, ('', bob)]),
315+ (':', [('', alice), ('', bob)]),
316+ ('.', [('', alice + '.'), ('', bob)]),
317+ ('"', [('', alice), ('', f'<{bob}>')]),
318+ ):
319+ address = f'{alice}{invalid_separator}<{bob}>'
320+ with self.subTest(address=address):
321+ self.assertEqual(utils.getaddresses([address]),
322+ [empty])
323+ self.assertEqual(utils.getaddresses([address], strict=False),
324+ expected_non_strict)
325+
326+ self.assertEqual(utils.parseaddr([address]),
327+ empty)
328+ self.assertEqual(utils.parseaddr([address], strict=False),
329+ ('', address))
330+
331+ # Comma (',') is treated differently depending on strict parameter.
332+ # Comma without quotes.
333+ address = f'{alice},<{bob}>'
334+ self.assertEqual(utils.getaddresses([address]),
335+ [('', alice), ('', bob)])
336+ self.assertEqual(utils.getaddresses([address], strict=False),
337+ [('', alice), ('', bob)])
338+ self.assertEqual(utils.parseaddr([address]),
339+ empty)
340+ self.assertEqual(utils.parseaddr([address], strict=False),
341+ ('', address))
342+
343+ # Real name between quotes containing comma.
344+ address = '"Alice, alice@example.org" <bob@example.com>'
345+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
346+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
347+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
348+ self.assertEqual(utils.parseaddr([address]), expected_strict)
349+ self.assertEqual(utils.parseaddr([address], strict=False),
350+ ('', address))
351+
352+ # Valid parenthesis in comments.
353+ address = 'alice@example.org (Alice)'
354+ expected_strict = ('Alice', 'alice@example.org')
355+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
356+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
357+ self.assertEqual(utils.parseaddr([address]), expected_strict)
358+ self.assertEqual(utils.parseaddr([address], strict=False),
359+ ('', address))
360+
361+ # Invalid parenthesis in comments.
362+ address = 'alice@example.org )Alice('
363+ self.assertEqual(utils.getaddresses([address]), [empty])
364+ self.assertEqual(utils.getaddresses([address], strict=False),
365+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
366+ self.assertEqual(utils.parseaddr([address]), empty)
367+ self.assertEqual(utils.parseaddr([address], strict=False),
368+ ('', address))
369+
370+ # Two addresses with quotes separated by comma.
371+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
372+ self.assertEqual(utils.getaddresses([address]),
373+ [('Jane Doe', 'jane@example.net'),
374+ ('John Doe', 'john@example.net')])
375+ self.assertEqual(utils.getaddresses([address], strict=False),
376+ [('Jane Doe', 'jane@example.net'),
377+ ('John Doe', 'john@example.net')])
378+ self.assertEqual(utils.parseaddr([address]), empty)
379+ self.assertEqual(utils.parseaddr([address], strict=False),
380+ ('', address))
381+
382+ # Test email.utils.supports_strict_parsing attribute
383+ self.assertEqual(email.utils.supports_strict_parsing, True)
384+
385 def test_getaddresses_nasty(self):
386- eq = self.assertEqual
387- eq(utils.getaddresses(['foo: ;']), [('', '')])
388- eq(utils.getaddresses(
389- ['[]*-- =~$']),
390- [('', ''), ('', ''), ('', '*--')])
391- eq(utils.getaddresses(
392- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
393- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
394+ for addresses, expected in (
395+ (['"Sürname, Firstname" <to@example.com>'],
396+ [('Sürname, Firstname', 'to@example.com')]),
397+
398+ (['foo: ;'],
399+ [('', '')]),
400+
401+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
402+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
403+
404+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
405+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
406+
407+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
408+ [('', '')]),
409+
410+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
411+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
412+
413+ (['John Doe <jdoe@machine(comment). example>'],
414+ [('John Doe (comment)', 'jdoe@machine.example')]),
415+
416+ (['"Mary Smith: Personal Account" <smith@home.example>'],
417+ [('Mary Smith: Personal Account', 'smith@home.example')]),
418+
419+ (['Undisclosed recipients:;'],
420+ [('', '')]),
421+
422+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
423+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
424+ ):
425+ with self.subTest(addresses=addresses):
426+ self.assertEqual(utils.getaddresses(addresses),
427+ expected)
428+ self.assertEqual(utils.getaddresses(addresses, strict=False),
429+ expected)
430+
431+ addresses = ['[]*-- =~$']
432+ self.assertEqual(utils.getaddresses(addresses),
433+ [('', '')])
434+ self.assertEqual(utils.getaddresses(addresses, strict=False),
435+ [('', ''), ('', ''), ('', '*--')])
436
437 def test_getaddresses_embedded_comment(self):
438 """Test proper handling of a nested comment"""
439@@ -3485,6 +3625,54 @@ multipart/report
440 m = cls(*constructor, policy=email.policy.default)
441 self.assertIs(m.policy, email.policy.default)
442
443+ def test_iter_escaped_chars(self):
444+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
445+ [(0, 'a'),
446+ (2, '\\\\'),
447+ (3, 'b'),
448+ (5, '\\"'),
449+ (6, 'c'),
450+ (8, '\\\\'),
451+ (9, '"'),
452+ (10, 'd')])
453+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
454+ [(0, 'a'), (1, '\\')])
455+
456+ def test_strip_quoted_realnames(self):
457+ def check(addr, expected):
458+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
459+
460+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
461+ ' <jane@example.net>, <john@example.net>')
462+ check(r'"Jane \"Doe\"." <jane@example.net>',
463+ ' <jane@example.net>')
464+
465+ # special cases
466+ check(r'before"name"after', 'beforeafter')
467+ check(r'before"name"', 'before')
468+ check(r'b"name"', 'b') # single char
469+ check(r'"name"after', 'after')
470+ check(r'"name"a', 'a') # single char
471+ check(r'"name"', '')
472+
473+ # no change
474+ for addr in (
475+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
476+ 'lone " quote',
477+ ):
478+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
479+
480+
481+ def test_check_parenthesis(self):
482+ addr = 'alice@example.net'
483+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
484+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
485+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
486+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
487+
488+ # Ignore real name between quotes
489+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
490+
491
492 # Test the iterator/generators
493 class TestIterators(TestEmailBase):
494diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
495new file mode 100644
496index 0000000..3d0e9e4
497--- /dev/null
498+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
499@@ -0,0 +1,8 @@
500+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
501+return ``('', '')`` 2-tuples in more situations where invalid email
502+addresses are encountered instead of potentially inaccurate values. Add
503+optional *strict* parameter to these two functions: use ``strict=False`` to
504+get the old behavior, accept malformed inputs.
505+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
506+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
507+Stinner to improve the CVE-2023-27043 fix.
508--
5092.25.1
510