summaryrefslogtreecommitdiffstats
path: root/meta/recipes-devtools/python
diff options
context:
space:
mode:
authorDivya Chellam <divya.chellam@windriver.com>2024-09-18 06:40:30 +0000
committerSteve Sakoman <steve@sakoman.com>2024-09-27 05:57:35 -0700
commit487e8cdf1df6feba6d88fa29e11791f4ebaaa362 (patch)
tree1840e3774e8af68dbe0248a463055af51fd25dd7 /meta/recipes-devtools/python
parent8190d9c754c9c3a1962123e1e86d99de96c1224c (diff)
downloadpoky-487e8cdf1df6feba6d88fa29e11791f4ebaaa362.tar.gz
python3: Upgrade 3.10.14 -> 3.10.15
Includes security fixes for CVE-2024-4030, CVE-2024-7592, CVE-2024-4032, CVE-2024-8088, CVE-2024-6232, CVE-2024-6923, CVE-2023-27043 and other bug fixes. Removed the patches below, as the fixes are included in the 3.10.15 upgrade: 1. CVE-2023-27043.patch 2. CVE-2024-6232.patch 3. CVE-2024-7592.patch 4. CVE-2024-8088.patch Release Notes: https://www.python.org/downloads/release/python-31015/ (From OE-Core rev: e64ead97ae3d680f97bf85422f777cd77ae7c434) Signed-off-by: Divya Chellam <divya.chellam@windriver.com> Signed-off-by: Steve Sakoman <steve@sakoman.com>
Diffstat (limited to 'meta/recipes-devtools/python')
-rw-r--r--meta/recipes-devtools/python/python3/CVE-2023-27043.patch510
-rw-r--r--meta/recipes-devtools/python/python3/CVE-2024-6232.patch251
-rw-r--r--meta/recipes-devtools/python/python3/CVE-2024-7592.patch140
-rw-r--r--meta/recipes-devtools/python/python3/CVE-2024-8088.patch124
-rw-r--r--meta/recipes-devtools/python/python3_3.10.15.bb (renamed from meta/recipes-devtools/python/python3_3.10.14.bb)6
5 files changed, 1 insertions, 1030 deletions
diff --git a/meta/recipes-devtools/python/python3/CVE-2023-27043.patch b/meta/recipes-devtools/python/python3/CVE-2023-27043.patch
deleted file mode 100644
index d27afc41a9..0000000000
--- a/meta/recipes-devtools/python/python3/CVE-2023-27043.patch
+++ /dev/null
@@ -1,510 +0,0 @@
1From 2a9273a0e4466e2f057f9ce6fe98cd8ce570331b Mon Sep 17 00:00:00 2001
2From: Petr Viktorin <encukou@gmail.com>
3Date: Fri, 6 Sep 2024 13:14:22 +0200
4Subject: [PATCH] [3.10] [CVE-2023-27043] gh-102988: Reject malformed addresses
5 in email.parseaddr() (GH-111116) (#123768)
6
7Detect email address parsing errors and return empty tuple to
8indicate the parsing error (old API). Add an optional 'strict'
9parameter to getaddresses() and parseaddr() functions. Patch by
10Thomas Dwyer.
11
12(cherry picked from commit 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19)
13
14Co-authored-by: Victor Stinner <vstinner@python.org>
15Co-Authored-By: Thomas Dwyer <github@tomd.tel>
16
17Upstream-Status: Backport [https://github.com/python/cpython/commit/2a9273a0e4466e2f057f9ce6fe98cd8ce570331b]
18CVE: CVE-2023-27043
19Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
20---
21 Doc/library/email.utils.rst | 19 +-
22 Lib/email/utils.py | 151 ++++++++++++-
23 Lib/test/test_email/test_email.py | 204 +++++++++++++++++-
24 ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 +
25 4 files changed, 361 insertions(+), 21 deletions(-)
26 create mode 100644 Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
27
28diff --git a/Doc/library/email.utils.rst b/Doc/library/email.utils.rst
29index 0e266b6..65948fb 100644
30--- a/Doc/library/email.utils.rst
31+++ b/Doc/library/email.utils.rst
32@@ -60,13 +60,18 @@ of the new API.
33 begins with angle brackets, they are stripped off.
34
35
36-.. function:: parseaddr(address)
37+.. function:: parseaddr(address, *, strict=True)
38
39 Parse address -- which should be the value of some address-containing field such
40 as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
41 *email address* parts. Returns a tuple of that information, unless the parse
42 fails, in which case a 2-tuple of ``('', '')`` is returned.
43
44+ If *strict* is true, use a strict parser which rejects malformed inputs.
45+
46+ .. versionchanged:: 3.10.15
47+ Add *strict* optional parameter and reject malformed inputs by default.
48+
49
50 .. function:: formataddr(pair, charset='utf-8')
51
52@@ -84,12 +89,15 @@ of the new API.
53 Added the *charset* option.
54
55
56-.. function:: getaddresses(fieldvalues)
57+.. function:: getaddresses(fieldvalues, *, strict=True)
58
59 This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
60 *fieldvalues* is a sequence of header field values as might be returned by
61- :meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
62- example that gets all the recipients of a message::
63+ :meth:`Message.get_all <email.message.Message.get_all>`.
64+
65+ If *strict* is true, use a strict parser which rejects malformed inputs.
66+
67+ Here's a simple example that gets all the recipients of a message::
68
69 from email.utils import getaddresses
70
71@@ -99,6 +107,9 @@ of the new API.
72 resent_ccs = msg.get_all('resent-cc', [])
73 all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
74
75+ .. versionchanged:: 3.10.15
76+ Add *strict* optional parameter and reject malformed inputs by default.
77+
78
79 .. function:: parsedate(date)
80
81diff --git a/Lib/email/utils.py b/Lib/email/utils.py
82index cfdfeb3..9522341 100644
83--- a/Lib/email/utils.py
84+++ b/Lib/email/utils.py
85@@ -48,6 +48,7 @@ TICK = "'"
86 specialsre = re.compile(r'[][\\()<>@,:;".]')
87 escapesre = re.compile(r'[\\"]')
88
89+
90 def _has_surrogates(s):
91 """Return True if s contains surrogate-escaped binary data."""
92 # This check is based on the fact that unless there are surrogates, utf8
93@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
94 return address
95
96
97+def _iter_escaped_chars(addr):
98+ pos = 0
99+ escape = False
100+ for pos, ch in enumerate(addr):
101+ if escape:
102+ yield (pos, '\\' + ch)
103+ escape = False
104+ elif ch == '\\':
105+ escape = True
106+ else:
107+ yield (pos, ch)
108+ if escape:
109+ yield (pos, '\\')
110+
111+
112+def _strip_quoted_realnames(addr):
113+ """Strip real names between quotes."""
114+ if '"' not in addr:
115+ # Fast path
116+ return addr
117+
118+ start = 0
119+ open_pos = None
120+ result = []
121+ for pos, ch in _iter_escaped_chars(addr):
122+ if ch == '"':
123+ if open_pos is None:
124+ open_pos = pos
125+ else:
126+ if start != open_pos:
127+ result.append(addr[start:open_pos])
128+ start = pos + 1
129+ open_pos = None
130+
131+ if start < len(addr):
132+ result.append(addr[start:])
133+
134+ return ''.join(result)
135
136-def getaddresses(fieldvalues):
137- """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
138- all = COMMASPACE.join(str(v) for v in fieldvalues)
139- a = _AddressList(all)
140- return a.addresslist
141+
142+supports_strict_parsing = True
143+
144+def getaddresses(fieldvalues, *, strict=True):
145+ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
146+
147+ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
148+ its place.
149+
150+ If strict is true, use a strict parser which rejects malformed inputs.
151+ """
152+
153+ # If strict is true, if the resulting list of parsed addresses is greater
154+ # than the number of fieldvalues in the input list, a parsing error has
155+ # occurred and consequently a list containing a single empty 2-tuple [('',
156+ # '')] is returned in its place. This is done to avoid invalid output.
157+ #
158+ # Malformed input: getaddresses(['alice@example.com <bob@example.com>'])
159+ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')]
160+ # Safe output: [('', '')]
161+
162+ if not strict:
163+ all = COMMASPACE.join(str(v) for v in fieldvalues)
164+ a = _AddressList(all)
165+ return a.addresslist
166+
167+ fieldvalues = [str(v) for v in fieldvalues]
168+ fieldvalues = _pre_parse_validation(fieldvalues)
169+ addr = COMMASPACE.join(fieldvalues)
170+ a = _AddressList(addr)
171+ result = _post_parse_validation(a.addresslist)
172+
173+ # Treat output as invalid if the number of addresses is not equal to the
174+ # expected number of addresses.
175+ n = 0
176+ for v in fieldvalues:
177+ # When a comma is used in the Real Name part it is not a delimiter.
178+ # So strip those out before counting the commas.
179+ v = _strip_quoted_realnames(v)
180+ # Expected number of addresses: 1 + number of commas
181+ n += 1 + v.count(',')
182+ if len(result) != n:
183+ return [('', '')]
184+
185+ return result
186+
187+
188+def _check_parenthesis(addr):
189+ # Ignore parenthesis in quoted real names.
190+ addr = _strip_quoted_realnames(addr)
191+
192+ opens = 0
193+ for pos, ch in _iter_escaped_chars(addr):
194+ if ch == '(':
195+ opens += 1
196+ elif ch == ')':
197+ opens -= 1
198+ if opens < 0:
199+ return False
200+ return (opens == 0)
201+
202+
203+def _pre_parse_validation(email_header_fields):
204+ accepted_values = []
205+ for v in email_header_fields:
206+ if not _check_parenthesis(v):
207+ v = "('', '')"
208+ accepted_values.append(v)
209+
210+ return accepted_values
211+
212+
213+def _post_parse_validation(parsed_email_header_tuples):
214+ accepted_values = []
215+ # The parser would have parsed a correctly formatted domain-literal
216+ # The existence of an [ after parsing indicates a parsing failure
217+ for v in parsed_email_header_tuples:
218+ if '[' in v[1]:
219+ v = ('', '')
220+ accepted_values.append(v)
221+
222+ return accepted_values
223
224
225 def _format_timetuple_and_zone(timetuple, zone):
226@@ -205,16 +321,33 @@ def parsedate_to_datetime(data):
227 tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
228
229
230-def parseaddr(addr):
231+def parseaddr(addr, *, strict=True):
232 """
233 Parse addr into its constituent realname and email address parts.
234
235 Return a tuple of realname and email address, unless the parse fails, in
236 which case return a 2-tuple of ('', '').
237+
238+ If strict is True, use a strict parser which rejects malformed inputs.
239 """
240- addrs = _AddressList(addr).addresslist
241- if not addrs:
242- return '', ''
243+ if not strict:
244+ addrs = _AddressList(addr).addresslist
245+ if not addrs:
246+ return ('', '')
247+ return addrs[0]
248+
249+ if isinstance(addr, list):
250+ addr = addr[0]
251+
252+ if not isinstance(addr, str):
253+ return ('', '')
254+
255+ addr = _pre_parse_validation([addr])[0]
256+ addrs = _post_parse_validation(_AddressList(addr).addresslist)
257+
258+ if not addrs or len(addrs) > 1:
259+ return ('', '')
260+
261 return addrs[0]
262
263
264diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py
265index 8b16cca..5b19bb3 100644
266--- a/Lib/test/test_email/test_email.py
267+++ b/Lib/test/test_email/test_email.py
268@@ -16,6 +16,7 @@ from unittest.mock import patch
269
270 import email
271 import email.policy
272+import email.utils
273
274 from email.charset import Charset
275 from email.generator import Generator, DecodedGenerator, BytesGenerator
276@@ -3288,15 +3289,154 @@ Foo
277 [('Al Person', 'aperson@dom.ain'),
278 ('Bud Person', 'bperson@dom.ain')])
279
280+ def test_getaddresses_comma_in_name(self):
281+ """GH-106669 regression test."""
282+ self.assertEqual(
283+ utils.getaddresses(
284+ [
285+ '"Bud, Person" <bperson@dom.ain>',
286+ 'aperson@dom.ain (Al Person)',
287+ '"Mariusz Felisiak" <to@example.com>',
288+ ]
289+ ),
290+ [
291+ ('Bud, Person', 'bperson@dom.ain'),
292+ ('Al Person', 'aperson@dom.ain'),
293+ ('Mariusz Felisiak', 'to@example.com'),
294+ ],
295+ )
296+
297+ def test_parsing_errors(self):
298+ """Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
299+ alice = 'alice@example.org'
300+ bob = 'bob@example.com'
301+ empty = ('', '')
302+
303+ # Test utils.getaddresses() and utils.parseaddr() on malformed email
304+ # addresses: default behavior (strict=True) rejects malformed address,
305+ # and strict=False which tolerates malformed address.
306+ for invalid_separator, expected_non_strict in (
307+ ('(', [(f'<{bob}>', alice)]),
308+ (')', [('', alice), empty, ('', bob)]),
309+ ('<', [('', alice), empty, ('', bob), empty]),
310+ ('>', [('', alice), empty, ('', bob)]),
311+ ('[', [('', f'{alice}[<{bob}>]')]),
312+ (']', [('', alice), empty, ('', bob)]),
313+ ('@', [empty, empty, ('', bob)]),
314+ (';', [('', alice), empty, ('', bob)]),
315+ (':', [('', alice), ('', bob)]),
316+ ('.', [('', alice + '.'), ('', bob)]),
317+ ('"', [('', alice), ('', f'<{bob}>')]),
318+ ):
319+ address = f'{alice}{invalid_separator}<{bob}>'
320+ with self.subTest(address=address):
321+ self.assertEqual(utils.getaddresses([address]),
322+ [empty])
323+ self.assertEqual(utils.getaddresses([address], strict=False),
324+ expected_non_strict)
325+
326+ self.assertEqual(utils.parseaddr([address]),
327+ empty)
328+ self.assertEqual(utils.parseaddr([address], strict=False),
329+ ('', address))
330+
331+ # Comma (',') is treated differently depending on strict parameter.
332+ # Comma without quotes.
333+ address = f'{alice},<{bob}>'
334+ self.assertEqual(utils.getaddresses([address]),
335+ [('', alice), ('', bob)])
336+ self.assertEqual(utils.getaddresses([address], strict=False),
337+ [('', alice), ('', bob)])
338+ self.assertEqual(utils.parseaddr([address]),
339+ empty)
340+ self.assertEqual(utils.parseaddr([address], strict=False),
341+ ('', address))
342+
343+ # Real name between quotes containing comma.
344+ address = '"Alice, alice@example.org" <bob@example.com>'
345+ expected_strict = ('Alice, alice@example.org', 'bob@example.com')
346+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
347+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
348+ self.assertEqual(utils.parseaddr([address]), expected_strict)
349+ self.assertEqual(utils.parseaddr([address], strict=False),
350+ ('', address))
351+
352+ # Valid parenthesis in comments.
353+ address = 'alice@example.org (Alice)'
354+ expected_strict = ('Alice', 'alice@example.org')
355+ self.assertEqual(utils.getaddresses([address]), [expected_strict])
356+ self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
357+ self.assertEqual(utils.parseaddr([address]), expected_strict)
358+ self.assertEqual(utils.parseaddr([address], strict=False),
359+ ('', address))
360+
361+ # Invalid parenthesis in comments.
362+ address = 'alice@example.org )Alice('
363+ self.assertEqual(utils.getaddresses([address]), [empty])
364+ self.assertEqual(utils.getaddresses([address], strict=False),
365+ [('', 'alice@example.org'), ('', ''), ('', 'Alice')])
366+ self.assertEqual(utils.parseaddr([address]), empty)
367+ self.assertEqual(utils.parseaddr([address], strict=False),
368+ ('', address))
369+
370+ # Two addresses with quotes separated by comma.
371+ address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
372+ self.assertEqual(utils.getaddresses([address]),
373+ [('Jane Doe', 'jane@example.net'),
374+ ('John Doe', 'john@example.net')])
375+ self.assertEqual(utils.getaddresses([address], strict=False),
376+ [('Jane Doe', 'jane@example.net'),
377+ ('John Doe', 'john@example.net')])
378+ self.assertEqual(utils.parseaddr([address]), empty)
379+ self.assertEqual(utils.parseaddr([address], strict=False),
380+ ('', address))
381+
382+ # Test email.utils.supports_strict_parsing attribute
383+ self.assertEqual(email.utils.supports_strict_parsing, True)
384+
385 def test_getaddresses_nasty(self):
386- eq = self.assertEqual
387- eq(utils.getaddresses(['foo: ;']), [('', '')])
388- eq(utils.getaddresses(
389- ['[]*-- =~$']),
390- [('', ''), ('', ''), ('', '*--')])
391- eq(utils.getaddresses(
392- ['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
393- [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
394+ for addresses, expected in (
395+ (['"Sürname, Firstname" <to@example.com>'],
396+ [('Sürname, Firstname', 'to@example.com')]),
397+
398+ (['foo: ;'],
399+ [('', '')]),
400+
401+ (['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
402+ [('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
403+
404+ ([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
405+ [('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
406+
407+ (['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
408+ [('', '')]),
409+
410+ (['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
411+ [('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
412+
413+ (['John Doe <jdoe@machine(comment). example>'],
414+ [('John Doe (comment)', 'jdoe@machine.example')]),
415+
416+ (['"Mary Smith: Personal Account" <smith@home.example>'],
417+ [('Mary Smith: Personal Account', 'smith@home.example')]),
418+
419+ (['Undisclosed recipients:;'],
420+ [('', '')]),
421+
422+ ([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
423+ [('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
424+ ):
425+ with self.subTest(addresses=addresses):
426+ self.assertEqual(utils.getaddresses(addresses),
427+ expected)
428+ self.assertEqual(utils.getaddresses(addresses, strict=False),
429+ expected)
430+
431+ addresses = ['[]*-- =~$']
432+ self.assertEqual(utils.getaddresses(addresses),
433+ [('', '')])
434+ self.assertEqual(utils.getaddresses(addresses, strict=False),
435+ [('', ''), ('', ''), ('', '*--')])
436
437 def test_getaddresses_embedded_comment(self):
438 """Test proper handling of a nested comment"""
439@@ -3485,6 +3625,54 @@ multipart/report
440 m = cls(*constructor, policy=email.policy.default)
441 self.assertIs(m.policy, email.policy.default)
442
443+ def test_iter_escaped_chars(self):
444+ self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
445+ [(0, 'a'),
446+ (2, '\\\\'),
447+ (3, 'b'),
448+ (5, '\\"'),
449+ (6, 'c'),
450+ (8, '\\\\'),
451+ (9, '"'),
452+ (10, 'd')])
453+ self.assertEqual(list(utils._iter_escaped_chars('a\\')),
454+ [(0, 'a'), (1, '\\')])
455+
456+ def test_strip_quoted_realnames(self):
457+ def check(addr, expected):
458+ self.assertEqual(utils._strip_quoted_realnames(addr), expected)
459+
460+ check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
461+ ' <jane@example.net>, <john@example.net>')
462+ check(r'"Jane \"Doe\"." <jane@example.net>',
463+ ' <jane@example.net>')
464+
465+ # special cases
466+ check(r'before"name"after', 'beforeafter')
467+ check(r'before"name"', 'before')
468+ check(r'b"name"', 'b') # single char
469+ check(r'"name"after', 'after')
470+ check(r'"name"a', 'a') # single char
471+ check(r'"name"', '')
472+
473+ # no change
474+ for addr in (
475+ 'Jane Doe <jane@example.net>, John Doe <john@example.net>',
476+ 'lone " quote',
477+ ):
478+ self.assertEqual(utils._strip_quoted_realnames(addr), addr)
479+
480+
481+ def test_check_parenthesis(self):
482+ addr = 'alice@example.net'
483+ self.assertTrue(utils._check_parenthesis(f'{addr} (Alice)'))
484+ self.assertFalse(utils._check_parenthesis(f'{addr} )Alice('))
485+ self.assertFalse(utils._check_parenthesis(f'{addr} (Alice))'))
486+ self.assertFalse(utils._check_parenthesis(f'{addr} ((Alice)'))
487+
488+ # Ignore real name between quotes
489+ self.assertTrue(utils._check_parenthesis(f'")Alice((" {addr}'))
490+
491
492 # Test the iterator/generators
493 class TestIterators(TestEmailBase):
494diff --git a/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
495new file mode 100644
496index 0000000..3d0e9e4
497--- /dev/null
498+++ b/Misc/NEWS.d/next/Library/2023-10-20-15-28-08.gh-issue-102988.dStNO7.rst
499@@ -0,0 +1,8 @@
500+:func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now
501+return ``('', '')`` 2-tuples in more situations where invalid email
502+addresses are encountered instead of potentially inaccurate values. Add
503+optional *strict* parameter to these two functions: use ``strict=False`` to
504+get the old behavior, accept malformed inputs.
505+``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to check
506+if the *strict* parameter is available. Patch by Thomas Dwyer and Victor
507+Stinner to improve the CVE-2023-27043 fix.
508--
5092.25.1
510
diff --git a/meta/recipes-devtools/python/python3/CVE-2024-6232.patch b/meta/recipes-devtools/python/python3/CVE-2024-6232.patch
deleted file mode 100644
index 874cbfe40c..0000000000
--- a/meta/recipes-devtools/python/python3/CVE-2024-6232.patch
+++ /dev/null
@@ -1,251 +0,0 @@
1From 3a22dc1079be5a75750d24dc6992956e7b84b5a0 Mon Sep 17 00:00:00 2001
2From: Seth Michael Larson <seth@python.org>
3Date: Tue, 3 Sep 2024 10:07:53 -0500
4Subject: [PATCH 2/2] [3.10] gh-121285: Remove backtracking when parsing
5 tarfile headers (GH-121286) (#123640)
6
7* Remove backtracking when parsing tarfile headers
8* Rewrite PAX header parsing to be stricter
9* Optimize parsing of GNU extended sparse headers v0.0
10
11(cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4)
12
13Upstream-Status: Backport from https://github.com/python/cpython/commit/743acbe872485dc18df4d8ab2dc7895187f062c4
14CVE: CVE-2024-6232
15
16Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru>
17Co-authored-by: Gregory P. Smith <greg@krypto.org>
18Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
19---
20 Lib/tarfile.py | 105 +++++++++++-------
21 Lib/test/test_tarfile.py | 42 +++++++
22 ...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 +
23 3 files changed, 111 insertions(+), 38 deletions(-)
24 create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
25
26diff --git a/Lib/tarfile.py b/Lib/tarfile.py
27index 495349f08f9..3ab6811d633 100755
28--- a/Lib/tarfile.py
29+++ b/Lib/tarfile.py
30@@ -841,6 +841,9 @@ def data_filter(member, dest_path):
31 # Sentinel for replace() defaults, meaning "don't change the attribute"
32 _KEEP = object()
33
34+# Header length is digits followed by a space.
35+_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")
36+
37 class TarInfo(object):
38 """Informational class which holds the details about an
39 archive member given by a tar header block.
40@@ -1410,41 +1413,59 @@ def _proc_pax(self, tarfile):
41 else:
42 pax_headers = tarfile.pax_headers.copy()
43
44- # Check if the pax header contains a hdrcharset field. This tells us
45- # the encoding of the path, linkpath, uname and gname fields. Normally,
46- # these fields are UTF-8 encoded but since POSIX.1-2008 tar
47- # implementations are allowed to store them as raw binary strings if
48- # the translation to UTF-8 fails.
49- match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf)
50- if match is not None:
51- pax_headers["hdrcharset"] = match.group(1).decode("utf-8")
52-
53- # For the time being, we don't care about anything other than "BINARY".
54- # The only other value that is currently allowed by the standard is
55- # "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
56- hdrcharset = pax_headers.get("hdrcharset")
57- if hdrcharset == "BINARY":
58- encoding = tarfile.encoding
59- else:
60- encoding = "utf-8"
61-
62 # Parse pax header information. A record looks like that:
63 # "%d %s=%s\n" % (length, keyword, value). length is the size
64 # of the complete record including the length field itself and
65- # the newline. keyword and value are both UTF-8 encoded strings.
66- regex = re.compile(br"(\d+) ([^=]+)=")
67+ # the newline.
68 pos = 0
69- while True:
70- match = regex.match(buf, pos)
71- if not match:
72- break
73+ encoding = None
74+ raw_headers = []
75+ while len(buf) > pos and buf[pos] != 0x00:
76+ if not (match := _header_length_prefix_re.match(buf, pos)):
77+ raise InvalidHeaderError("invalid header")
78+ try:
79+ length = int(match.group(1))
80+ except ValueError:
81+ raise InvalidHeaderError("invalid header")
82+ # Headers must be at least 5 bytes, shortest being '5 x=\n'.
83+ # Value is allowed to be empty.
84+ if length < 5:
85+ raise InvalidHeaderError("invalid header")
86+ if pos + length > len(buf):
87+ raise InvalidHeaderError("invalid header")
88
89- length, keyword = match.groups()
90- length = int(length)
91- if length == 0:
92+ header_value_end_offset = match.start(1) + length - 1 # Last byte of the header
93+ keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]
94+ raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")
95+
96+ # Check the framing of the header. The last character must be '\n' (0x0A)
97+ if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:
98 raise InvalidHeaderError("invalid header")
99- value = buf[match.end(2) + 1:match.start(1) + length - 1]
100+ raw_headers.append((length, raw_keyword, raw_value))
101+
102+ # Check if the pax header contains a hdrcharset field. This tells us
103+ # the encoding of the path, linkpath, uname and gname fields. Normally,
104+ # these fields are UTF-8 encoded but since POSIX.1-2008 tar
105+ # implementations are allowed to store them as raw binary strings if
106+ # the translation to UTF-8 fails. For the time being, we don't care about
107+ # anything other than "BINARY". The only other value that is currently
108+ # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.
109+ # Note that we only follow the initial 'hdrcharset' setting to preserve
110+ # the initial behavior of the 'tarfile' module.
111+ if raw_keyword == b"hdrcharset" and encoding is None:
112+ if raw_value == b"BINARY":
113+ encoding = tarfile.encoding
114+ else: # This branch ensures only the first 'hdrcharset' header is used.
115+ encoding = "utf-8"
116+
117+ pos += length
118
119+ # If no explicit hdrcharset is set, we use UTF-8 as a default.
120+ if encoding is None:
121+ encoding = "utf-8"
122+
123+ # After parsing the raw headers we can decode them to text.
124+ for length, raw_keyword, raw_value in raw_headers:
125 # Normally, we could just use "utf-8" as the encoding and "strict"
126 # as the error handler, but we better not take the risk. For
127 # example, GNU tar <= 1.23 is known to store filenames it cannot
128@@ -1452,17 +1473,16 @@ def _proc_pax(self, tarfile):
129 # hdrcharset=BINARY header).
130 # We first try the strict standard encoding, and if that fails we
131 # fall back on the user's encoding and error handler.
132- keyword = self._decode_pax_field(keyword, "utf-8", "utf-8",
133+ keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",
134 tarfile.errors)
135 if keyword in PAX_NAME_FIELDS:
136- value = self._decode_pax_field(value, encoding, tarfile.encoding,
137+ value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,
138 tarfile.errors)
139 else:
140- value = self._decode_pax_field(value, "utf-8", "utf-8",
141+ value = self._decode_pax_field(raw_value, "utf-8", "utf-8",
142 tarfile.errors)
143
144 pax_headers[keyword] = value
145- pos += length
146
147 # Fetch the next header.
148 try:
149@@ -1477,7 +1497,7 @@ def _proc_pax(self, tarfile):
150
151 elif "GNU.sparse.size" in pax_headers:
152 # GNU extended sparse format version 0.0.
153- self._proc_gnusparse_00(next, pax_headers, buf)
154+ self._proc_gnusparse_00(next, raw_headers)
155
156 elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":
157 # GNU extended sparse format version 1.0.
158@@ -1499,15 +1519,24 @@ def _proc_pax(self, tarfile):
159
160 return next
161
162- def _proc_gnusparse_00(self, next, pax_headers, buf):
163+ def _proc_gnusparse_00(self, next, raw_headers):
164 """Process a GNU tar extended sparse header, version 0.0.
165 """
166 offsets = []
167- for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf):
168- offsets.append(int(match.group(1)))
169 numbytes = []
170- for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf):
171- numbytes.append(int(match.group(1)))
172+ for _, keyword, value in raw_headers:
173+ if keyword == b"GNU.sparse.offset":
174+ try:
175+ offsets.append(int(value.decode()))
176+ except ValueError:
177+ raise InvalidHeaderError("invalid header")
178+
179+ elif keyword == b"GNU.sparse.numbytes":
180+ try:
181+ numbytes.append(int(value.decode()))
182+ except ValueError:
183+ raise InvalidHeaderError("invalid header")
184+
185 next.sparse = list(zip(offsets, numbytes))
186
187 def _proc_gnusparse_01(self, next, pax_headers):
188diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
189index cfc13bccb20..007c3e94acb 100644
190--- a/Lib/test/test_tarfile.py
191+++ b/Lib/test/test_tarfile.py
192@@ -1139,6 +1139,48 @@ def test_pax_number_fields(self):
193 finally:
194 tar.close()
195
196+ def test_pax_header_bad_formats(self):
197+ # The fields from the pax header have priority over the
198+ # TarInfo.
199+ pax_header_replacements = (
200+ b" foo=bar\n",
201+ b"0 \n",
202+ b"1 \n",
203+ b"2 \n",
204+ b"3 =\n",
205+ b"4 =a\n",
206+ b"1000000 foo=bar\n",
207+ b"0 foo=bar\n",
208+ b"-12 foo=bar\n",
209+ b"000000000000000000000000036 foo=bar\n",
210+ )
211+ pax_headers = {"foo": "bar"}
212+
213+ for replacement in pax_header_replacements:
214+ with self.subTest(header=replacement):
215+ tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT,
216+ encoding="iso8859-1")
217+ try:
218+ t = tarfile.TarInfo()
219+ t.name = "pax" # non-ASCII
220+ t.uid = 1
221+ t.pax_headers = pax_headers
222+ tar.addfile(t)
223+ finally:
224+ tar.close()
225+
226+ with open(tmpname, "rb") as f:
227+ data = f.read()
228+ self.assertIn(b"11 foo=bar\n", data)
229+ data = data.replace(b"11 foo=bar\n", replacement)
230+
231+ with open(tmpname, "wb") as f:
232+ f.truncate()
233+ f.write(data)
234+
235+ with self.assertRaisesRegex(tarfile.ReadError, r"method tar: ReadError\('invalid header'\)"):
236+ tarfile.open(tmpname, encoding="iso8859-1")
237+
238
239 class WriteTestBase(TarTest):
240 # Put all write tests in here that are supposed to be tested
241diff --git a/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
242new file mode 100644
243index 00000000000..81f918bfe2b
244--- /dev/null
245+++ b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst
246@@ -0,0 +1,2 @@
247+Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and
248+GNU sparse headers.
249--
2502.46.0
251
diff --git a/meta/recipes-devtools/python/python3/CVE-2024-7592.patch b/meta/recipes-devtools/python/python3/CVE-2024-7592.patch
deleted file mode 100644
index 7303a41e20..0000000000
--- a/meta/recipes-devtools/python/python3/CVE-2024-7592.patch
+++ /dev/null
@@ -1,140 +0,0 @@
1From 3c15b8437f57fe1027171b34af88bf791cf1868c Mon Sep 17 00:00:00 2001
2From: "Miss Islington (bot)"
3 <31488909+miss-islington@users.noreply.github.com>
4Date: Wed, 4 Sep 2024 17:50:36 +0200
5Subject: [PATCH 1/2] [3.10] gh-123067: Fix quadratic complexity in parsing
6 "-quoted cookie values with backslashes (GH-123075) (#123106)
7
8This fixes CVE-2024-7592.
9(cherry picked from commit 44e458357fca05ca0ae2658d62c8c595b048b5ef)
10
11Upstream-Status: Backport from https://github.com/python/cpython/commit/b2f11ca7667e4d57c71c1c88b255115f16042d9a
12CVE: CVE-2024-7592
13
14Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
15Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com>
16---
17 Lib/http/cookies.py | 34 ++++-------------
18 Lib/test/test_http_cookies.py | 38 +++++++++++++++++++
19 ...-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst | 1 +
20 3 files changed, 47 insertions(+), 26 deletions(-)
21 create mode 100644 Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
22
23diff --git a/Lib/http/cookies.py b/Lib/http/cookies.py
24index 35ac2dc6ae2..2c1f021d0ab 100644
25--- a/Lib/http/cookies.py
26+++ b/Lib/http/cookies.py
27@@ -184,8 +184,13 @@ def _quote(str):
28 return '"' + str.translate(_Translator) + '"'
29
30
31-_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
32-_QuotePatt = re.compile(r"[\\].")
33+_unquote_sub = re.compile(r'\\(?:([0-3][0-7][0-7])|(.))').sub
34+
35+def _unquote_replace(m):
36+ if m[1]:
37+ return chr(int(m[1], 8))
38+ else:
39+ return m[2]
40
41 def _unquote(str):
42 # If there aren't any doublequotes,
43@@ -205,30 +210,7 @@ def _unquote(str):
44 # \012 --> \n
45 # \" --> "
46 #
47- i = 0
48- n = len(str)
49- res = []
50- while 0 <= i < n:
51- o_match = _OctalPatt.search(str, i)
52- q_match = _QuotePatt.search(str, i)
53- if not o_match and not q_match: # Neither matched
54- res.append(str[i:])
55- break
56- # else:
57- j = k = -1
58- if o_match:
59- j = o_match.start(0)
60- if q_match:
61- k = q_match.start(0)
62- if q_match and (not o_match or k < j): # QuotePatt matched
63- res.append(str[i:k])
64- res.append(str[k+1])
65- i = k + 2
66- else: # OctalPatt matched
67- res.append(str[i:j])
68- res.append(chr(int(str[j+1:j+4], 8)))
69- i = j + 4
70- return _nulljoin(res)
71+ return _unquote_sub(_unquote_replace, str)
72
73 # The _getdate() routine is used to set the expiration time in the cookie's HTTP
74 # header. By default, _getdate() returns the current time in the appropriate
75diff --git a/Lib/test/test_http_cookies.py b/Lib/test/test_http_cookies.py
76index 6072c7e15e9..644e75cd5b7 100644
77--- a/Lib/test/test_http_cookies.py
78+++ b/Lib/test/test_http_cookies.py
79@@ -5,6 +5,7 @@
80 import unittest
81 from http import cookies
82 import pickle
83+from test import support
84
85
86 class CookieTests(unittest.TestCase):
87@@ -58,6 +59,43 @@ def test_basic(self):
88 for k, v in sorted(case['dict'].items()):
89 self.assertEqual(C[k].value, v)
90
91+ def test_unquote(self):
92+ cases = [
93+ (r'a="b=\""', 'b="'),
94+ (r'a="b=\\"', 'b=\\'),
95+ (r'a="b=\="', 'b=='),
96+ (r'a="b=\n"', 'b=n'),
97+ (r'a="b=\042"', 'b="'),
98+ (r'a="b=\134"', 'b=\\'),
99+ (r'a="b=\377"', 'b=\xff'),
100+ (r'a="b=\400"', 'b=400'),
101+ (r'a="b=\42"', 'b=42'),
102+ (r'a="b=\\042"', 'b=\\042'),
103+ (r'a="b=\\134"', 'b=\\134'),
104+ (r'a="b=\\\""', 'b=\\"'),
105+ (r'a="b=\\\042"', 'b=\\"'),
106+ (r'a="b=\134\""', 'b=\\"'),
107+ (r'a="b=\134\042"', 'b=\\"'),
108+ ]
109+ for encoded, decoded in cases:
110+ with self.subTest(encoded):
111+ C = cookies.SimpleCookie()
112+ C.load(encoded)
113+ self.assertEqual(C['a'].value, decoded)
114+
115+ @support.requires_resource('cpu')
116+ def test_unquote_large(self):
117+ n = 10**6
118+ for encoded in r'\\', r'\134':
119+ with self.subTest(encoded):
120+ data = 'a="b=' + encoded*n + ';"'
121+ C = cookies.SimpleCookie()
122+ C.load(data)
123+ value = C['a'].value
124+ self.assertEqual(value[:3], 'b=\\')
125+ self.assertEqual(value[-2:], '\\;')
126+ self.assertEqual(len(value), n + 3)
127+
128 def test_load(self):
129 C = cookies.SimpleCookie()
130 C.load('Customer="WILE_E_COYOTE"; Version=1; Path=/acme')
131diff --git a/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
132new file mode 100644
133index 00000000000..6a234561fe3
134--- /dev/null
135+++ b/Misc/NEWS.d/next/Library/2024-08-16-19-13-21.gh-issue-123067.Nx9O4R.rst
136@@ -0,0 +1 @@
137+Fix quadratic complexity in parsing ``"``-quoted cookie values with backslashes by :mod:`http.cookies`.
138--
1392.46.0
140
diff --git a/meta/recipes-devtools/python/python3/CVE-2024-8088.patch b/meta/recipes-devtools/python/python3/CVE-2024-8088.patch
deleted file mode 100644
index 10d28a9e65..0000000000
--- a/meta/recipes-devtools/python/python3/CVE-2024-8088.patch
+++ /dev/null
@@ -1,124 +0,0 @@
1From e0264a61119d551658d9445af38323ba94fc16db Mon Sep 17 00:00:00 2001
2From: "Jason R. Coombs" <jaraco@jaraco.com>
3Date: Thu, 22 Aug 2024 19:24:33 -0400
4Subject: [PATCH] CVE-2024-8088: Sanitize names in zipfile.Path. (GH-122906)
5
6Upstream-Status: Backport from https://github.com/python/cpython/commit/e0264a61119d551658d9445af38323ba94fc16db
7CVE: CVE-2024-8088
8
9Signed-off-by: Rohini Sangam <rsangam@mvista.com>
10---
11 Lib/test/test_zipfile.py | 17 ++++++
12 Lib/zipfile.py | 61 ++++++++++++++++++-
13 2 files changed, 77 insertions(+), 1 deletion(-)
14
15diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
16index 32c0170..a60dc11 100644
17--- a/Lib/test/test_zipfile.py
18+++ b/Lib/test/test_zipfile.py
19@@ -3280,6 +3280,23 @@ with zipfile.ZipFile(io.BytesIO(), "w") as zf:
20 zipfile.Path(zf)
21 zf.extractall(source_path.parent)
22
23+ def test_malformed_paths(self):
24+ """
25+ Path should handle malformed paths.
26+ """
27+ data = io.BytesIO()
28+ zf = zipfile.ZipFile(data, "w")
29+ zf.writestr("/one-slash.txt", b"content")
30+ zf.writestr("//two-slash.txt", b"content")
31+ zf.writestr("../parent.txt", b"content")
32+ zf.filename = ''
33+ root = zipfile.Path(zf)
34+ assert list(map(str, root.iterdir())) == [
35+ 'one-slash.txt',
36+ 'two-slash.txt',
37+ 'parent.txt',
38+ ]
39+
40
41 class StripExtraTests(unittest.TestCase):
42 # Note: all of the "z" characters are technically invalid, but up
43diff --git a/Lib/zipfile.py b/Lib/zipfile.py
44index 7d18bc2..cbac8d9 100644
45--- a/Lib/zipfile.py
46+++ b/Lib/zipfile.py
47@@ -9,6 +9,7 @@ import io
48 import itertools
49 import os
50 import posixpath
51+import re
52 import shutil
53 import stat
54 import struct
55@@ -2182,7 +2183,65 @@ def _difference(minuend, subtrahend):
56 return itertools.filterfalse(set(subtrahend).__contains__, minuend)
57
58
59-class CompleteDirs(ZipFile):
60+class SanitizedNames:
61+ """
62+ ZipFile mix-in to ensure names are sanitized.
63+ """
64+
65+ def namelist(self):
66+ return list(map(self._sanitize, super().namelist()))
67+
68+ @staticmethod
69+ def _sanitize(name):
70+ r"""
71+ Ensure a relative path with posix separators and no dot names.
72+ Modeled after
73+ https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
74+ but provides consistent cross-platform behavior.
75+ >>> san = SanitizedNames._sanitize
76+ >>> san('/foo/bar')
77+ 'foo/bar'
78+ >>> san('//foo.txt')
79+ 'foo.txt'
80+ >>> san('foo/.././bar.txt')
81+ 'foo/bar.txt'
82+ >>> san('foo../.bar.txt')
83+ 'foo../.bar.txt'
84+ >>> san('\\foo\\bar.txt')
85+ 'foo/bar.txt'
86+ >>> san('D:\\foo.txt')
87+ 'D/foo.txt'
88+ >>> san('\\\\server\\share\\file.txt')
89+ 'server/share/file.txt'
90+ >>> san('\\\\?\\GLOBALROOT\\Volume3')
91+ '?/GLOBALROOT/Volume3'
92+ >>> san('\\\\.\\PhysicalDrive1\\root')
93+ 'PhysicalDrive1/root'
94+ Retain any trailing slash.
95+ >>> san('abc/')
96+ 'abc/'
97+ Raises a ValueError if the result is empty.
98+ >>> san('../..')
99+ Traceback (most recent call last):
100+ ...
101+ ValueError: Empty filename
102+ """
103+
104+ def allowed(part):
105+ return part and part not in {'..', '.'}
106+
107+ # Remove the drive letter.
108+ # Don't use ntpath.splitdrive, because that also strips UNC paths
109+ bare = re.sub('^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
110+ clean = bare.replace('\\', '/')
111+ parts = clean.split('/')
112+ joined = '/'.join(filter(allowed, parts))
113+ if not joined:
114+ raise ValueError("Empty filename")
115+ return joined + '/' * name.endswith('/')
116+
117+
118+class CompleteDirs(SanitizedNames, ZipFile):
119 """
120 A ZipFile subclass that ensures that implied directories
121 are always included in the namelist.
122--
1232.35.7
124
diff --git a/meta/recipes-devtools/python/python3_3.10.14.bb b/meta/recipes-devtools/python/python3_3.10.15.bb
index 8f6a15701f..4157b8cb83 100644
--- a/meta/recipes-devtools/python/python3_3.10.14.bb
+++ b/meta/recipes-devtools/python/python3_3.10.15.bb
@@ -36,10 +36,6 @@ SRC_URI = "http://www.python.org/ftp/python/${PV}/Python-${PV}.tar.xz \
36 file://deterministic_imports.patch \ 36 file://deterministic_imports.patch \
37 file://0001-Avoid-shebang-overflow-on-python-config.py.patch \ 37 file://0001-Avoid-shebang-overflow-on-python-config.py.patch \
38 file://0001-test_storlines-skip-due-to-load-variability.patch \ 38 file://0001-test_storlines-skip-due-to-load-variability.patch \
39 file://CVE-2024-8088.patch \
40 file://CVE-2024-7592.patch \
41 file://CVE-2024-6232.patch \
42 file://CVE-2023-27043.patch \
43 " 39 "
44 40
45SRC_URI:append:class-native = " \ 41SRC_URI:append:class-native = " \
@@ -48,7 +44,7 @@ SRC_URI:append:class-native = " \
48 file://12-distutils-prefix-is-inside-staging-area.patch \ 44 file://12-distutils-prefix-is-inside-staging-area.patch \
49 file://0001-Don-t-search-system-for-headers-libraries.patch \ 45 file://0001-Don-t-search-system-for-headers-libraries.patch \
50 " 46 "
51SRC_URI[sha256sum] = "9c50481faa8c2832329ba0fc8868d0a606a680fc4f60ec48d26ce8e076751fda" 47SRC_URI[sha256sum] = "aab0950817735172601879872d937c1e4928a57c409ae02369ec3d91dccebe79"
52 48
53# exclude pre-releases for both python 2.x and 3.x 49# exclude pre-releases for both python 2.x and 3.x
54UPSTREAM_CHECK_REGEX = "[Pp]ython-(?P<pver>\d+(\.\d+)+).tar" 50UPSTREAM_CHECK_REGEX = "[Pp]ython-(?P<pver>\d+(\.\d+)+).tar"