diff options
Diffstat (limited to 'meta/recipes-devtools/python/python3/CVE-2024-6232.patch')
| -rw-r--r-- | meta/recipes-devtools/python/python3/CVE-2024-6232.patch | 251 |
1 files changed, 0 insertions, 251 deletions
diff --git a/meta/recipes-devtools/python/python3/CVE-2024-6232.patch b/meta/recipes-devtools/python/python3/CVE-2024-6232.patch deleted file mode 100644 index 874cbfe40c..0000000000 --- a/meta/recipes-devtools/python/python3/CVE-2024-6232.patch +++ /dev/null | |||
| @@ -1,251 +0,0 @@ | |||
| 1 | From 3a22dc1079be5a75750d24dc6992956e7b84b5a0 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Seth Michael Larson <seth@python.org> | ||
| 3 | Date: Tue, 3 Sep 2024 10:07:53 -0500 | ||
| 4 | Subject: [PATCH 2/2] [3.10] gh-121285: Remove backtracking when parsing | ||
| 5 | tarfile headers (GH-121286) (#123640) | ||
| 6 | |||
| 7 | * Remove backtracking when parsing tarfile headers | ||
| 8 | * Rewrite PAX header parsing to be stricter | ||
| 9 | * Optimize parsing of GNU extended sparse headers v0.0 | ||
| 10 | |||
| 11 | (cherry picked from commit 34ddb64d088dd7ccc321f6103d23153256caa5d4) | ||
| 12 | |||
| 13 | Upstream-Status: Backport from https://github.com/python/cpython/commit/743acbe872485dc18df4d8ab2dc7895187f062c4 | ||
| 14 | CVE: CVE-2024-6232 | ||
| 15 | |||
| 16 | Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru> | ||
| 17 | Co-authored-by: Gregory P. Smith <greg@krypto.org> | ||
| 18 | Signed-off-by: Hugo SIMELIERE <hsimeliere.opensource@witekio.com> | ||
| 19 | --- | ||
| 20 | Lib/tarfile.py | 105 +++++++++++------- | ||
| 21 | Lib/test/test_tarfile.py | 42 +++++++ | ||
| 22 | ...-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | 2 + | ||
| 23 | 3 files changed, 111 insertions(+), 38 deletions(-) | ||
| 24 | create mode 100644 Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | ||
| 25 | |||
| 26 | diff --git a/Lib/tarfile.py b/Lib/tarfile.py | ||
| 27 | index 495349f08f9..3ab6811d633 100755 | ||
| 28 | --- a/Lib/tarfile.py | ||
| 29 | +++ b/Lib/tarfile.py | ||
| 30 | @@ -841,6 +841,9 @@ def data_filter(member, dest_path): | ||
| 31 | # Sentinel for replace() defaults, meaning "don't change the attribute" | ||
| 32 | _KEEP = object() | ||
| 33 | |||
| 34 | +# Header length is digits followed by a space. | ||
| 35 | +_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ") | ||
| 36 | + | ||
| 37 | class TarInfo(object): | ||
| 38 | """Informational class which holds the details about an | ||
| 39 | archive member given by a tar header block. | ||
| 40 | @@ -1410,41 +1413,59 @@ def _proc_pax(self, tarfile): | ||
| 41 | else: | ||
| 42 | pax_headers = tarfile.pax_headers.copy() | ||
| 43 | |||
| 44 | - # Check if the pax header contains a hdrcharset field. This tells us | ||
| 45 | - # the encoding of the path, linkpath, uname and gname fields. Normally, | ||
| 46 | - # these fields are UTF-8 encoded but since POSIX.1-2008 tar | ||
| 47 | - # implementations are allowed to store them as raw binary strings if | ||
| 48 | - # the translation to UTF-8 fails. | ||
| 49 | - match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) | ||
| 50 | - if match is not None: | ||
| 51 | - pax_headers["hdrcharset"] = match.group(1).decode("utf-8") | ||
| 52 | - | ||
| 53 | - # For the time being, we don't care about anything other than "BINARY". | ||
| 54 | - # The only other value that is currently allowed by the standard is | ||
| 55 | - # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. | ||
| 56 | - hdrcharset = pax_headers.get("hdrcharset") | ||
| 57 | - if hdrcharset == "BINARY": | ||
| 58 | - encoding = tarfile.encoding | ||
| 59 | - else: | ||
| 60 | - encoding = "utf-8" | ||
| 61 | - | ||
| 62 | # Parse pax header information. A record looks like that: | ||
| 63 | # "%d %s=%s\n" % (length, keyword, value). length is the size | ||
| 64 | # of the complete record including the length field itself and | ||
| 65 | - # the newline. keyword and value are both UTF-8 encoded strings. | ||
| 66 | - regex = re.compile(br"(\d+) ([^=]+)=") | ||
| 67 | + # the newline. | ||
| 68 | pos = 0 | ||
| 69 | - while True: | ||
| 70 | - match = regex.match(buf, pos) | ||
| 71 | - if not match: | ||
| 72 | - break | ||
| 73 | + encoding = None | ||
| 74 | + raw_headers = [] | ||
| 75 | + while len(buf) > pos and buf[pos] != 0x00: | ||
| 76 | + if not (match := _header_length_prefix_re.match(buf, pos)): | ||
| 77 | + raise InvalidHeaderError("invalid header") | ||
| 78 | + try: | ||
| 79 | + length = int(match.group(1)) | ||
| 80 | + except ValueError: | ||
| 81 | + raise InvalidHeaderError("invalid header") | ||
| 82 | + # Headers must be at least 5 bytes, shortest being '5 x=\n'. | ||
| 83 | + # Value is allowed to be empty. | ||
| 84 | + if length < 5: | ||
| 85 | + raise InvalidHeaderError("invalid header") | ||
| 86 | + if pos + length > len(buf): | ||
| 87 | + raise InvalidHeaderError("invalid header") | ||
| 88 | |||
| 89 | - length, keyword = match.groups() | ||
| 90 | - length = int(length) | ||
| 91 | - if length == 0: | ||
| 92 | + header_value_end_offset = match.start(1) + length - 1 # Last byte of the header | ||
| 93 | + keyword_and_value = buf[match.end(1) + 1:header_value_end_offset] | ||
| 94 | + raw_keyword, equals, raw_value = keyword_and_value.partition(b"=") | ||
| 95 | + | ||
| 96 | + # Check the framing of the header. The last character must be '\n' (0x0A) | ||
| 97 | + if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A: | ||
| 98 | raise InvalidHeaderError("invalid header") | ||
| 99 | - value = buf[match.end(2) + 1:match.start(1) + length - 1] | ||
| 100 | + raw_headers.append((length, raw_keyword, raw_value)) | ||
| 101 | + | ||
| 102 | + # Check if the pax header contains a hdrcharset field. This tells us | ||
| 103 | + # the encoding of the path, linkpath, uname and gname fields. Normally, | ||
| 104 | + # these fields are UTF-8 encoded but since POSIX.1-2008 tar | ||
| 105 | + # implementations are allowed to store them as raw binary strings if | ||
| 106 | + # the translation to UTF-8 fails. For the time being, we don't care about | ||
| 107 | + # anything other than "BINARY". The only other value that is currently | ||
| 108 | + # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8. | ||
| 109 | + # Note that we only follow the initial 'hdrcharset' setting to preserve | ||
| 110 | + # the initial behavior of the 'tarfile' module. | ||
| 111 | + if raw_keyword == b"hdrcharset" and encoding is None: | ||
| 112 | + if raw_value == b"BINARY": | ||
| 113 | + encoding = tarfile.encoding | ||
| 114 | + else: # This branch ensures only the first 'hdrcharset' header is used. | ||
| 115 | + encoding = "utf-8" | ||
| 116 | + | ||
| 117 | + pos += length | ||
| 118 | |||
| 119 | + # If no explicit hdrcharset is set, we use UTF-8 as a default. | ||
| 120 | + if encoding is None: | ||
| 121 | + encoding = "utf-8" | ||
| 122 | + | ||
| 123 | + # After parsing the raw headers we can decode them to text. | ||
| 124 | + for length, raw_keyword, raw_value in raw_headers: | ||
| 125 | # Normally, we could just use "utf-8" as the encoding and "strict" | ||
| 126 | # as the error handler, but we better not take the risk. For | ||
| 127 | # example, GNU tar <= 1.23 is known to store filenames it cannot | ||
| 128 | @@ -1452,17 +1473,16 @@ def _proc_pax(self, tarfile): | ||
| 129 | # hdrcharset=BINARY header). | ||
| 130 | # We first try the strict standard encoding, and if that fails we | ||
| 131 | # fall back on the user's encoding and error handler. | ||
| 132 | - keyword = self._decode_pax_field(keyword, "utf-8", "utf-8", | ||
| 133 | + keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8", | ||
| 134 | tarfile.errors) | ||
| 135 | if keyword in PAX_NAME_FIELDS: | ||
| 136 | - value = self._decode_pax_field(value, encoding, tarfile.encoding, | ||
| 137 | + value = self._decode_pax_field(raw_value, encoding, tarfile.encoding, | ||
| 138 | tarfile.errors) | ||
| 139 | else: | ||
| 140 | - value = self._decode_pax_field(value, "utf-8", "utf-8", | ||
| 141 | + value = self._decode_pax_field(raw_value, "utf-8", "utf-8", | ||
| 142 | tarfile.errors) | ||
| 143 | |||
| 144 | pax_headers[keyword] = value | ||
| 145 | - pos += length | ||
| 146 | |||
| 147 | # Fetch the next header. | ||
| 148 | try: | ||
| 149 | @@ -1477,7 +1497,7 @@ def _proc_pax(self, tarfile): | ||
| 150 | |||
| 151 | elif "GNU.sparse.size" in pax_headers: | ||
| 152 | # GNU extended sparse format version 0.0. | ||
| 153 | - self._proc_gnusparse_00(next, pax_headers, buf) | ||
| 154 | + self._proc_gnusparse_00(next, raw_headers) | ||
| 155 | |||
| 156 | elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": | ||
| 157 | # GNU extended sparse format version 1.0. | ||
| 158 | @@ -1499,15 +1519,24 @@ def _proc_pax(self, tarfile): | ||
| 159 | |||
| 160 | return next | ||
| 161 | |||
| 162 | - def _proc_gnusparse_00(self, next, pax_headers, buf): | ||
| 163 | + def _proc_gnusparse_00(self, next, raw_headers): | ||
| 164 | """Process a GNU tar extended sparse header, version 0.0. | ||
| 165 | """ | ||
| 166 | offsets = [] | ||
| 167 | - for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): | ||
| 168 | - offsets.append(int(match.group(1))) | ||
| 169 | numbytes = [] | ||
| 170 | - for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): | ||
| 171 | - numbytes.append(int(match.group(1))) | ||
| 172 | + for _, keyword, value in raw_headers: | ||
| 173 | + if keyword == b"GNU.sparse.offset": | ||
| 174 | + try: | ||
| 175 | + offsets.append(int(value.decode())) | ||
| 176 | + except ValueError: | ||
| 177 | + raise InvalidHeaderError("invalid header") | ||
| 178 | + | ||
| 179 | + elif keyword == b"GNU.sparse.numbytes": | ||
| 180 | + try: | ||
| 181 | + numbytes.append(int(value.decode())) | ||
| 182 | + except ValueError: | ||
| 183 | + raise InvalidHeaderError("invalid header") | ||
| 184 | + | ||
| 185 | next.sparse = list(zip(offsets, numbytes)) | ||
| 186 | |||
| 187 | def _proc_gnusparse_01(self, next, pax_headers): | ||
| 188 | diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py | ||
| 189 | index cfc13bccb20..007c3e94acb 100644 | ||
| 190 | --- a/Lib/test/test_tarfile.py | ||
| 191 | +++ b/Lib/test/test_tarfile.py | ||
| 192 | @@ -1139,6 +1139,48 @@ def test_pax_number_fields(self): | ||
| 193 | finally: | ||
| 194 | tar.close() | ||
| 195 | |||
| 196 | + def test_pax_header_bad_formats(self): | ||
| 197 | + # Each malformed pax header record below must be rejected | ||
| 198 | + # with an "invalid header" ReadError when the archive is opened. | ||
| 199 | + pax_header_replacements = ( | ||
| 200 | + b" foo=bar\n", | ||
| 201 | + b"0 \n", | ||
| 202 | + b"1 \n", | ||
| 203 | + b"2 \n", | ||
| 204 | + b"3 =\n", | ||
| 205 | + b"4 =a\n", | ||
| 206 | + b"1000000 foo=bar\n", | ||
| 207 | + b"0 foo=bar\n", | ||
| 208 | + b"-12 foo=bar\n", | ||
| 209 | + b"000000000000000000000000036 foo=bar\n", | ||
| 210 | + ) | ||
| 211 | + pax_headers = {"foo": "bar"} | ||
| 212 | + | ||
| 213 | + for replacement in pax_header_replacements: | ||
| 214 | + with self.subTest(header=replacement): | ||
| 215 | + tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, | ||
| 216 | + encoding="iso8859-1") | ||
| 217 | + try: | ||
| 218 | + t = tarfile.TarInfo() | ||
| 219 | + t.name = "pax" # plain ASCII name | ||
| 220 | + t.uid = 1 | ||
| 221 | + t.pax_headers = pax_headers | ||
| 222 | + tar.addfile(t) | ||
| 223 | + finally: | ||
| 224 | + tar.close() | ||
| 225 | + | ||
| 226 | + with open(tmpname, "rb") as f: | ||
| 227 | + data = f.read() | ||
| 228 | + self.assertIn(b"11 foo=bar\n", data) | ||
| 229 | + data = data.replace(b"11 foo=bar\n", replacement) | ||
| 230 | + | ||
| 231 | + with open(tmpname, "wb") as f: | ||
| 232 | + f.truncate() | ||
| 233 | + f.write(data) | ||
| 234 | + | ||
| 235 | + with self.assertRaisesRegex(tarfile.ReadError, r"method tar: ReadError\('invalid header'\)"): | ||
| 236 | + tarfile.open(tmpname, encoding="iso8859-1") | ||
| 237 | + | ||
| 238 | |||
| 239 | class WriteTestBase(TarTest): | ||
| 240 | # Put all write tests in here that are supposed to be tested | ||
| 241 | diff --git a/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | ||
| 242 | new file mode 100644 | ||
| 243 | index 00000000000..81f918bfe2b | ||
| 244 | --- /dev/null | ||
| 245 | +++ b/Misc/NEWS.d/next/Security/2024-07-02-13-39-20.gh-issue-121285.hrl-yI.rst | ||
| 246 | @@ -0,0 +1,2 @@ | ||
| 247 | +Remove backtracking from tarfile header parsing for ``hdrcharset``, PAX, and | ||
| 248 | +GNU sparse headers. | ||
| 249 | -- | ||
| 250 | 2.46.0 | ||
| 251 | |||
