3 files changed, 667 insertions, 0 deletions
diff --git a/meta/recipes-devtools/python/python3-urllib3/CVE-2025-66418.patch b/meta/recipes-devtools/python/python3-urllib3/CVE-2025-66418.patch
new file mode 100644
index 0000000000..5d39b36afc
--- /dev/null
+++ b/meta/recipes-devtools/python/python3-urllib3/CVE-2025-66418.patch
@@ -0,0 +1,80 @@
From 3bf7db860ef730e828b68264e88210190120cacf Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Fri, 5 Dec 2025 16:41:33 +0200
Subject: [PATCH] Merge commit from fork

* Add a hard-coded limit for the decompression chain

* Reuse new list

CVE: CVE-2025-66418

Upstream-Status: Backport
[https://github.com/urllib3/urllib3/commit/24d7b67eac89f94e11003424bcf0d8f7b72222a8]

Signed-off-by: Jiaying Song <jiaying.song.cn@windriver.com>
---
 changelog/GHSA-gm62-xv2j-4w53.security.rst |  4 ++++
 src/urllib3/response.py                    | 12 +++++++++++-
 test/test_response.py                      | 10 ++++++++++
 3 files changed, 25 insertions(+), 1 deletion(-)
 create mode 100644 changelog/GHSA-gm62-xv2j-4w53.security.rst

diff --git a/changelog/GHSA-gm62-xv2j-4w53.security.rst b/changelog/GHSA-gm62-xv2j-4w53.security.rst
new file mode 100644
index 00000000..6646eaa3
--- /dev/null
+++ b/changelog/GHSA-gm62-xv2j-4w53.security.rst
@@ -0,0 +1,4 @@
+Fixed a security issue where an attacker could compose an HTTP response with
+virtually unlimited links in the ``Content-Encoding`` header, potentially
+leading to a denial of service (DoS) attack by exhausting system resources
+during decoding. The number of allowed chained encodings is now limited to 5.
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index a0273d65..b8e8565c 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -194,8 +194,18 @@ class MultiDecoder(ContentDecoder):
         they were applied.
     """

+    # Maximum allowed number of chained HTTP encodings in the
+    # Content-Encoding header.
+    max_decode_links = 5
+
     def __init__(self, modes: str) -> None:
-        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]
+        encodings = [m.strip() for m in modes.split(",")]
+        if len(encodings) > self.max_decode_links:
+            raise DecodeError(
+                "Too many content encodings in the chain: "
+                f"{len(encodings)} > {self.max_decode_links}"
+            )
+        self._decoders = [_get_decoder(e) for e in encodings]

     def flush(self) -> bytes:
         return self._decoders[0].flush()
diff --git a/test/test_response.py b/test/test_response.py
index c0062771..0e8abd93 100644
--- a/test/test_response.py
+++ b/test/test_response.py
@@ -581,6 +581,16 @@ class TestResponse:
         assert r.read(9 * 37) == b"foobarbaz" * 37
         assert r.read() == b""

+    def test_read_multi_decoding_too_many_links(self) -> None:
+        fp = BytesIO(b"foo")
+        with pytest.raises(
+            DecodeError, match="Too many content encodings in the chain: 6 > 5"
+        ):
+            HTTPResponse(
+                fp,
+                headers={"content-encoding": "gzip, deflate, br, zstd, gzip, deflate"},
+            )
+
     def test_body_blob(self) -> None:
         resp = HTTPResponse(b"foo")
         assert resp.data == b"foo"
--
2.34.1
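The following standalone snippet (not part of the patch; the function name and the repeated-gzip header value are illustrative) mirrors the new unit test above and shows the guard firing: with the fix applied, a Content-Encoding chain longer than five codings is rejected with a DecodeError before any decompression work is done.

from io import BytesIO

from urllib3.exceptions import DecodeError
from urllib3.response import HTTPResponse

def chain_limit_demo() -> None:
    # Six chained codings exceed the new max_decode_links = 5 limit.
    headers = {"content-encoding": ", ".join(["gzip"] * 6)}
    try:
        HTTPResponse(BytesIO(b"foo"), headers=headers)
    except DecodeError as exc:
        print(f"rejected as expected: {exc}")

if __name__ == "__main__":
    chain_limit_demo()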
diff --git a/meta/recipes-devtools/python/python3-urllib3/CVE-2025-66471.patch b/meta/recipes-devtools/python/python3-urllib3/CVE-2025-66471.patch
new file mode 100644
index 0000000000..5329e26272
--- /dev/null
+++ b/meta/recipes-devtools/python/python3-urllib3/CVE-2025-66471.patch
@@ -0,0 +1,585 @@
From f25c0d11e1b640e3c7e0addb66a1ff50730be508 Mon Sep 17 00:00:00 2001
From: Illia Volochii <illia.volochii@gmail.com>
Date: Fri, 5 Dec 2025 16:40:41 +0200
Subject: [PATCH] Merge commit from fork

* Prevent decompression bomb for zstd in Python 3.14

* Add experimental `decompress_iter` for Brotli

* Update changes for Brotli

* Add `GzipDecoder.decompress_iter`

* Test https://github.com/python-hyper/brotlicffi/pull/207

* Pin Brotli

* Add `decompress_iter` to all decoders and make tests pass

* Pin brotlicffi to an official release

* Revert changes to response.py

* Add `max_length` parameter to all `decompress` methods

* Fix the `test_brotlipy` session

* Unset `_data` on gzip error

* Add a test for memory usage

* Test more methods

* Fix the test for `stream`

* Cover more lines with tests

* Add more coverage

* Make `read1` a bit more efficient

* Fix PyPy tests for Brotli

* Revert an unnecessarily moved check

* Add some comments

* Leave just one `self._obj.decompress` call in `GzipDecoder`

* Refactor test params

* Test reads with all data already in the decompressor

* Prevent needless copying of data decoded with `max_length`

* Rename the changed test

* Note that responses of unknown length should be streamed too

* Add a changelog entry

* Avoid returning a memory view from `BytesQueueBuffer`

* Add one more note to the changelog entry

CVE: CVE-2025-66471

Upstream-Status: Backport
[https://github.com/urllib3/urllib3/commit/c19571de34c47de3a766541b041637ba5f716ed7]

Signed-off-by: Jiaying Song <jiaying.song.cn@windriver.com>
---
 docs/advanced-usage.rst |   3 +-
 docs/user-guide.rst     |   4 +-
 pyproject.toml          |   5 +-
 src/urllib3/response.py | 278 ++++++++++++++++++++++++++++++++++------
 4 files changed, 246 insertions(+), 44 deletions(-)

diff --git a/docs/advanced-usage.rst b/docs/advanced-usage.rst
index 36a51e67..a12c7143 100644
--- a/docs/advanced-usage.rst
+++ b/docs/advanced-usage.rst
@@ -66,7 +66,8 @@ When using ``preload_content=True`` (the default setting) the
 response body will be read immediately into memory and the HTTP connection
 will be released back into the pool without manual intervention.

-However, when dealing with large responses it's often better to stream the response
+However, when dealing with responses of large or unknown length,
+it's often better to stream the response
 content using ``preload_content=False``. Setting ``preload_content`` to ``False`` means
 that urllib3 will only read from the socket when data is requested.

diff --git a/docs/user-guide.rst b/docs/user-guide.rst
index 5c78c8af..1d9d0bbd 100644
--- a/docs/user-guide.rst
+++ b/docs/user-guide.rst
@@ -145,8 +145,8 @@ to a byte string representing the response content:
     print(resp.data)
     # b"\xaa\xa5H?\x95\xe9\x9b\x11"

-.. note:: For larger responses, it's sometimes better to :ref:`stream <stream>`
-    the response.
+.. note:: For responses of large or unknown length, it's sometimes better to
+    :ref:`stream <stream>` the response.

 Using io Wrappers with Response Content
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pyproject.toml b/pyproject.toml
index 1fe82937..58a2c2db 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,8 +40,8 @@ dynamic = ["version"]

 [project.optional-dependencies]
 brotli = [
-  "brotli>=1.0.9; platform_python_implementation == 'CPython'",
-  "brotlicffi>=0.8.0; platform_python_implementation != 'CPython'"
+  "brotli>=1.2.0; platform_python_implementation == 'CPython'",
+  "brotlicffi>=1.2.0.0; platform_python_implementation != 'CPython'"
 ]
 zstd = [
   "zstandard>=0.18.0",
@@ -95,6 +95,7 @@ filterwarnings = [
    '''default:ssl\.PROTOCOL_TLSv1_1 is deprecated:DeprecationWarning''',
    '''default:ssl\.PROTOCOL_TLSv1_2 is deprecated:DeprecationWarning''',
    '''default:ssl NPN is deprecated, use ALPN instead:DeprecationWarning''',
+   '''default:Brotli >= 1.2.0 is required to prevent decompression bombs\.:urllib3.exceptions.DependencyWarning''',
    '''default:Async generator 'quart\.wrappers\.response\.DataBody\.__aiter__\.<locals>\._aiter' was garbage collected.*:ResourceWarning''', # https://github.com/pallets/quart/issues/301
    '''default:unclosed file <_io\.BufferedWriter name='/dev/null'>:ResourceWarning''', # https://github.com/SeleniumHQ/selenium/issues/13328
 ]
diff --git a/src/urllib3/response.py b/src/urllib3/response.py
index b8e8565c..4304133e 100644
--- a/src/urllib3/response.py
+++ b/src/urllib3/response.py
@@ -49,6 +49,7 @@ from .connection import BaseSSLError, HTTPConnection, HTTPException
 from .exceptions import (
     BodyNotHttplibCompatible,
     DecodeError,
+    DependencyWarning,
     HTTPError,
     IncompleteRead,
     InvalidChunkLength,
@@ -68,7 +69,11 @@ log = logging.getLogger(__name__)


 class ContentDecoder:
-    def decompress(self, data: bytes) -> bytes:
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+        raise NotImplementedError()
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
         raise NotImplementedError()

     def flush(self) -> bytes:
@@ -78,30 +83,57 @@ class ContentDecoder:
 class DeflateDecoder(ContentDecoder):
     def __init__(self) -> None:
         self._first_try = True
-        self._data = b""
+        self._first_try_data = b""
+        self._unfed_data = b""
         self._obj = zlib.decompressobj()

-    def decompress(self, data: bytes) -> bytes:
-        if not data:
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+        data = self._unfed_data + data
+        self._unfed_data = b""
+        if not data and not self._obj.unconsumed_tail:
             return data
+        original_max_length = max_length
+        if original_max_length < 0:
+            max_length = 0
+        elif original_max_length == 0:
+            # We should not pass 0 to the zlib decompressor because 0 is
+            # the default value that will make zlib decompress without a
+            # length limit.
+            # Data should be stored for subsequent calls.
+            self._unfed_data = data
+            return b""

+        # Subsequent calls always reuse `self._obj`. zlib requires
+        # passing the unconsumed tail if decompression is to continue.
         if not self._first_try:
-            return self._obj.decompress(data)
+            return self._obj.decompress(
+                self._obj.unconsumed_tail + data, max_length=max_length
+            )

-        self._data += data
+        # First call tries with RFC 1950 ZLIB format.
+        self._first_try_data += data
         try:
-            decompressed = self._obj.decompress(data)
+            decompressed = self._obj.decompress(data, max_length=max_length)
             if decompressed:
                 self._first_try = False
-                self._data = None  # type: ignore[assignment]
+                self._first_try_data = b""
             return decompressed
+        # On failure, it falls back to RFC 1951 DEFLATE format.
         except zlib.error:
             self._first_try = False
             self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
             try:
-                return self.decompress(self._data)
+                return self.decompress(
+                    self._first_try_data, max_length=original_max_length
+                )
             finally:
-                self._data = None  # type: ignore[assignment]
+                self._first_try_data = b""
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
+        return bool(self._unfed_data) or (
+            bool(self._obj.unconsumed_tail) and not self._first_try
+        )

     def flush(self) -> bytes:
         return self._obj.flush()
@@ -117,27 +149,61 @@ class GzipDecoder(ContentDecoder):
     def __init__(self) -> None:
         self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
         self._state = GzipDecoderState.FIRST_MEMBER
+        self._unconsumed_tail = b""

-    def decompress(self, data: bytes) -> bytes:
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
         ret = bytearray()
-        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
+        if self._state == GzipDecoderState.SWALLOW_DATA:
             return bytes(ret)
+
+        if max_length == 0:
+            # We should not pass 0 to the zlib decompressor because 0 is
+            # the default value that will make zlib decompress without a
+            # length limit.
+            # Data should be stored for subsequent calls.
+            self._unconsumed_tail += data
+            return b""
+
+        # zlib requires passing the unconsumed tail to the subsequent
+        # call if decompression is to continue.
+        data = self._unconsumed_tail + data
+        if not data and self._obj.eof:
+            return bytes(ret)
+
         while True:
             try:
-                ret += self._obj.decompress(data)
+                ret += self._obj.decompress(
+                    data, max_length=max(max_length - len(ret), 0)
+                )
             except zlib.error:
                 previous_state = self._state
                 # Ignore data after the first error
                 self._state = GzipDecoderState.SWALLOW_DATA
+                self._unconsumed_tail = b""
                 if previous_state == GzipDecoderState.OTHER_MEMBERS:
                     # Allow trailing garbage acceptable in other gzip clients
                     return bytes(ret)
                 raise
-            data = self._obj.unused_data
+
+            self._unconsumed_tail = data = (
+                self._obj.unconsumed_tail or self._obj.unused_data
+            )
+            if max_length > 0 and len(ret) >= max_length:
+                break
+
             if not data:
                 return bytes(ret)
-            self._state = GzipDecoderState.OTHER_MEMBERS
-            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+            # When the end of a gzip member is reached, a new decompressor
+            # must be created for unused (possibly future) data.
+            if self._obj.eof:
+                self._state = GzipDecoderState.OTHER_MEMBERS
+                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
+
+        return bytes(ret)
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
+        return bool(self._unconsumed_tail)

     def flush(self) -> bytes:
         return self._obj.flush()
@@ -152,9 +218,35 @@ if brotli is not None:
         def __init__(self) -> None:
             self._obj = brotli.Decompressor()
             if hasattr(self._obj, "decompress"):
-                setattr(self, "decompress", self._obj.decompress)
+                setattr(self, "_decompress", self._obj.decompress)
             else:
-                setattr(self, "decompress", self._obj.process)
+                setattr(self, "_decompress", self._obj.process)
+
+        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
+        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
+            raise NotImplementedError()
+
+        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+            try:
+                if max_length > 0:
+                    return self._decompress(data, output_buffer_limit=max_length)
+                else:
+                    return self._decompress(data)
+            except TypeError:
+                # Fallback for Brotli/brotlicffi/brotlipy versions without
+                # the `output_buffer_limit` parameter.
+                warnings.warn(
+                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
+                    DependencyWarning,
+                )
+                return self._decompress(data)
+
+        @property
+        def has_unconsumed_tail(self) -> bool:
+            try:
+                return not self._obj.can_accept_more_data()
+            except AttributeError:
+                return False

         def flush(self) -> bytes:
             if hasattr(self._obj, "flush"):
@@ -168,16 +260,46 @@ if HAS_ZSTD:
         def __init__(self) -> None:
             self._obj = zstd.ZstdDecompressor().decompressobj()

-        def decompress(self, data: bytes) -> bytes:
-            if not data:
+        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+            if not data and not self.has_unconsumed_tail:
                 return b""
-            data_parts = [self._obj.decompress(data)]
-            while self._obj.eof and self._obj.unused_data:
+            if self._obj.eof:
+                data = self._obj.unused_data + data
+                self._obj = zstd.ZstdDecompressor()
+            part = self._obj.decompress(data, max_length=max_length)
+            length = len(part)
+            data_parts = [part]
+            # Every loop iteration is supposed to read data from a separate frame.
+            # The loop breaks when:
+            # - enough data is read;
+            # - no more unused data is available;
+            # - end of the last read frame has not been reached (i.e.,
+            #   more data has to be fed).
+            while (
+                self._obj.eof
+                and self._obj.unused_data
+                and (max_length < 0 or length < max_length)
+            ):
                 unused_data = self._obj.unused_data
-                self._obj = zstd.ZstdDecompressor().decompressobj()
-                data_parts.append(self._obj.decompress(unused_data))
+                if not self._obj.needs_input:
+                    self._obj = zstd.ZstdDecompressor()
+                part = self._obj.decompress(
+                    unused_data,
+                    max_length=(max_length - length) if max_length > 0 else -1,
+                )
+                if part_length := len(part):
+                    data_parts.append(part)
+                    length += part_length
+                elif self._obj.needs_input:
+                    break
             return b"".join(data_parts)

+        @property
+        def has_unconsumed_tail(self) -> bool:
+            return not (self._obj.needs_input or self._obj.eof) or bool(
+                self._obj.unused_data
+            )
+
         def flush(self) -> bytes:
             ret = self._obj.flush()  # note: this is a no-op
             if not self._obj.eof:
@@ -210,10 +332,35 @@ class MultiDecoder(ContentDecoder):
     def flush(self) -> bytes:
         return self._decoders[0].flush()

-    def decompress(self, data: bytes) -> bytes:
-        for d in reversed(self._decoders):
-            data = d.decompress(data)
-        return data
+    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
+        if max_length <= 0:
+            for d in reversed(self._decoders):
+                data = d.decompress(data)
+            return data
+
+        ret = bytearray()
+        # Every while loop iteration goes through all decoders once.
+        # It exits when enough data is read or no more data can be read.
+        # It is possible that the while loop iteration does not produce
+        # any data because we retrieve up to `max_length` from every
+        # decoder, and the amount of bytes may be insufficient for the
+        # next decoder to produce enough/any output.
+        while True:
+            any_data = False
+            for d in reversed(self._decoders):
+                data = d.decompress(data, max_length=max_length - len(ret))
+                if data:
+                    any_data = True
+                # We should not break when no data is returned because
+                # next decoders may produce data even with empty input.
+            ret += data
+            if not any_data or len(ret) >= max_length:
+                return bytes(ret)
+            data = b""
+
+    @property
+    def has_unconsumed_tail(self) -> bool:
+        return any(d.has_unconsumed_tail for d in self._decoders)


 def _get_decoder(mode: str) -> ContentDecoder:
@@ -246,9 +393,6 @@ class BytesQueueBuffer:

      * self.buffer, which contains the full data
      * the largest chunk that we will copy in get()
-
-    The worst case scenario is a single chunk, in which case we'll make a full copy of
-    the data inside get().
     """

     def __init__(self) -> None:
@@ -270,6 +414,10 @@ class BytesQueueBuffer:
         elif n < 0:
             raise ValueError("n should be > 0")

+        if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
+            self._size -= n
+            return self.buffer.popleft()
+
         fetched = 0
         ret = io.BytesIO()
         while fetched < n:
@@ -473,7 +621,11 @@ class BaseHTTPResponse(io.IOBase):
                     self._decoder = _get_decoder(content_encoding)

     def _decode(
-        self, data: bytes, decode_content: bool | None, flush_decoder: bool
+        self,
+        data: bytes,
+        decode_content: bool | None,
+        flush_decoder: bool,
+        max_length: int | None = None,
     ) -> bytes:
         """
         Decode the data passed in and potentially flush the decoder.
@@ -486,9 +638,12 @@ class BaseHTTPResponse(io.IOBase):
                 )
             return data

+        if max_length is None or flush_decoder:
+            max_length = -1
+
         try:
             if self._decoder:
-                data = self._decoder.decompress(data)
+                data = self._decoder.decompress(data, max_length=max_length)
                 self._has_decoded_content = True
         except self.DECODER_ERROR_CLASSES as e:
             content_encoding = self.headers.get("content-encoding", "").lower()
@@ -953,6 +1108,14 @@ class HTTPResponse(BaseHTTPResponse):
         elif amt is not None:
             cache_content = False

+            if self._decoder and self._decoder.has_unconsumed_tail:
+                decoded_data = self._decode(
+                    b"",
+                    decode_content,
+                    flush_decoder=False,
+                    max_length=amt - len(self._decoded_buffer),
+                )
+                self._decoded_buffer.put(decoded_data)
             if len(self._decoded_buffer) >= amt:
                 return self._decoded_buffer.get(amt)

@@ -960,7 +1123,11 @@ class HTTPResponse(BaseHTTPResponse):

         flush_decoder = amt is None or (amt != 0 and not data)

-        if not data and len(self._decoded_buffer) == 0:
+        if (
+            not data
+            and len(self._decoded_buffer) == 0
+            and not (self._decoder and self._decoder.has_unconsumed_tail)
+        ):
             return data

         if amt is None:
@@ -977,7 +1144,12 @@ class HTTPResponse(BaseHTTPResponse):
                     )
                 return data

-            decoded_data = self._decode(data, decode_content, flush_decoder)
+            decoded_data = self._decode(
+                data,
+                decode_content,
+                flush_decoder,
+                max_length=amt - len(self._decoded_buffer),
+            )
             self._decoded_buffer.put(decoded_data)

             while len(self._decoded_buffer) < amt and data:
@@ -985,7 +1157,12 @@ class HTTPResponse(BaseHTTPResponse):
                 # For example, the GZ file header takes 10 bytes, we don't want to read
                 # it one byte at a time
                 data = self._raw_read(amt)
-                decoded_data = self._decode(data, decode_content, flush_decoder)
+                decoded_data = self._decode(
+                    data,
+                    decode_content,
+                    flush_decoder,
+                    max_length=amt - len(self._decoded_buffer),
+                )
                 self._decoded_buffer.put(decoded_data)
             data = self._decoded_buffer.get(amt)

@@ -1020,6 +1197,20 @@ class HTTPResponse(BaseHTTPResponse):
                     "Calling read1(decode_content=False) is not supported after "
                     "read1(decode_content=True) was called."
                 )
+            if (
+                self._decoder
+                and self._decoder.has_unconsumed_tail
+                and (amt is None or len(self._decoded_buffer) < amt)
+            ):
+                decoded_data = self._decode(
+                    b"",
+                    decode_content,
+                    flush_decoder=False,
+                    max_length=(
+                        amt - len(self._decoded_buffer) if amt is not None else None
+                    ),
+                )
+                self._decoded_buffer.put(decoded_data)
             if len(self._decoded_buffer) > 0:
                 if amt is None:
                     return self._decoded_buffer.get_all()
@@ -1035,7 +1226,9 @@ class HTTPResponse(BaseHTTPResponse):
         self._init_decoder()
         while True:
             flush_decoder = not data
-            decoded_data = self._decode(data, decode_content, flush_decoder)
+            decoded_data = self._decode(
+                data, decode_content, flush_decoder, max_length=amt
+            )
             self._decoded_buffer.put(decoded_data)
             if decoded_data or flush_decoder:
                 break
@@ -1066,7 +1259,11 @@ class HTTPResponse(BaseHTTPResponse):
         if self.chunked and self.supports_chunked_reads():
             yield from self.read_chunked(amt, decode_content=decode_content)
         else:
-            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
+            while (
+                not is_fp_closed(self._fp)
+                or len(self._decoded_buffer) > 0
+                or (self._decoder and self._decoder.has_unconsumed_tail)
+            ):
                 data = self.read(amt=amt, decode_content=decode_content)

                 if data:
@@ -1218,7 +1415,10 @@ class HTTPResponse(BaseHTTPResponse):
                     break
                 chunk = self._handle_chunk(amt)
                 decoded = self._decode(
-                    chunk, decode_content=decode_content, flush_decoder=False
+                    chunk,
+                    decode_content=decode_content,
+                    flush_decoder=False,
+                    max_length=amt,
                 )
                 if decoded:
                     yield decoded
--
2.34.1
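For context, the effect of the max_length plumbing above can be sketched with a standalone read loop (not part of the patch; the helper name and sizes are illustrative). With the fix, a small gzip body that inflates to many megabytes is decoded incrementally as read(amt) is called, rather than being expanded in a single decompress() pass:

import gzip
from io import BytesIO

from urllib3.response import HTTPResponse

def bounded_read_demo() -> None:
    # ~16 MiB of zeros compresses to a few KiB: a classic decompression-bomb shape.
    payload = gzip.compress(b"\x00" * (16 * 1024 * 1024))
    resp = HTTPResponse(
        body=BytesIO(payload),
        headers={"content-encoding": "gzip"},
        preload_content=False,
    )
    total = 0
    while chunk := resp.read(64 * 1024):  # each call asks for roughly 64 KiB of decoded data
        total += len(chunk)
    print(f"decoded {total} bytes in bounded chunks")

if __name__ == "__main__":
    bounded_read_demo()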
diff --git a/meta/recipes-devtools/python/python3-urllib3_2.2.2.bb b/meta/recipes-devtools/python/python3-urllib3_2.2.2.bb
index bdb1c7ca8d..620927322a 100644
--- a/meta/recipes-devtools/python/python3-urllib3_2.2.2.bb
+++ b/meta/recipes-devtools/python/python3-urllib3_2.2.2.bb
@@ -9,6 +9,8 @@ inherit pypi python_hatchling

 SRC_URI += " \
     file://CVE-2025-50181.patch \
+    file://CVE-2025-66418.patch \
+    file://CVE-2025-66471.patch \
 "

 RDEPENDS:${PN} += "\