diff options
Diffstat (limited to 'meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch')
| -rw-r--r-- | meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch new file mode 100644 index 0000000000..6da5b5ba42 --- /dev/null +++ b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch | |||
| @@ -0,0 +1,344 @@ | |||
| 1 | From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001 | ||
| 2 | From: Sam Bull <git@sambull.org> | ||
| 3 | Date: Sun, 28 Jan 2024 17:09:58 +0000 | ||
| 4 | Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078) | ||
| 5 | |||
| 6 | Co-authored-by: Paul J. Dorn <pajod@users.noreply.github.com> | ||
| 7 | Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко) | ||
| 8 | <sviat@redhat.com> | ||
| 9 | (cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827) | ||
| 10 | |||
| 11 | CVE: CVE-2024-23829 | ||
| 12 | |||
| 13 | Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82] | ||
| 14 | |||
| 15 | Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com> | ||
| 16 | --- | ||
| 17 | CONTRIBUTORS.txt | 1 + | ||
| 18 | aiohttp/http_parser.py | 30 ++++---- | ||
| 19 | tests/test_http_parser.py | 139 +++++++++++++++++++++++++++++++++++++- | ||
| 20 | 3 files changed, 155 insertions(+), 15 deletions(-) | ||
| 21 | |||
| 22 | diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt | ||
| 23 | index f8a8df5..b9cdf75 100644 | ||
| 24 | --- a/CONTRIBUTORS.txt | ||
| 25 | +++ b/CONTRIBUTORS.txt | ||
| 26 | @@ -240,6 +240,7 @@ Panagiotis Kolokotronis | ||
| 27 | Pankaj Pandey | ||
| 28 | Pau Freixes | ||
| 29 | Paul Colomiets | ||
| 30 | +Paul J. Dorn | ||
| 31 | Paulius Šileikis | ||
| 32 | Paulus Schoutsen | ||
| 33 | Pavel Kamaev | ||
| 34 | diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py | ||
| 35 | index 175eb7f..91784b3 100644 | ||
| 36 | --- a/aiohttp/http_parser.py | ||
| 37 | +++ b/aiohttp/http_parser.py | ||
| 38 | @@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable) | ||
| 39 | # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / | ||
| 40 | # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA | ||
| 41 | # token = 1*tchar | ||
| 42 | -METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") | ||
| 43 | -VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") | ||
| 44 | -HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]") | ||
| 45 | -HEXDIGIT = re.compile(rb"[0-9a-fA-F]+") | ||
| 46 | +_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~") | ||
| 47 | +TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+") | ||
| 48 | +VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII) | ||
| 49 | +DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII) | ||
| 50 | +HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+") | ||
| 51 | |||
| 52 | |||
| 53 | class RawRequestMessage(NamedTuple): | ||
| 54 | @@ -145,6 +146,7 @@ class HeadersParser: | ||
| 55 | self, lines: List[bytes] | ||
| 56 | ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]: | ||
| 57 | headers: CIMultiDict[str] = CIMultiDict() | ||
| 58 | + # note: "raw" does not mean inclusion of OWS before/after the field value | ||
| 59 | raw_headers = [] | ||
| 60 | |||
| 61 | lines_idx = 0 | ||
| 62 | @@ -158,13 +160,14 @@ class HeadersParser: | ||
| 63 | except ValueError: | ||
| 64 | raise InvalidHeader(line) from None | ||
| 65 | |||
| 66 | + if len(bname) == 0: | ||
| 67 | + raise InvalidHeader(bname) | ||
| 68 | + | ||
| 69 | # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2 | ||
| 70 | if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"} | ||
| 71 | raise InvalidHeader(line) | ||
| 72 | |||
| 73 | bvalue = bvalue.lstrip(b" \t") | ||
| 74 | - if HDRRE.search(bname): | ||
| 75 | - raise InvalidHeader(bname) | ||
| 76 | if len(bname) > self.max_field_size: | ||
| 77 | raise LineTooLong( | ||
| 78 | "request header name {}".format( | ||
| 79 | @@ -173,6 +176,9 @@ class HeadersParser: | ||
| 80 | str(self.max_field_size), | ||
| 81 | str(len(bname)), | ||
| 82 | ) | ||
| 83 | + name = bname.decode("utf-8", "surrogateescape") | ||
| 84 | + if not TOKENRE.fullmatch(name): | ||
| 85 | + raise InvalidHeader(bname) | ||
| 86 | |||
| 87 | header_length = len(bvalue) | ||
| 88 | |||
| 89 | @@ -220,7 +226,6 @@ class HeadersParser: | ||
| 90 | ) | ||
| 91 | |||
| 92 | bvalue = bvalue.strip(b" \t") | ||
| 93 | - name = bname.decode("utf-8", "surrogateescape") | ||
| 94 | value = bvalue.decode("utf-8", "surrogateescape") | ||
| 95 | |||
| 96 | # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 | ||
| 97 | @@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]): | ||
| 98 | |||
| 99 | # Shouldn't allow +/- or other number formats. | ||
| 100 | # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2 | ||
| 101 | - if not length_hdr.strip(" \t").isdecimal(): | ||
| 102 | + # msg.headers is already stripped of leading/trailing wsp | ||
| 103 | + if not DIGITS.fullmatch(length_hdr): | ||
| 104 | raise InvalidHeader(CONTENT_LENGTH) | ||
| 105 | |||
| 106 | return int(length_hdr) | ||
| 107 | @@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]): | ||
| 108 | ) | ||
| 109 | |||
| 110 | # method | ||
| 111 | - if not METHRE.match(method): | ||
| 112 | + if not TOKENRE.fullmatch(method): | ||
| 113 | raise BadStatusLine(method) | ||
| 114 | |||
| 115 | # version | ||
| 116 | @@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]): | ||
| 117 | raise BadStatusLine(line) | ||
| 118 | version_o = HttpVersion(int(match.group(1)), int(match.group(2))) | ||
| 119 | |||
| 120 | - # The status code is a three-digit number | ||
| 121 | - if len(status) != 3 or not status.isdecimal(): | ||
| 122 | + # The status code is a three-digit ASCII number, no padding | ||
| 123 | + if len(status) != 3 or not DIGITS.fullmatch(status): | ||
| 124 | raise BadStatusLine(line) | ||
| 125 | status_i = int(status) | ||
| 126 | |||
| 127 | @@ -844,7 +850,7 @@ class HttpPayloadParser: | ||
| 128 | if self._lax: # Allow whitespace in lax mode. | ||
| 129 | size_b = size_b.strip() | ||
| 130 | |||
| 131 | - if not re.fullmatch(HEXDIGIT, size_b): | ||
| 132 | + if not re.fullmatch(HEXDIGITS, size_b): | ||
| 133 | exc = TransferEncodingError( | ||
| 134 | chunk[:pos].decode("ascii", "surrogateescape") | ||
| 135 | ) | ||
| 136 | diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py | ||
| 137 | index 4b185c9..bcf6058 100644 | ||
| 138 | --- a/tests/test_http_parser.py | ||
| 139 | +++ b/tests/test_http_parser.py | ||
| 140 | @@ -2,7 +2,8 @@ | ||
| 141 | |||
| 142 | import asyncio | ||
| 143 | import re | ||
| 144 | -from typing import Any, List | ||
| 145 | +from contextlib import nullcontext | ||
| 146 | +from typing import Any, Dict, List | ||
| 147 | from unittest import mock | ||
| 148 | from urllib.parse import quote | ||
| 149 | |||
| 150 | @@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None: | ||
| 151 | parser.feed_data(text) | ||
| 152 | |||
| 153 | |||
| 154 | +@pytest.mark.parametrize( | ||
| 155 | + "rfc9110_5_6_2_token_delim", | ||
| 156 | + r'"(),/:;<=>?@[\]{}', | ||
| 157 | +) | ||
| 158 | +def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None: | ||
| 159 | + text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode() | ||
| 160 | + expectation = pytest.raises(http_exceptions.BadHttpMessage) | ||
| 161 | + if rfc9110_5_6_2_token_delim == ":": | ||
| 162 | + # Inserting colon into header just splits name/value earlier. | ||
| 163 | + expectation = nullcontext() | ||
| 164 | + with expectation: | ||
| 165 | + parser.feed_data(text) | ||
| 166 | + | ||
| 167 | + | ||
| 168 | @pytest.mark.parametrize( | ||
| 169 | "hdr", | ||
| 170 | ( | ||
| 171 | # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length | ||
| 172 | "Content-Length: -5", | ||
| 173 | "Content-Length: +256", | ||
| 174 | + "Content-Length: \N{superscript one}", | ||
| 175 | + "Content-Length: \N{mathematical double-struck digit one}", | ||
| 176 | "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5 | ||
| 177 | "Bar: abc\ndef", | ||
| 178 | "Baz: abc\x00def", | ||
| 179 | @@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None: | ||
| 180 | parser.feed_data(text) | ||
| 181 | |||
| 182 | |||
| 183 | +def test_parse_unusual_request_line(parser) -> None: | ||
| 184 | + if not isinstance(response, HttpResponseParserPy): | ||
| 185 | + pytest.xfail("Regression test for Py parser. May match C behaviour later.") | ||
| 186 | + text = b"#smol //a HTTP/1.3\r\n\r\n" | ||
| 187 | + messages, upgrade, tail = parser.feed_data(text) | ||
| 188 | + assert len(messages) == 1 | ||
| 189 | + msg, _ = messages[0] | ||
| 190 | + assert msg.compression is None | ||
| 191 | + assert not msg.upgrade | ||
| 192 | + assert msg.method == "#smol" | ||
| 193 | + assert msg.path == "//a" | ||
| 194 | + assert msg.version == (1, 3) | ||
| 195 | + | ||
| 196 | + | ||
| 197 | def test_parse(parser) -> None: | ||
| 198 | text = b"GET /test HTTP/1.1\r\n\r\n" | ||
| 199 | messages, upgrade, tail = parser.feed_data(text) | ||
| 200 | @@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None: | ||
| 201 | parser.feed_data(text) | ||
| 202 | |||
| 203 | |||
| 204 | +_pad: Dict[bytes, str] = { | ||
| 205 | + b"": "empty", | ||
| 206 | + # not a typo: "\000" is a three-digit octal escape for the NUL byte | |
| 207 | + b"\000": "NUL", | ||
| 208 | + b" ": "SP", | ||
| 209 | + b" ": "SPSP", | ||
| 210 | + # not a typo: cover both 0x0a (LF) and 0xa0 (NBSP) in case of 8-bit mix-ups | |
| 211 | + b"\n": "LF", | ||
| 212 | + b"\xa0": "NBSP", | ||
| 213 | + b"\t ": "TABSP", | ||
| 214 | +} | ||
| 215 | + | ||
| 216 | + | ||
| 217 | +@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"]) | ||
| 218 | +@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()]) | ||
| 219 | +@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()]) | ||
| 220 | +def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None: | ||
| 221 | + text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2) | ||
| 222 | + expectation = pytest.raises(http_exceptions.BadHttpMessage) | ||
| 223 | + if pad1 == pad2 == b"" and hdr != b"": | ||
| 224 | + # one entry in param matrix is correct: non-empty name, not padded | ||
| 225 | + expectation = nullcontext() | ||
| 226 | + if pad1 == pad2 == hdr == b"": | ||
| 227 | + if not isinstance(response, HttpResponseParserPy): | ||
| 228 | + pytest.xfail("Regression test for Py parser. May match C behaviour later.") | ||
| 229 | + with expectation: | ||
| 230 | + parser.feed_data(text) | ||
| 231 | + | ||
| 232 | + | ||
| 233 | +def test_empty_header_name(parser) -> None: | ||
| 234 | + if not isinstance(response, HttpResponseParserPy): | ||
| 235 | + pytest.xfail("Regression test for Py parser. May match C behaviour later.") | ||
| 236 | + text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n" | ||
| 237 | + with pytest.raises(http_exceptions.BadHttpMessage): | ||
| 238 | + parser.feed_data(text) | ||
| 239 | + | ||
| 240 | + | ||
| 241 | def test_invalid_header(parser) -> None: | ||
| 242 | text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n" | ||
| 243 | with pytest.raises(http_exceptions.BadHttpMessage): | ||
| 244 | @@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None: | ||
| 245 | assert r"\n" not in exc_info.value.message | ||
| 246 | |||
| 247 | |||
| 248 | +_num: Dict[bytes, str] = { | ||
| 249 | + # dangerous: accepted by Python int() | ||
| 250 | + # unicodedata.category("\U0001D7D9") == 'Nd' | ||
| 251 | + "\N{mathematical double-struck digit one}".encode(): "utf8digit", | ||
| 252 | + # only added for interop tests, refused by Python int() | ||
| 253 | + # unicodedata.category("\U000000B9") == 'No' | ||
| 254 | + "\N{superscript one}".encode(): "utf8number", | ||
| 255 | + "\N{superscript one}".encode("latin-1"): "latin1number", | ||
| 256 | +} | ||
| 257 | + | ||
| 258 | + | ||
| 259 | +@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values()) | ||
| 260 | +def test_http_request_bad_status_line_number( | ||
| 261 | + parser: Any, nonascii_digit: bytes | ||
| 262 | +) -> None: | ||
| 263 | + text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n" | ||
| 264 | + with pytest.raises(http_exceptions.BadStatusLine): | ||
| 265 | + parser.feed_data(text) | ||
| 266 | + | ||
| 267 | + | ||
| 268 | +def test_http_request_bad_status_line_separator(parser: Any) -> None: | ||
| 269 | + # single code point, old, multibyte NFKC, multibyte NFKD | ||
| 270 | + utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode() | ||
| 271 | + text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n" | ||
| 272 | + with pytest.raises(http_exceptions.BadStatusLine): | ||
| 273 | + parser.feed_data(text) | ||
| 274 | + | ||
| 275 | + | ||
| 276 | def test_http_request_upgrade(parser) -> None: | ||
| 277 | text = ( | ||
| 278 | b"GET /test HTTP/1.1\r\n" | ||
| 279 | @@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None: | ||
| 280 | assert tail == b"some raw data" | ||
| 281 | |||
| 282 | |||
| 283 | +def test_http_request_parser_utf8_request_line(parser) -> None: | ||
| 284 | + if not isinstance(response, HttpResponseParserPy): | ||
| 285 | + pytest.xfail("Regression test for Py parser. May match C behaviour later.") | ||
| 286 | + messages, upgrade, tail = parser.feed_data( | ||
| 287 | + # note the truncated unicode sequence | ||
| 288 | + b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" + | ||
| 289 | + # for easier grep: byte 0xA0 (Latin-1, not ASCII) is the non-breaking space | |
| 290 | + # note the leading and trailing spaces | ||
| 291 | + "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} " | ||
| 292 | + "\r\n\r\n".encode() | ||
| 293 | + ) | ||
| 294 | + msg = messages[0][0] | ||
| 295 | + | ||
| 296 | + assert msg.method == "GET" | ||
| 297 | + assert msg.path == "/Pünktchen\udca0\udcef\udcb7" | ||
| 298 | + assert msg.version == (1, 1) | ||
| 299 | + assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")]) | ||
| 300 | + assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),) | ||
| 301 | + assert not msg.should_close | ||
| 302 | + assert msg.compression is None | ||
| 303 | + assert not msg.upgrade | ||
| 304 | + assert not msg.chunked | ||
| 305 | + assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path | ||
| 306 | + | ||
| 307 | + | ||
| 308 | def test_http_request_parser_utf8(parser) -> None: | ||
| 309 | text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode() | ||
| 310 | messages, upgrade, tail = parser.feed_data(text) | ||
| 311 | @@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None: | ||
| 312 | assert not msg.chunked | ||
| 313 | |||
| 314 | |||
| 315 | -def test_http_request_parser_bad_method(parser) -> None: | ||
| 316 | +@pytest.mark.parametrize( | ||
| 317 | + "rfc9110_5_6_2_token_delim", | ||
| 318 | + [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'], | ||
| 319 | +) | ||
| 320 | +def test_http_request_parser_bad_method( | ||
| 321 | + parser, rfc9110_5_6_2_token_delim: bytes | ||
| 322 | +) -> None: | ||
| 323 | with pytest.raises(http_exceptions.BadStatusLine): | ||
| 324 | - parser.feed_data(b'=":<G>(e),[T];?" /get HTTP/1.1\r\n\r\n') | ||
| 325 | + parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n') | ||
| 326 | |||
| 327 | |||
| 328 | def test_http_request_parser_bad_version(parser) -> None: | ||
| 329 | @@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None: | ||
| 330 | response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n") | ||
| 331 | |||
| 332 | |||
| 333 | +@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values()) | ||
| 334 | +def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None: | ||
| 335 | + with pytest.raises(http_exceptions.BadStatusLine): | ||
| 336 | + response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n") | ||
| 337 | + | ||
| 338 | + | ||
| 339 | def test_http_request_chunked_payload(parser) -> None: | ||
| 340 | text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n" | ||
| 341 | msg, payload = parser.feed_data(text)[0][0] | ||
| 342 | -- | ||
| 343 | 2.40.0 | ||
| 344 | |||
