summary refs log tree commit diff stats
path: root/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch')
-rw-r--r-- meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch 344
1 files changed, 344 insertions, 0 deletions
diff --git a/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch
new file mode 100644
index 0000000000..6da5b5ba42
--- /dev/null
+++ b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch
@@ -0,0 +1,344 @@
1From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001
2From: Sam Bull <git@sambull.org>
3Date: Sun, 28 Jan 2024 17:09:58 +0000
4Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078)
5
6Co-authored-by: Paul J. Dorn <pajod@users.noreply.github.com>
7Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко)
8<sviat@redhat.com>
9(cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827)
10
11CVE: CVE-2024-23829
12
13Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82]
14
15Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
16---
17 CONTRIBUTORS.txt | 1 +
18 aiohttp/http_parser.py | 30 ++++----
19 tests/test_http_parser.py | 139 +++++++++++++++++++++++++++++++++++++-
20 3 files changed, 155 insertions(+), 15 deletions(-)
21
22diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
23index f8a8df5..b9cdf75 100644
24--- a/CONTRIBUTORS.txt
25+++ b/CONTRIBUTORS.txt
26@@ -240,6 +240,7 @@ Panagiotis Kolokotronis
27 Pankaj Pandey
28 Pau Freixes
29 Paul Colomiets
30+Paul J. Dorn
31 Paulius Šileikis
32 Paulus Schoutsen
33 Pavel Kamaev
34diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
35index 175eb7f..91784b3 100644
36--- a/aiohttp/http_parser.py
37+++ b/aiohttp/http_parser.py
38@@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable)
39 # tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
40 # "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
41 # token = 1*tchar
42-METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
43-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
44-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
45-HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
46+_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
47+TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
48+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
49+DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
50+HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
51
52
53 class RawRequestMessage(NamedTuple):
54@@ -145,6 +146,7 @@ class HeadersParser:
55 self, lines: List[bytes]
56 ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
57 headers: CIMultiDict[str] = CIMultiDict()
58+ # note: "raw" does not mean inclusion of OWS before/after the field value
59 raw_headers = []
60
61 lines_idx = 0
62@@ -158,13 +160,14 @@ class HeadersParser:
63 except ValueError:
64 raise InvalidHeader(line) from None
65
66+ if len(bname) == 0:
67+ raise InvalidHeader(bname)
68+
69 # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
70 if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
71 raise InvalidHeader(line)
72
73 bvalue = bvalue.lstrip(b" \t")
74- if HDRRE.search(bname):
75- raise InvalidHeader(bname)
76 if len(bname) > self.max_field_size:
77 raise LineTooLong(
78 "request header name {}".format(
79@@ -173,6 +176,9 @@ class HeadersParser:
80 str(self.max_field_size),
81 str(len(bname)),
82 )
83+ name = bname.decode("utf-8", "surrogateescape")
84+ if not TOKENRE.fullmatch(name):
85+ raise InvalidHeader(bname)
86
87 header_length = len(bvalue)
88
89@@ -220,7 +226,6 @@ class HeadersParser:
90 )
91
92 bvalue = bvalue.strip(b" \t")
93- name = bname.decode("utf-8", "surrogateescape")
94 value = bvalue.decode("utf-8", "surrogateescape")
95
96 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
97@@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]):
98
99 # Shouldn't allow +/- or other number formats.
100 # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
101- if not length_hdr.strip(" \t").isdecimal():
102+ # msg.headers is already stripped of leading/trailing wsp
103+ if not DIGITS.fullmatch(length_hdr):
104 raise InvalidHeader(CONTENT_LENGTH)
105
106 return int(length_hdr)
107@@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]):
108 )
109
110 # method
111- if not METHRE.match(method):
112+ if not TOKENRE.fullmatch(method):
113 raise BadStatusLine(method)
114
115 # version
116@@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
117 raise BadStatusLine(line)
118 version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
119
120- # The status code is a three-digit number
121- if len(status) != 3 or not status.isdecimal():
122+ # The status code is a three-digit ASCII number, no padding
123+ if len(status) != 3 or not DIGITS.fullmatch(status):
124 raise BadStatusLine(line)
125 status_i = int(status)
126
127@@ -844,7 +850,7 @@ class HttpPayloadParser:
128 if self._lax: # Allow whitespace in lax mode.
129 size_b = size_b.strip()
130
131- if not re.fullmatch(HEXDIGIT, size_b):
132+ if not re.fullmatch(HEXDIGITS, size_b):
133 exc = TransferEncodingError(
134 chunk[:pos].decode("ascii", "surrogateescape")
135 )
136diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
137index 4b185c9..bcf6058 100644
138--- a/tests/test_http_parser.py
139+++ b/tests/test_http_parser.py
140@@ -2,7 +2,8 @@
141
142 import asyncio
143 import re
144-from typing import Any, List
145+from contextlib import nullcontext
146+from typing import Any, Dict, List
147 from unittest import mock
148 from urllib.parse import quote
149
150@@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None:
151 parser.feed_data(text)
152
153
154+@pytest.mark.parametrize(
155+ "rfc9110_5_6_2_token_delim",
156+ r'"(),/:;<=>?@[\]{}',
157+)
158+def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
159+ text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode()
160+ expectation = pytest.raises(http_exceptions.BadHttpMessage)
161+ if rfc9110_5_6_2_token_delim == ":":
162+ # Inserting colon into header just splits name/value earlier.
163+ expectation = nullcontext()
164+ with expectation:
165+ parser.feed_data(text)
166+
167+
168 @pytest.mark.parametrize(
169 "hdr",
170 (
171 # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
172 "Content-Length: -5",
173 "Content-Length: +256",
174+ "Content-Length: \N{superscript one}",
175+ "Content-Length: \N{mathematical double-struck digit one}",
176 "Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
177 "Bar: abc\ndef",
178 "Baz: abc\x00def",
179@@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None:
180 parser.feed_data(text)
181
182
183+def test_parse_unusual_request_line(parser) -> None:
184+ if not isinstance(response, HttpResponseParserPy):
185+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
186+ text = b"#smol //a HTTP/1.3\r\n\r\n"
187+ messages, upgrade, tail = parser.feed_data(text)
188+ assert len(messages) == 1
189+ msg, _ = messages[0]
190+ assert msg.compression is None
191+ assert not msg.upgrade
192+ assert msg.method == "#smol"
193+ assert msg.path == "//a"
194+ assert msg.version == (1, 3)
195+
196+
197 def test_parse(parser) -> None:
198 text = b"GET /test HTTP/1.1\r\n\r\n"
199 messages, upgrade, tail = parser.feed_data(text)
200@@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None:
201 parser.feed_data(text)
202
203
204+_pad: Dict[bytes, str] = {
205+ b"": "empty",
206+ # not a typo. Python likes triple zero
207+ b"\000": "NUL",
208+ b" ": "SP",
209+ b" ": "SPSP",
210+ # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
211+ b"\n": "LF",
212+ b"\xa0": "NBSP",
213+ b"\t ": "TABSP",
214+}
215+
216+
217+@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
218+@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
219+@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
220+def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None:
221+ text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2)
222+ expectation = pytest.raises(http_exceptions.BadHttpMessage)
223+ if pad1 == pad2 == b"" and hdr != b"":
224+ # one entry in param matrix is correct: non-empty name, not padded
225+ expectation = nullcontext()
226+ if pad1 == pad2 == hdr == b"":
227+ if not isinstance(response, HttpResponseParserPy):
228+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
229+ with expectation:
230+ parser.feed_data(text)
231+
232+
233+def test_empty_header_name(parser) -> None:
234+ if not isinstance(response, HttpResponseParserPy):
235+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
236+ text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
237+ with pytest.raises(http_exceptions.BadHttpMessage):
238+ parser.feed_data(text)
239+
240+
241 def test_invalid_header(parser) -> None:
242 text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n"
243 with pytest.raises(http_exceptions.BadHttpMessage):
244@@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None:
245 assert r"\n" not in exc_info.value.message
246
247
248+_num: Dict[bytes, str] = {
249+ # dangerous: accepted by Python int()
250+ # unicodedata.category("\U0001D7D9") == 'Nd'
251+ "\N{mathematical double-struck digit one}".encode(): "utf8digit",
252+ # only added for interop tests, refused by Python int()
253+ # unicodedata.category("\U000000B9") == 'No'
254+ "\N{superscript one}".encode(): "utf8number",
255+ "\N{superscript one}".encode("latin-1"): "latin1number",
256+}
257+
258+
259+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
260+def test_http_request_bad_status_line_number(
261+ parser: Any, nonascii_digit: bytes
262+) -> None:
263+ text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
264+ with pytest.raises(http_exceptions.BadStatusLine):
265+ parser.feed_data(text)
266+
267+
268+def test_http_request_bad_status_line_separator(parser: Any) -> None:
269+ # single code point, old, multibyte NFKC, multibyte NFKD
270+ utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
271+ text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
272+ with pytest.raises(http_exceptions.BadStatusLine):
273+ parser.feed_data(text)
274+
275+
276 def test_http_request_upgrade(parser) -> None:
277 text = (
278 b"GET /test HTTP/1.1\r\n"
279@@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None:
280 assert tail == b"some raw data"
281
282
283+def test_http_request_parser_utf8_request_line(parser) -> None:
284+ if not isinstance(response, HttpResponseParserPy):
285+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
286+ messages, upgrade, tail = parser.feed_data(
287+ # note the truncated unicode sequence
288+ b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
289+ # for easier grep: ASCII 0xA0 more commonly known as non-breaking space
290+ # note the leading and trailing spaces
291+ "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} "
292+ "\r\n\r\n".encode()
293+ )
294+ msg = messages[0][0]
295+
296+ assert msg.method == "GET"
297+ assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
298+ assert msg.version == (1, 1)
299+ assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
300+ assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
301+ assert not msg.should_close
302+ assert msg.compression is None
303+ assert not msg.upgrade
304+ assert not msg.chunked
305+ assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
306+
307+
308 def test_http_request_parser_utf8(parser) -> None:
309 text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode()
310 messages, upgrade, tail = parser.feed_data(text)
311@@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None:
312 assert not msg.chunked
313
314
315-def test_http_request_parser_bad_method(parser) -> None:
316+@pytest.mark.parametrize(
317+ "rfc9110_5_6_2_token_delim",
318+ [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
319+)
320+def test_http_request_parser_bad_method(
321+ parser, rfc9110_5_6_2_token_delim: bytes
322+) -> None:
323 with pytest.raises(http_exceptions.BadStatusLine):
324- parser.feed_data(b'=":<G>(e),[T];?" /get HTTP/1.1\r\n\r\n')
325+ parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')
326
327
328 def test_http_request_parser_bad_version(parser) -> None:
329@@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None:
330 response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
331
332
333+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
334+def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None:
335+ with pytest.raises(http_exceptions.BadStatusLine):
336+ response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
337+
338+
339 def test_http_request_chunked_payload(parser) -> None:
340 text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n"
341 msg, payload = parser.feed_data(text)[0][0]
342--
3432.40.0
344