1 files changed, 344 insertions, 0 deletions
diff --git a/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch
new file mode 100644
index 0000000000..6da5b5ba42
--- /dev/null
+++ b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch
@@ -0,0 +1,344 @@
+From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001
+From: Sam Bull <git@sambull.org>
+Date: Sun, 28 Jan 2024 17:09:58 +0000
+Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078)
+Co-authored-by: Paul J. Dorn <pajod@users.noreply.github.com>
+Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко)
+<sviat@redhat.com>
+(cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827)
+CVE: CVE-2024-23829
+Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82]
+Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
+---
+ CONTRIBUTORS.txt          |   1 +
+ aiohttp/http_parser.py    |  30 ++++----
+ tests/test_http_parser.py | 139 +++++++++++++++++++++++++++++++++++++-
+ 3 files changed, 155 insertions(+), 15 deletions(-)
+diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
+index f8a8df5..b9cdf75 100644
+--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
+@@ -240,6 +240,7 @@ Panagiotis Kolokotronis
+ Pankaj Pandey
+ Pau Freixes
+ Paul Colomiets
+Paul J. Dorn
+ Paulius Šileikis
+ Paulus Schoutsen
+ Pavel Kamaev
+diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
+index 175eb7f..91784b3 100644
+--- a/aiohttp/http_parser.py
+++ b/aiohttp/http_parser.py
+@@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable)
+ #     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
+ #             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
+ #     token = 1*tchar
+-METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
+-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
+-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
+-HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
+_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
+TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
+DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
+HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
+ 
+ 
+ class RawRequestMessage(NamedTuple):
+@@ -145,6 +146,7 @@ class HeadersParser:
+         self, lines: List[bytes]
+     ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
+         headers: CIMultiDict[str] = CIMultiDict()
+        # note: "raw" does not mean inclusion of OWS before/after the field value
+         raw_headers = []
+ 
+         lines_idx = 0
+@@ -158,13 +160,14 @@ class HeadersParser:
+             except ValueError:
+                 raise InvalidHeader(line) from None
+ 
+            if len(bname) == 0:
+                raise InvalidHeader(bname)
+
+             # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
+             if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
+                 raise InvalidHeader(line)
+ 
+             bvalue = bvalue.lstrip(b" \t")
+-            if HDRRE.search(bname):
+-                raise InvalidHeader(bname)
+             if len(bname) > self.max_field_size:
+                 raise LineTooLong(
+                     "request header name {}".format(
+@@ -173,6 +176,9 @@ class HeadersParser:
+                     str(self.max_field_size),
+                     str(len(bname)),
+                 )
+            name = bname.decode("utf-8", "surrogateescape")
+            if not TOKENRE.fullmatch(name):
+                raise InvalidHeader(bname)
+ 
+             header_length = len(bvalue)
+ 
+@@ -220,7 +226,6 @@ class HeadersParser:
+                     )
+ 
+             bvalue = bvalue.strip(b" \t")
+-            name = bname.decode("utf-8", "surrogateescape")
+             value = bvalue.decode("utf-8", "surrogateescape")
+ 
+             # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+@@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]):
+ 
+                             # Shouldn't allow +/- or other number formats.
+                             # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
+-                            if not length_hdr.strip(" \t").isdecimal():
+                            # msg.headers is already stripped of leading/trailing wsp
+                            if not DIGITS.fullmatch(length_hdr):
+                                 raise InvalidHeader(CONTENT_LENGTH)
+ 
+                             return int(length_hdr)
+@@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]):
+             )
+ 
+         # method
+-        if not METHRE.match(method):
+        if not TOKENRE.fullmatch(method):
+             raise BadStatusLine(method)
+ 
+         # version
+@@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
+             raise BadStatusLine(line)
+         version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
+ 
+-        # The status code is a three-digit number
+-        if len(status) != 3 or not status.isdecimal():
+        # The status code is a three-digit ASCII number, no padding
+        if len(status) != 3 or not DIGITS.fullmatch(status):
+             raise BadStatusLine(line)
+         status_i = int(status)
+ 
+@@ -844,7 +850,7 @@ class HttpPayloadParser:
+                         if self._lax:  # Allow whitespace in lax mode.
+                             size_b = size_b.strip()
+ 
+-                        if not re.fullmatch(HEXDIGIT, size_b):
+                        if not re.fullmatch(HEXDIGITS, size_b):
+                             exc = TransferEncodingError(
+                                 chunk[:pos].decode("ascii", "surrogateescape")
+                             )
+diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
+index 4b185c9..bcf6058 100644
+--- a/tests/test_http_parser.py
+++ b/tests/test_http_parser.py
+@@ -2,7 +2,8 @@
+ 
+ import asyncio
+ import re
+-from typing import Any, List
+from contextlib import nullcontext
+from typing import Any, Dict, List
+ from unittest import mock
+ from urllib.parse import quote
+ 
+@@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None:
+         parser.feed_data(text)
+ 
+ 
+@pytest.mark.parametrize(
+    "rfc9110_5_6_2_token_delim",
+    r'"(),/:;<=>?@[\]{}',
+)
+def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
+    text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode()
+    expectation = pytest.raises(http_exceptions.BadHttpMessage)
+    if rfc9110_5_6_2_token_delim == ":":
+        # Inserting colon into header just splits name/value earlier.
+        expectation = nullcontext()
+    with expectation:
+        parser.feed_data(text)
+
+
+ @pytest.mark.parametrize(
+     "hdr",
+     (
+         # https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
+         "Content-Length: -5",
+         "Content-Length: +256",
+        "Content-Length: \N{superscript one}",
+        "Content-Length: \N{mathematical double-struck digit one}",
+         "Foo: abc\rdef",  # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
+         "Bar: abc\ndef",
+         "Baz: abc\x00def",
+@@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None:
+         parser.feed_data(text)
+ 
+ 
+def test_parse_unusual_request_line(parser) -> None:
+    if not isinstance(parser, HttpRequestParserPy):
+        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+    text = b"#smol //a HTTP/1.3\r\n\r\n"
+    messages, upgrade, tail = parser.feed_data(text)
+    assert len(messages) == 1
+    msg, _ = messages[0]
+    assert msg.compression is None
+    assert not msg.upgrade
+    assert msg.method == "#smol"
+    assert msg.path == "//a"
+    assert msg.version == (1, 3)
+
+
+ def test_parse(parser) -> None:
+     text = b"GET /test HTTP/1.1\r\n\r\n"
+     messages, upgrade, tail = parser.feed_data(text)
+@@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None:
+         parser.feed_data(text)
+ 
+ 
+_pad: Dict[bytes, str] = {
+    b"": "empty",
+    # not a typo. Python likes triple zero
+    b"\000": "NUL",
+    b" ": "SP",
+    b"  ": "SPSP",
+    # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
+    b"\n": "LF",
+    b"\xa0": "NBSP",
+    b"\t ": "TABSP",
+}
+
+
+@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
+@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
+@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
+def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None:
+    text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2)
+    expectation = pytest.raises(http_exceptions.BadHttpMessage)
+    if pad1 == pad2 == b"" and hdr != b"":
+        # one entry in param matrix is correct: non-empty name, not padded
+        expectation = nullcontext()
+    if pad1 == pad2 == hdr == b"":
+        if not isinstance(parser, HttpRequestParserPy):
+            pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+    with expectation:
+        parser.feed_data(text)
+
+
+def test_empty_header_name(parser) -> None:
+    if not isinstance(parser, HttpRequestParserPy):
+        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+    text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
+    with pytest.raises(http_exceptions.BadHttpMessage):
+        parser.feed_data(text)
+
+
+ def test_invalid_header(parser) -> None:
+     text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n"
+     with pytest.raises(http_exceptions.BadHttpMessage):
+@@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None:
+     assert r"\n" not in exc_info.value.message
+ 
+ 
+_num: Dict[bytes, str] = {
+    # dangerous: accepted by Python int()
+    # unicodedata.category("\U0001D7D9") == 'Nd'
+    "\N{mathematical double-struck digit one}".encode(): "utf8digit",
+    # only added for interop tests, refused by Python int()
+    # unicodedata.category("\U000000B9") == 'No'
+    "\N{superscript one}".encode(): "utf8number",
+    "\N{superscript one}".encode("latin-1"): "latin1number",
+}
+
+
+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
+def test_http_request_bad_status_line_number(
+    parser: Any, nonascii_digit: bytes
+) -> None:
+    text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
+    with pytest.raises(http_exceptions.BadStatusLine):
+        parser.feed_data(text)
+
+
+def test_http_request_bad_status_line_separator(parser: Any) -> None:
+    # single code point, old, multibyte NFKC, multibyte NFKD
+    utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
+    text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
+    with pytest.raises(http_exceptions.BadStatusLine):
+        parser.feed_data(text)
+
+
+ def test_http_request_upgrade(parser) -> None:
+     text = (
+         b"GET /test HTTP/1.1\r\n"
+@@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None:
+     assert tail == b"some raw data"
+ 
+ 
+def test_http_request_parser_utf8_request_line(parser) -> None:
+    if not isinstance(parser, HttpRequestParserPy):
+        pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+    messages, upgrade, tail = parser.feed_data(
+        # note the truncated unicode sequence
+        b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
+        # for easier grep: Latin-1 0xA0 (U+00A0), better known as non-breaking space
+        # note the leading and trailing spaces
+        "sTeP:  \N{latin small letter sharp s}nek\t\N{no-break space}  "
+        "\r\n\r\n".encode()
+    )
+    msg = messages[0][0]
+
+    assert msg.method == "GET"
+    assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
+    assert msg.version == (1, 1)
+    assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
+    assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
+    assert not msg.should_close
+    assert msg.compression is None
+    assert not msg.upgrade
+    assert not msg.chunked
+    assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
+
+
+ def test_http_request_parser_utf8(parser) -> None:
+     text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode()
+     messages, upgrade, tail = parser.feed_data(text)
+@@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None:
+     assert not msg.chunked
+ 
+ 
+-def test_http_request_parser_bad_method(parser) -> None:
+@pytest.mark.parametrize(
+    "rfc9110_5_6_2_token_delim",
+    [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
+)
+def test_http_request_parser_bad_method(
+    parser, rfc9110_5_6_2_token_delim: bytes
+) -> None:
+     with pytest.raises(http_exceptions.BadStatusLine):
+-        parser.feed_data(b'=":<G>(e),[T];?" /get HTTP/1.1\r\n\r\n')
+        parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')
+ 
+ 
+ def test_http_request_parser_bad_version(parser) -> None:
+@@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None:
+         response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
+ 
+ 
+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
+def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None:
+    with pytest.raises(http_exceptions.BadStatusLine):
+        response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
+
+
+ def test_http_request_chunked_payload(parser) -> None:
+     text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n"
+     msg, payload = parser.feed_data(text)[0][0]
+-- 
+2.40.0

diff --git a/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch new file mode 100644 index 0000000000..6da5b5ba42 --- /dev/null +++ b/meta-python/recipes-devtools/python/python3-aiohttp/CVE-2024-23829.patch
@@ -0,0 +1,344 @@
	1	From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001
	2	From: Sam Bull <git@sambull.org>
	3	Date: Sun, 28 Jan 2024 17:09:58 +0000
	4	Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078)
	5
	6	Co-authored-by: Paul J. Dorn <pajod@users.noreply.github.com>
	7	Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко)
	8	<sviat@redhat.com>
	9	(cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827)
	10
	11	CVE: CVE-2024-23829
	12
	13	Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82]
	14
	15	Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
	16	---
	17	CONTRIBUTORS.txt \| 1 +
	18	aiohttp/http_parser.py \| 30 ++++----
	19	tests/test_http_parser.py \| 139 +++++++++++++++++++++++++++++++++++++-
	20	3 files changed, 155 insertions(+), 15 deletions(-)
	21
	22	diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
	23	index f8a8df5..b9cdf75 100644
	24	--- a/CONTRIBUTORS.txt
	25	+++ b/CONTRIBUTORS.txt
	26	@@ -240,6 +240,7 @@ Panagiotis Kolokotronis
	27	Pankaj Pandey
	28	Pau Freixes
	29	Paul Colomiets
	30	+Paul J. Dorn
	31	Paulius Šileikis
	32	Paulus Schoutsen
	33	Pavel Kamaev
	34	diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
	35	index 175eb7f..91784b3 100644
	36	--- a/aiohttp/http_parser.py
	37	+++ b/aiohttp/http_parser.py
	38	@@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable)
	39	# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
	40	# "^" / "_" / "`" / "\|" / "~" / DIGIT / ALPHA
	41	# token = 1*tchar
	42	-METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`\|~0-9A-Za-z]+")
	43	-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
	44	-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
	45	-HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
	46	+_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`\|~")
	47	+TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
	48	+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
	49	+DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
	50	+HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
	51
	52
	53	class RawRequestMessage(NamedTuple):
	54	@@ -145,6 +146,7 @@ class HeadersParser:
	55	self, lines: List[bytes]
	56	) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
	57	headers: CIMultiDict[str] = CIMultiDict()
	58	+ # note: "raw" does not mean inclusion of OWS before/after the field value
	59	raw_headers = []
	60
	61	lines_idx = 0
	62	@@ -158,13 +160,14 @@ class HeadersParser:
	63	except ValueError:
	64	raise InvalidHeader(line) from None
	65
	66	+ if len(bname) == 0:
	67	+ raise InvalidHeader(bname)
	68	+
	69	# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
	70	if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
	71	raise InvalidHeader(line)
	72
	73	bvalue = bvalue.lstrip(b" \t")
	74	- if HDRRE.search(bname):
	75	- raise InvalidHeader(bname)
	76	if len(bname) > self.max_field_size:
	77	raise LineTooLong(
	78	"request header name {}".format(
	79	@@ -173,6 +176,9 @@ class HeadersParser:
	80	str(self.max_field_size),
	81	str(len(bname)),
	82	)
	83	+ name = bname.decode("utf-8", "surrogateescape")
	84	+ if not TOKENRE.fullmatch(name):
	85	+ raise InvalidHeader(bname)
	86
	87	header_length = len(bvalue)
	88
	89	@@ -220,7 +226,6 @@ class HeadersParser:
	90	)
	91
	92	bvalue = bvalue.strip(b" \t")
	93	- name = bname.decode("utf-8", "surrogateescape")
	94	value = bvalue.decode("utf-8", "surrogateescape")
	95
	96	# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
	97	@@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]):
	98
	99	# Shouldn't allow +/- or other number formats.
	100	# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
	101	- if not length_hdr.strip(" \t").isdecimal():
	102	+ # msg.headers is already stripped of leading/trailing wsp
	103	+ if not DIGITS.fullmatch(length_hdr):
	104	raise InvalidHeader(CONTENT_LENGTH)
	105
	106	return int(length_hdr)
	107	@@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]):
	108	)
	109
	110	# method
	111	- if not METHRE.match(method):
	112	+ if not TOKENRE.fullmatch(method):
	113	raise BadStatusLine(method)
	114
	115	# version
	116	@@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
	117	raise BadStatusLine(line)
	118	version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
	119
	120	- # The status code is a three-digit number
	121	- if len(status) != 3 or not status.isdecimal():
	122	+ # The status code is a three-digit ASCII number, no padding
	123	+ if len(status) != 3 or not DIGITS.fullmatch(status):
	124	raise BadStatusLine(line)
	125	status_i = int(status)
	126
	127	@@ -844,7 +850,7 @@ class HttpPayloadParser:
	128	if self._lax: # Allow whitespace in lax mode.
	129	size_b = size_b.strip()
	130
	131	- if not re.fullmatch(HEXDIGIT, size_b):
	132	+ if not re.fullmatch(HEXDIGITS, size_b):
	133	exc = TransferEncodingError(
	134	chunk[:pos].decode("ascii", "surrogateescape")
	135	)
	136	diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
	137	index 4b185c9..bcf6058 100644
	138	--- a/tests/test_http_parser.py
	139	+++ b/tests/test_http_parser.py
	140	@@ -2,7 +2,8 @@
	141
	142	import asyncio
	143	import re
	144	-from typing import Any, List
	145	+from contextlib import nullcontext
	146	+from typing import Any, Dict, List
	147	from unittest import mock
	148	from urllib.parse import quote
	149
	150	@@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None:
	151	parser.feed_data(text)
	152
	153
	154	+@pytest.mark.parametrize(
	155	+ "rfc9110_5_6_2_token_delim",
	156	+ r'"(),/:;<=>?@[\]{}',
	157	+)
	158	+def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
	159	+ text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode()
	160	+ expectation = pytest.raises(http_exceptions.BadHttpMessage)
	161	+ if rfc9110_5_6_2_token_delim == ":":
	162	+ # Inserting colon into header just splits name/value earlier.
	163	+ expectation = nullcontext()
	164	+ with expectation:
	165	+ parser.feed_data(text)
	166	+
	167	+
	168	@pytest.mark.parametrize(
	169	"hdr",
	170	(
	171	# https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
	172	"Content-Length: -5",
	173	"Content-Length: +256",
	174	+ "Content-Length: \N{superscript one}",
	175	+ "Content-Length: \N{mathematical double-struck digit one}",
	176	"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
	177	"Bar: abc\ndef",
	178	"Baz: abc\x00def",
	179	@@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None:
	180	parser.feed_data(text)
	181
	182
	183	+def test_parse_unusual_request_line(parser) -> None:
	184	+ if not isinstance(response, HttpResponseParserPy):
	185	+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
	186	+ text = b"#smol //a HTTP/1.3\r\n\r\n"
	187	+ messages, upgrade, tail = parser.feed_data(text)
	188	+ assert len(messages) == 1
	189	+ msg, _ = messages[0]
	190	+ assert msg.compression is None
	191	+ assert not msg.upgrade
	192	+ assert msg.method == "#smol"
	193	+ assert msg.path == "//a"
	194	+ assert msg.version == (1, 3)
	195	+
	196	+
	197	def test_parse(parser) -> None:
	198	text = b"GET /test HTTP/1.1\r\n\r\n"
	199	messages, upgrade, tail = parser.feed_data(text)
	200	@@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None:
	201	parser.feed_data(text)
	202
	203
	204	+_pad: Dict[bytes, str] = {
	205	+ b"": "empty",
	206	+ # not a typo. Python likes triple zero
	207	+ b"\000": "NUL",
	208	+ b" ": "SP",
	209	+ b" ": "SPSP",
	210	+ # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
	211	+ b"\n": "LF",
	212	+ b"\xa0": "NBSP",
	213	+ b"\t ": "TABSP",
	214	+}
	215	+
	216	+
	217	+@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
	218	+@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
	219	+@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
	220	+def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None:
	221	+ text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2)
	222	+ expectation = pytest.raises(http_exceptions.BadHttpMessage)
	223	+ if pad1 == pad2 == b"" and hdr != b"":
	224	+ # one entry in param matrix is correct: non-empty name, not padded
	225	+ expectation = nullcontext()
	226	+ if pad1 == pad2 == hdr == b"":
	227	+ if not isinstance(response, HttpResponseParserPy):
	228	+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
	229	+ with expectation:
	230	+ parser.feed_data(text)
	231	+
	232	+
	233	+def test_empty_header_name(parser) -> None:
	234	+ if not isinstance(response, HttpResponseParserPy):
	235	+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
	236	+ text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
	237	+ with pytest.raises(http_exceptions.BadHttpMessage):
	238	+ parser.feed_data(text)
	239	+
	240	+
	241	def test_invalid_header(parser) -> None:
	242	text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n"
	243	with pytest.raises(http_exceptions.BadHttpMessage):
	244	@@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None:
	245	assert r"\n" not in exc_info.value.message
	246
	247
	248	+_num: Dict[bytes, str] = {
	249	+ # dangerous: accepted by Python int()
	250	+ # unicodedata.category("\U0001D7D9") == 'Nd'
	251	+ "\N{mathematical double-struck digit one}".encode(): "utf8digit",
	252	+ # only added for interop tests, refused by Python int()
	253	+ # unicodedata.category("\U000000B9") == 'No'
	254	+ "\N{superscript one}".encode(): "utf8number",
	255	+ "\N{superscript one}".encode("latin-1"): "latin1number",
	256	+}
	257	+
	258	+
	259	+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
	260	+def test_http_request_bad_status_line_number(
	261	+ parser: Any, nonascii_digit: bytes
	262	+) -> None:
	263	+ text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
	264	+ with pytest.raises(http_exceptions.BadStatusLine):
	265	+ parser.feed_data(text)
	266	+
	267	+
	268	+def test_http_request_bad_status_line_separator(parser: Any) -> None:
	269	+ # single code point, old, multibyte NFKC, multibyte NFKD
	270	+ utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
	271	+ text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
	272	+ with pytest.raises(http_exceptions.BadStatusLine):
	273	+ parser.feed_data(text)
	274	+
	275	+
	276	def test_http_request_upgrade(parser) -> None:
	277	text = (
	278	b"GET /test HTTP/1.1\r\n"
	279	@@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None:
	280	assert tail == b"some raw data"
	281
	282
	283	+def test_http_request_parser_utf8_request_line(parser) -> None:
	284	+ if not isinstance(response, HttpResponseParserPy):
	285	+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
	286	+ messages, upgrade, tail = parser.feed_data(
	287	+ # note the truncated unicode sequence
	288	+ b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
	289	+ # for easier grep: ASCII 0xA0 more commonly known as non-breaking space
	290	+ # note the leading and trailing spaces
	291	+ "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} "
	292	+ "\r\n\r\n".encode()
	293	+ )
	294	+ msg = messages[0][0]
	295	+
	296	+ assert msg.method == "GET"
	297	+ assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
	298	+ assert msg.version == (1, 1)
	299	+ assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
	300	+ assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
	301	+ assert not msg.should_close
	302	+ assert msg.compression is None
	303	+ assert not msg.upgrade
	304	+ assert not msg.chunked
	305	+ assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
	306	+
	307	+
	308	def test_http_request_parser_utf8(parser) -> None:
	309	text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode()
	310	messages, upgrade, tail = parser.feed_data(text)
	311	@@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None:
	312	assert not msg.chunked
	313
	314
	315	-def test_http_request_parser_bad_method(parser) -> None:
	316	+@pytest.mark.parametrize(
	317	+ "rfc9110_5_6_2_token_delim",
	318	+ [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
	319	+)
	320	+def test_http_request_parser_bad_method(
	321	+ parser, rfc9110_5_6_2_token_delim: bytes
	322	+) -> None:
	323	with pytest.raises(http_exceptions.BadStatusLine):
	324	- parser.feed_data(b'=":<G>(e),[T];?" /get HTTP/1.1\r\n\r\n')
	325	+ parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')
	326
	327
	328	def test_http_request_parser_bad_version(parser) -> None:
	329	@@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None:
	330	response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
	331
	332
	333	+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
	334	+def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None:
	335	+ with pytest.raises(http_exceptions.BadStatusLine):
	336	+ response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
	337	+
	338	+
	339	def test_http_request_chunked_payload(parser) -> None:
	340	text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n"
	341	msg, payload = parser.feed_data(text)[0][0]
	342	--
	343	2.40.0
	344