1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
|
From d33bc21414e283c9e6fe7f6caf69e2ed60d66c82 Mon Sep 17 00:00:00 2001
From: Sam Bull <git@sambull.org>
Date: Sun, 28 Jan 2024 17:09:58 +0000
Subject: [PATCH] Improve validation in HTTP parser (#8074) (#8078)
Co-authored-by: Paul J. Dorn <pajod@users.noreply.github.com>
Co-authored-by: Sviatoslav Sydorenko (Святослав Сидоренко)
<sviat@redhat.com>
(cherry picked from commit 33ccdfb0a12690af5bb49bda2319ec0907fa7827)
CVE: CVE-2024-23829
Upstream-Status: Backport [https://github.com/aio-libs/aiohttp/commit/d33bc21414e283c9e6fe7f6caf69e2ed60d66c82]
Signed-off-by: Soumya Sambu <soumya.sambu@windriver.com>
---
CONTRIBUTORS.txt | 1 +
aiohttp/http_parser.py | 30 ++++----
tests/test_http_parser.py | 139 +++++++++++++++++++++++++++++++++++++-
3 files changed, 155 insertions(+), 15 deletions(-)
diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt
index f8a8df5..b9cdf75 100644
--- a/CONTRIBUTORS.txt
+++ b/CONTRIBUTORS.txt
@@ -240,6 +240,7 @@ Panagiotis Kolokotronis
Pankaj Pandey
Pau Freixes
Paul Colomiets
+Paul J. Dorn
Paulius Šileikis
Paulus Schoutsen
Pavel Kamaev
diff --git a/aiohttp/http_parser.py b/aiohttp/http_parser.py
index 175eb7f..91784b3 100644
--- a/aiohttp/http_parser.py
+++ b/aiohttp/http_parser.py
@@ -76,10 +76,11 @@ ASCIISET: Final[Set[str]] = set(string.printable)
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
-METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
-VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
-HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\"\\]")
-HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
+_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
+TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
+VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
+DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
+HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
class RawRequestMessage(NamedTuple):
@@ -145,6 +146,7 @@ class HeadersParser:
self, lines: List[bytes]
) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
headers: CIMultiDict[str] = CIMultiDict()
+ # note: "raw" does not mean inclusion of OWS before/after the field value
raw_headers = []
lines_idx = 0
@@ -158,13 +160,14 @@ class HeadersParser:
except ValueError:
raise InvalidHeader(line) from None
+ if len(bname) == 0:
+ raise InvalidHeader(bname)
+
# https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
raise InvalidHeader(line)
bvalue = bvalue.lstrip(b" \t")
- if HDRRE.search(bname):
- raise InvalidHeader(bname)
if len(bname) > self.max_field_size:
raise LineTooLong(
"request header name {}".format(
@@ -173,6 +176,9 @@ class HeadersParser:
str(self.max_field_size),
str(len(bname)),
)
+ name = bname.decode("utf-8", "surrogateescape")
+ if not TOKENRE.fullmatch(name):
+ raise InvalidHeader(bname)
header_length = len(bvalue)
@@ -220,7 +226,6 @@ class HeadersParser:
)
bvalue = bvalue.strip(b" \t")
- name = bname.decode("utf-8", "surrogateescape")
value = bvalue.decode("utf-8", "surrogateescape")
# https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
@@ -348,7 +353,8 @@ class HttpParser(abc.ABC, Generic[_MsgT]):
# Shouldn't allow +/- or other number formats.
# https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
- if not length_hdr.strip(" \t").isdecimal():
+ # msg.headers is already stripped of leading/trailing wsp
+ if not DIGITS.fullmatch(length_hdr):
raise InvalidHeader(CONTENT_LENGTH)
return int(length_hdr)
@@ -582,7 +588,7 @@ class HttpRequestParser(HttpParser[RawRequestMessage]):
)
# method
- if not METHRE.match(method):
+ if not TOKENRE.fullmatch(method):
raise BadStatusLine(method)
# version
@@ -690,8 +696,8 @@ class HttpResponseParser(HttpParser[RawResponseMessage]):
raise BadStatusLine(line)
version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
- # The status code is a three-digit number
- if len(status) != 3 or not status.isdecimal():
+ # The status code is a three-digit ASCII number, no padding
+ if len(status) != 3 or not DIGITS.fullmatch(status):
raise BadStatusLine(line)
status_i = int(status)
@@ -844,7 +850,7 @@ class HttpPayloadParser:
if self._lax: # Allow whitespace in lax mode.
size_b = size_b.strip()
- if not re.fullmatch(HEXDIGIT, size_b):
+ if not re.fullmatch(HEXDIGITS, size_b):
exc = TransferEncodingError(
chunk[:pos].decode("ascii", "surrogateescape")
)
diff --git a/tests/test_http_parser.py b/tests/test_http_parser.py
index 4b185c9..bcf6058 100644
--- a/tests/test_http_parser.py
+++ b/tests/test_http_parser.py
@@ -2,7 +2,8 @@
import asyncio
import re
-from typing import Any, List
+from contextlib import nullcontext
+from typing import Any, Dict, List
from unittest import mock
from urllib.parse import quote
@@ -168,12 +169,28 @@ def test_cve_2023_37276(parser) -> None:
parser.feed_data(text)
+@pytest.mark.parametrize(
+ "rfc9110_5_6_2_token_delim",
+ r'"(),/:;<=>?@[\]{}',
+)
+def test_bad_header_name(parser: Any, rfc9110_5_6_2_token_delim: str) -> None:
+ text = f"POST / HTTP/1.1\r\nhead{rfc9110_5_6_2_token_delim}er: val\r\n\r\n".encode()
+ expectation = pytest.raises(http_exceptions.BadHttpMessage)
+ if rfc9110_5_6_2_token_delim == ":":
+ # Inserting colon into header just splits name/value earlier.
+ expectation = nullcontext()
+ with expectation:
+ parser.feed_data(text)
+
+
@pytest.mark.parametrize(
"hdr",
(
# https://www.rfc-editor.org/rfc/rfc9110.html#name-content-length
"Content-Length: -5",
"Content-Length: +256",
+ "Content-Length: \N{superscript one}",
+ "Content-Length: \N{mathematical double-struck digit one}",
"Foo: abc\rdef", # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
"Bar: abc\ndef",
"Baz: abc\x00def",
@@ -240,6 +257,20 @@ def test_whitespace_before_header(parser) -> None:
parser.feed_data(text)
+def test_parse_unusual_request_line(parser) -> None:
+ if not isinstance(response, HttpResponseParserPy):
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+ text = b"#smol //a HTTP/1.3\r\n\r\n"
+ messages, upgrade, tail = parser.feed_data(text)
+ assert len(messages) == 1
+ msg, _ = messages[0]
+ assert msg.compression is None
+ assert not msg.upgrade
+ assert msg.method == "#smol"
+ assert msg.path == "//a"
+ assert msg.version == (1, 3)
+
+
def test_parse(parser) -> None:
text = b"GET /test HTTP/1.1\r\n\r\n"
messages, upgrade, tail = parser.feed_data(text)
@@ -533,6 +564,43 @@ def test_headers_content_length_err_2(parser) -> None:
parser.feed_data(text)
+_pad: Dict[bytes, str] = {
+ b"": "empty",
+ # not a typo. Python likes triple zero
+ b"\000": "NUL",
+ b" ": "SP",
+ b" ": "SPSP",
+ # not a typo: both 0xa0 and 0x0a in case of 8-bit fun
+ b"\n": "LF",
+ b"\xa0": "NBSP",
+ b"\t ": "TABSP",
+}
+
+
+@pytest.mark.parametrize("hdr", [b"", b"foo"], ids=["name-empty", "with-name"])
+@pytest.mark.parametrize("pad2", _pad.keys(), ids=["post-" + n for n in _pad.values()])
+@pytest.mark.parametrize("pad1", _pad.keys(), ids=["pre-" + n for n in _pad.values()])
+def test_invalid_header_spacing(parser, pad1: bytes, pad2: bytes, hdr: bytes) -> None:
+ text = b"GET /test HTTP/1.1\r\n" b"%s%s%s: value\r\n\r\n" % (pad1, hdr, pad2)
+ expectation = pytest.raises(http_exceptions.BadHttpMessage)
+ if pad1 == pad2 == b"" and hdr != b"":
+ # one entry in param matrix is correct: non-empty name, not padded
+ expectation = nullcontext()
+ if pad1 == pad2 == hdr == b"":
+ if not isinstance(response, HttpResponseParserPy):
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+ with expectation:
+ parser.feed_data(text)
+
+
+def test_empty_header_name(parser) -> None:
+ if not isinstance(response, HttpResponseParserPy):
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+ text = b"GET /test HTTP/1.1\r\n" b":test\r\n\r\n"
+ with pytest.raises(http_exceptions.BadHttpMessage):
+ parser.feed_data(text)
+
+
def test_invalid_header(parser) -> None:
text = b"GET /test HTTP/1.1\r\n" b"test line\r\n\r\n"
with pytest.raises(http_exceptions.BadHttpMessage):
@@ -655,6 +723,34 @@ def test_http_request_bad_status_line(parser) -> None:
assert r"\n" not in exc_info.value.message
+_num: Dict[bytes, str] = {
+ # dangerous: accepted by Python int()
+ # unicodedata.category("\U0001D7D9") == 'Nd'
+ "\N{mathematical double-struck digit one}".encode(): "utf8digit",
+ # only added for interop tests, refused by Python int()
+ # unicodedata.category("\U000000B9") == 'No'
+ "\N{superscript one}".encode(): "utf8number",
+ "\N{superscript one}".encode("latin-1"): "latin1number",
+}
+
+
+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
+def test_http_request_bad_status_line_number(
+ parser: Any, nonascii_digit: bytes
+) -> None:
+ text = b"GET /digit HTTP/1." + nonascii_digit + b"\r\n\r\n"
+ with pytest.raises(http_exceptions.BadStatusLine):
+ parser.feed_data(text)
+
+
+def test_http_request_bad_status_line_separator(parser: Any) -> None:
+ # single code point, old, multibyte NFKC, multibyte NFKD
+ utf8sep = "\N{arabic ligature sallallahou alayhe wasallam}".encode()
+ text = b"GET /ligature HTTP/1" + utf8sep + b"1\r\n\r\n"
+ with pytest.raises(http_exceptions.BadStatusLine):
+ parser.feed_data(text)
+
+
def test_http_request_upgrade(parser) -> None:
text = (
b"GET /test HTTP/1.1\r\n"
@@ -670,6 +766,31 @@ def test_http_request_upgrade(parser) -> None:
assert tail == b"some raw data"
+def test_http_request_parser_utf8_request_line(parser) -> None:
+ if not isinstance(response, HttpResponseParserPy):
+ pytest.xfail("Regression test for Py parser. May match C behaviour later.")
+ messages, upgrade, tail = parser.feed_data(
+ # note the truncated unicode sequence
+ b"GET /P\xc3\xbcnktchen\xa0\xef\xb7 HTTP/1.1\r\n" +
+ # for easier grep: ASCII 0xA0 more commonly known as non-breaking space
+ # note the leading and trailing spaces
+ "sTeP: \N{latin small letter sharp s}nek\t\N{no-break space} "
+ "\r\n\r\n".encode()
+ )
+ msg = messages[0][0]
+
+ assert msg.method == "GET"
+ assert msg.path == "/Pünktchen\udca0\udcef\udcb7"
+ assert msg.version == (1, 1)
+ assert msg.headers == CIMultiDict([("STEP", "ßnek\t\xa0")])
+ assert msg.raw_headers == ((b"sTeP", "ßnek\t\xa0".encode()),)
+ assert not msg.should_close
+ assert msg.compression is None
+ assert not msg.upgrade
+ assert not msg.chunked
+ assert msg.url.path == URL("/P%C3%BCnktchen\udca0\udcef\udcb7").path
+
+
def test_http_request_parser_utf8(parser) -> None:
text = "GET /path HTTP/1.1\r\nx-test:тест\r\n\r\n".encode()
messages, upgrade, tail = parser.feed_data(text)
@@ -719,9 +840,15 @@ def test_http_request_parser_two_slashes(parser) -> None:
assert not msg.chunked
-def test_http_request_parser_bad_method(parser) -> None:
+@pytest.mark.parametrize(
+ "rfc9110_5_6_2_token_delim",
+ [bytes([i]) for i in rb'"(),/:;<=>?@[\]{}'],
+)
+def test_http_request_parser_bad_method(
+ parser, rfc9110_5_6_2_token_delim: bytes
+) -> None:
with pytest.raises(http_exceptions.BadStatusLine):
- parser.feed_data(b'=":<G>(e),[T];?" /get HTTP/1.1\r\n\r\n')
+ parser.feed_data(rfc9110_5_6_2_token_delim + b'ET" /get HTTP/1.1\r\n\r\n')
def test_http_request_parser_bad_version(parser) -> None:
@@ -907,6 +1034,12 @@ def test_http_response_parser_code_not_int(response) -> None:
response.feed_data(b"HTTP/1.1 ttt test\r\n\r\n")
+@pytest.mark.parametrize("nonascii_digit", _num.keys(), ids=_num.values())
+def test_http_response_parser_code_not_ascii(response, nonascii_digit: bytes) -> None:
+ with pytest.raises(http_exceptions.BadStatusLine):
+ response.feed_data(b"HTTP/1.1 20" + nonascii_digit + b" test\r\n\r\n")
+
+
def test_http_request_chunked_payload(parser) -> None:
text = b"GET /test HTTP/1.1\r\n" b"transfer-encoding: chunked\r\n\r\n"
msg, payload = parser.feed_data(text)[0][0]
--
2.40.0
|