summaryrefslogtreecommitdiffstats
path: root/meta-python/recipes-devtools/python/python3-tornado/CVE-2025-47287.patch
blob: 02439c43e0d1d003c4d6ab0bc4493eaaa0f9054a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
From 85a6a33e774376ec5b286d3a4857c569b8a8c4a8 Mon Sep 17 00:00:00 2001
From: Ben Darnell <ben@bendarnell.com>
Date: Thu, 8 May 2025 13:29:43 -0400
Subject: [PATCH] httputil: Raise errors instead of logging in
 multipart/form-data parsing

We used to continue after logging an error, which allowed repeated
errors to spam the logs. The error raised here will still be logged,
but only once per request, consistent with other error handling in
Tornado.

CVE: CVE-2025-47287
Upstream-Status: Backport [https://github.com/tornadoweb/tornado/commit/cc61050e8f26697463142d99864b562e8470b41d]
Signed-off-by: Ankur Tyagi <ankur.tyagi85@gmail.com>
---
 tornado/httputil.py             | 30 +++++++++++-------------------
 tornado/test/httpserver_test.py |  4 ++--
 tornado/test/httputil_test.py   | 13 ++++++++-----
 tornado/web.py                  | 17 +++++++++++++----
 4 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/tornado/httputil.py b/tornado/httputil.py
index ebdc8059..090a977d 100644
--- a/tornado/httputil.py
+++ b/tornado/httputil.py
@@ -34,7 +34,6 @@ import unicodedata
 from urllib.parse import urlencode, urlparse, urlunparse, parse_qsl
 
 from tornado.escape import native_str, parse_qs_bytes, utf8
-from tornado.log import gen_log
 from tornado.util import ObjectDict, unicode_type
 
 
@@ -762,25 +761,22 @@ def parse_body_arguments(
     """
     if content_type.startswith("application/x-www-form-urlencoded"):
         if headers and "Content-Encoding" in headers:
-            gen_log.warning(
-                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
+            raise HTTPInputError(
+                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
             )
-            return
         try:
             # real charset decoding will happen in RequestHandler.decode_argument()
             uri_arguments = parse_qs_bytes(body, keep_blank_values=True)
         except Exception as e:
-            gen_log.warning("Invalid x-www-form-urlencoded body: %s", e)
-            uri_arguments = {}
+            raise HTTPInputError("Invalid x-www-form-urlencoded body: %s" % e) from e
         for name, values in uri_arguments.items():
             if values:
                 arguments.setdefault(name, []).extend(values)
     elif content_type.startswith("multipart/form-data"):
         if headers and "Content-Encoding" in headers:
-            gen_log.warning(
-                "Unsupported Content-Encoding: %s", headers["Content-Encoding"]
+            raise HTTPInputError(
+                "Unsupported Content-Encoding: %s" % headers["Content-Encoding"]
             )
-            return
         try:
             fields = content_type.split(";")
             for field in fields:
@@ -789,9 +785,9 @@ def parse_body_arguments(
                     parse_multipart_form_data(utf8(v), body, arguments, files)
                     break
             else:
-                raise ValueError("multipart boundary not found")
+                raise HTTPInputError("multipart boundary not found")
         except Exception as e:
-            gen_log.warning("Invalid multipart/form-data: %s", e)
+            raise HTTPInputError("Invalid multipart/form-data: %s" % e) from e
 
 
 def parse_multipart_form_data(
@@ -820,26 +816,22 @@ def parse_multipart_form_data(
         boundary = boundary[1:-1]
     final_boundary_index = data.rfind(b"--" + boundary + b"--")
     if final_boundary_index == -1:
-        gen_log.warning("Invalid multipart/form-data: no final boundary")
-        return
+        raise HTTPInputError("Invalid multipart/form-data: no final boundary found")
     parts = data[:final_boundary_index].split(b"--" + boundary + b"\r\n")
     for part in parts:
         if not part:
             continue
         eoh = part.find(b"\r\n\r\n")
         if eoh == -1:
-            gen_log.warning("multipart/form-data missing headers")
-            continue
+            raise HTTPInputError("multipart/form-data missing headers")
         headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))
         disp_header = headers.get("Content-Disposition", "")
         disposition, disp_params = _parse_header(disp_header)
         if disposition != "form-data" or not part.endswith(b"\r\n"):
-            gen_log.warning("Invalid multipart/form-data")
-            continue
+            raise HTTPInputError("Invalid multipart/form-data")
         value = part[eoh + 4 : -2]
         if not disp_params.get("name"):
-            gen_log.warning("multipart/form-data value missing name")
-            continue
+            raise HTTPInputError("multipart/form-data missing name")
         name = disp_params["name"]
         if disp_params.get("filename"):
             ctype = headers.get("Content-Type", "application/unknown")
diff --git a/tornado/test/httpserver_test.py b/tornado/test/httpserver_test.py
index 0b29a39c..5d5fb13a 100644
--- a/tornado/test/httpserver_test.py
+++ b/tornado/test/httpserver_test.py
@@ -1131,9 +1131,9 @@ class GzipUnsupportedTest(GzipBaseTest, AsyncHTTPTestCase):
         # Gzip support is opt-in; without it the server fails to parse
         # the body (but parsing form bodies is currently just a log message,
         # not a fatal error).
-        with ExpectLog(gen_log, "Unsupported Content-Encoding"):
+        with ExpectLog(gen_log, ".*Unsupported Content-Encoding"):
             response = self.post_gzip("foo=bar")
-        self.assertEqual(json_decode(response.body), {})
+        self.assertEqual(response.code, 400)
 
 
 class StreamingChunkSizeTest(AsyncHTTPTestCase):
diff --git a/tornado/test/httputil_test.py b/tornado/test/httputil_test.py
index 975900aa..9494d0c1 100644
--- a/tornado/test/httputil_test.py
+++ b/tornado/test/httputil_test.py
@@ -12,7 +12,6 @@ from tornado.httputil import (
 )
 from tornado.escape import utf8, native_str
 from tornado.log import gen_log
-from tornado.testing import ExpectLog
 from tornado.test.util import ignore_deprecation
 
 import copy
@@ -195,7 +194,9 @@ Foo
             b"\n", b"\r\n"
         )
         args, files = form_data_args()
-        with ExpectLog(gen_log, "multipart/form-data missing headers"):
+        with self.assertRaises(
+            HTTPInputError, msg="multipart/form-data missing headers"
+        ):
             parse_multipart_form_data(b"1234", data, args, files)
         self.assertEqual(files, {})
 
@@ -209,7 +210,7 @@ Foo
             b"\n", b"\r\n"
         )
         args, files = form_data_args()
-        with ExpectLog(gen_log, "Invalid multipart/form-data"):
+        with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
             parse_multipart_form_data(b"1234", data, args, files)
         self.assertEqual(files, {})
 
@@ -222,7 +223,7 @@ Foo--1234--""".replace(
             b"\n", b"\r\n"
         )
         args, files = form_data_args()
-        with ExpectLog(gen_log, "Invalid multipart/form-data"):
+        with self.assertRaises(HTTPInputError, msg="Invalid multipart/form-data"):
             parse_multipart_form_data(b"1234", data, args, files)
         self.assertEqual(files, {})
 
@@ -236,7 +237,9 @@ Foo
             b"\n", b"\r\n"
         )
         args, files = form_data_args()
-        with ExpectLog(gen_log, "multipart/form-data value missing name"):
+        with self.assertRaises(
+            HTTPInputError, msg="multipart/form-data value missing name"
+        ):
             parse_multipart_form_data(b"1234", data, args, files)
         self.assertEqual(files, {})
 
diff --git a/tornado/web.py b/tornado/web.py
index 03939647..8ec5601b 100644
--- a/tornado/web.py
+++ b/tornado/web.py
@@ -1751,6 +1751,14 @@ class RequestHandler(object):
         try:
             if self.request.method not in self.SUPPORTED_METHODS:
                 raise HTTPError(405)
+
+            # If we're not in stream_request_body mode, this is the place where we parse the body.
+            if not _has_stream_request_body(self.__class__):
+                try:
+                    self.request._parse_body()
+                except httputil.HTTPInputError as e:
+                    raise HTTPError(400, "Invalid body: %s" % e) from e
+
             self.path_args = [self.decode_argument(arg) for arg in args]
             self.path_kwargs = dict(
                 (k, self.decode_argument(v, name=k)) for (k, v) in kwargs.items()
@@ -1941,7 +1949,7 @@ def _has_stream_request_body(cls: Type[RequestHandler]) -> bool:
 
 
 def removeslash(
-    method: Callable[..., Optional[Awaitable[None]]]
+    method: Callable[..., Optional[Awaitable[None]]],
 ) -> Callable[..., Optional[Awaitable[None]]]:
     """Use this decorator to remove trailing slashes from the request path.
 
@@ -1970,7 +1978,7 @@ def removeslash(
 
 
 def addslash(
-    method: Callable[..., Optional[Awaitable[None]]]
+    method: Callable[..., Optional[Awaitable[None]]],
 ) -> Callable[..., Optional[Awaitable[None]]]:
     """Use this decorator to add a missing trailing slash to the request path.
 
@@ -2394,8 +2402,9 @@ class _HandlerDelegate(httputil.HTTPMessageDelegate):
         if self.stream_request_body:
             future_set_result_unless_cancelled(self.request._body_future, None)
         else:
+            # Note that the body gets parsed in RequestHandler._execute so it can be in
+            # the right exception handler scope.
             self.request.body = b"".join(self.chunks)
-            self.request._parse_body()
             self.execute()
 
     def on_connection_close(self) -> None:
@@ -3267,7 +3276,7 @@ class GZipContentEncoding(OutputTransform):
 
 
 def authenticated(
-    method: Callable[..., Optional[Awaitable[None]]]
+    method: Callable[..., Optional[Awaitable[None]]],
 ) -> Callable[..., Optional[Awaitable[None]]]:
     """Decorate methods with this to require that the user be logged in.