summary | refs | log | tree | commit | diff | stats
path: root/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch')
-rw-r--r-- meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch 222
1 files changed, 222 insertions, 0 deletions
diff --git a/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch
new file mode 100644
index 0000000000..e5c3606e19
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch
@@ -0,0 +1,222 @@
1From 9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1 Mon Sep 17 00:00:00 2001
2From: Snild Dolkow <snild@sony.com>
3Date: Thu, 17 Aug 2023 16:25:26 +0200
4Subject: [PATCH] Skip parsing after repeated partials on the same token When
5 the parse buffer contains the starting bytes of a token but not all of them,
6 we cannot parse the token to completion. We call this a partial token. When
7 this happens, the parse position is reset to the start of the token, and the
8 parse() call returns. The client is then expected to provide more data and
9 call parse() again.
10MIME-Version: 1.0
11Content-Type: text/plain; charset=UTF-8
12Content-Transfer-Encoding: 8bit
13
14In extreme cases, this means that the bytes of a token may be parsed
15many times: once for every buffer refill required before the full token
16is present in the buffer.
17
18Math:
19 Assume there's a token of T bytes
20 Assume the client fills the buffer in chunks of X bytes
21 We'll try to parse X, 2X, 3X, 4X ... until mX == T (technically >=)
22 That's (m²+m)X/2 = (T²/X+T)/2 bytes parsed (arithmetic progression)
23 While it is alleviated by larger refills, this amounts to O(T²)
24
25Expat grows its internal buffer by doubling it when necessary, but has
26no way to inform the client about how much space is available. Instead,
27we add a heuristic that skips parsing when we've repeatedly stopped on
28an incomplete token. Specifically:
29
30 * Only try to parse if we have a certain amount of data buffered
31 * Every time we stop on an incomplete token, double the threshold
32 * As soon as any token completes, the threshold is reset
33
34This means that when we get stuck on an incomplete token, the threshold
35grows exponentially, effectively making the client perform larger buffer
36fills, limiting how many times we can end up re-parsing the same bytes.
37
38Math:
39 Assume there's a token of T bytes
40 Assume the client fills the buffer in chunks of X bytes
41 We'll try to parse X, 2X, 4X, 8X ... until (2^k)X == T (or larger)
42 That's (2^(k+1)-1)X bytes parsed -- e.g. 15X if T = 8X
43 This is equal to 2T-X, which amounts to O(T)
44
45We could've chosen a faster growth rate, e.g. 4 or 8. Those seem to
46increase performance further, at the cost of further increasing the
47risk of growing the buffer more than necessary. This can easily be
48adjusted in the future, if desired.
49
50This is all completely transparent to the client, except for:
511. possible delay of some callbacks (when our heuristic overshoots)
522. apps that never do isFinal=XML_TRUE could miss data at the end
53
54For the affected testdata, this change shows a 100-400x speedup.
55The recset.xml benchmark shows no clear change either way.
56
57Before:
58benchmark -n ../testdata/largefiles/recset.xml 65535 3
59 3 loops, with buffer size 65535. Average time per loop: 0.270223
60benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
61 3 loops, with buffer size 4096. Average time per loop: 15.033048
62benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
63 3 loops, with buffer size 4096. Average time per loop: 0.018027
64benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
65 3 loops, with buffer size 4096. Average time per loop: 11.775362
66benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
67 3 loops, with buffer size 4096. Average time per loop: 11.711414
68benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
69 3 loops, with buffer size 4096. Average time per loop: 0.019362
70
71After:
72./run.sh benchmark -n ../testdata/largefiles/recset.xml 65535 3
73 3 loops, with buffer size 65535. Average time per loop: 0.269030
74./run.sh benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
75 3 loops, with buffer size 4096. Average time per loop: 0.044794
76./run.sh benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
77 3 loops, with buffer size 4096. Average time per loop: 0.016377
78./run.sh benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
79 3 loops, with buffer size 4096. Average time per loop: 0.027022
80./run.sh benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
81 3 loops, with buffer size 4096. Average time per loop: 0.099360
82./run.sh benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
83 3 loops, with buffer size 4096. Average time per loop: 0.017956
84
85CVE: CVE-2023-52425
86
87Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1]
88
89Signed-off-by: Meenali Gupta <meenali.gupta@windriver.com>
90---
91 lib/xmlparse.c | 58 +++++++++++++++++++++++++++++++++-----------------
92 1 file changed, 39 insertions(+), 19 deletions(-)
93
94diff --git a/lib/xmlparse.c b/lib/xmlparse.c
95index bbffcaa..5695417 100644
96--- a/lib/xmlparse.c
97+++ b/lib/xmlparse.c
98@@ -81,6 +81,7 @@
99 # endif
100 #endif
101
102+#include <stdbool.h>
103 #include <stddef.h>
104 #include <string.h> /* memset(), memcpy() */
105 #include <assert.h>
106@@ -629,6 +630,7 @@ struct XML_ParserStruct {
107 const char *m_bufferLim;
108 XML_Index m_parseEndByteIndex;
109 const char *m_parseEndPtr;
110+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
111 XML_Char *m_dataBuf;
112 XML_Char *m_dataBufEnd;
113 XML_StartElementHandler m_startElementHandler;
114@@ -960,6 +962,32 @@ get_hash_secret_salt(XML_Parser parser) {
115 return parser->m_hash_secret_salt;
116 }
117
118+static enum XML_Error
119+callProcessor(XML_Parser parser, const char *start, const char *end,
120+ const char **endPtr) {
121+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
122+
123+ if (! parser->m_parsingStatus.finalBuffer) {
124+ // Heuristic: don't try to parse a partial token again until the amount of
125+ // available data has increased significantly.
126+ const size_t had_before = parser->m_partialTokenBytesBefore;
127+ const bool enough = (have_now >= 2 * had_before);
128+
129+ if (! enough) {
130+ *endPtr = start; // callers may expect this to be set
131+ return XML_ERROR_NONE;
132+ }
133+ }
134+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
135+ // if we consumed nothing, remember what we had on this parse attempt.
136+ if (*endPtr == start) {
137+ parser->m_partialTokenBytesBefore = have_now;
138+ } else {
139+ parser->m_partialTokenBytesBefore = 0;
140+ }
141+ return ret;
142+}
143+
144 static XML_Bool /* only valid for root parser */
145 startParsing(XML_Parser parser) {
146 /* hash functions must be initialized before setContext() is called */
147@@ -1141,6 +1169,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
148 parser->m_bufferEnd = parser->m_buffer;
149 parser->m_parseEndByteIndex = 0;
150 parser->m_parseEndPtr = NULL;
151+ parser->m_partialTokenBytesBefore = 0;
152 parser->m_declElementType = NULL;
153 parser->m_declAttributeId = NULL;
154 parser->m_declEntity = NULL;
155@@ -1872,29 +1901,20 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
156 to detect errors based on that fact.
157 */
158 parser->m_errorCode
159- = parser->m_processor(parser, parser->m_bufferPtr,
160- parser->m_parseEndPtr, &parser->m_bufferPtr);
161+ = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr,
162+ &parser->m_bufferPtr);
163
164 if (parser->m_errorCode == XML_ERROR_NONE) {
165 switch (parser->m_parsingStatus.parsing) {
166 case XML_SUSPENDED:
167- /* It is hard to be certain, but it seems that this case
168- * cannot occur. This code is cleaning up a previous parse
169- * with no new data (since len == 0). Changing the parsing
170- * state requires getting to execute a handler function, and
171- * there doesn't seem to be an opportunity for that while in
172- * this circumstance.
173- *
174- * Given the uncertainty, we retain the code but exclude it
175- * from coverage tests.
176- *
177- * LCOV_EXCL_START
178- */
179+ /* While we added no new data, the finalBuffer flag may have caused
180+ * us to parse previously-unparsed data in the internal buffer.
181+ * If that triggered a callback to the application, it would have
182+ * had an opportunity to suspend parsing. */
183 XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
184 parser->m_bufferPtr, &parser->m_position);
185 parser->m_positionPtr = parser->m_bufferPtr;
186 return XML_STATUS_SUSPENDED;
187- /* LCOV_EXCL_STOP */
188 case XML_INITIALIZED:
189 case XML_PARSING:
190 parser->m_parsingStatus.parsing = XML_FINISHED;
191@@ -1924,7 +1944,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
192 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
193
194 parser->m_errorCode
195- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
196+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
197
198 if (parser->m_errorCode != XML_ERROR_NONE) {
199 parser->m_eventEndPtr = parser->m_eventPtr;
200@@ -2027,8 +2047,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
201 parser->m_parseEndByteIndex += len;
202 parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
203
204- parser->m_errorCode = parser->m_processor(
205- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
206+ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
207+ &parser->m_bufferPtr);
208
209 if (parser->m_errorCode != XML_ERROR_NONE) {
210 parser->m_eventEndPtr = parser->m_eventPtr;
211@@ -2220,7 +2240,7 @@ XML_ResumeParser(XML_Parser parser) {
212 }
213 parser->m_parsingStatus.parsing = XML_PARSING;
214
215- parser->m_errorCode = parser->m_processor(
216+ parser->m_errorCode = callProcessor(
217 parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
218
219 if (parser->m_errorCode != XML_ERROR_NONE) {
220--
2212.40.0
222