diff options
Diffstat (limited to 'meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch')
-rw-r--r-- | meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch | 222 |
1 files changed, 222 insertions, 0 deletions
diff --git a/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch new file mode 100644 index 0000000000..e5c3606e19 --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch | |||
@@ -0,0 +1,222 @@ | |||
1 | From 9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1 Mon Sep 17 00:00:00 2001 | ||
2 | From: Snild Dolkow <snild@sony.com> | ||
3 | Date: Thu, 17 Aug 2023 16:25:26 +0200 | ||
4 | Subject: [PATCH] Skip parsing after repeated partials on the same token When | ||
5 | the parse buffer contains the starting bytes of a token but not all of them, | ||
6 | we cannot parse the token to completion. We call this a partial token. When | ||
7 | this happens, the parse position is reset to the start of the token, and the | ||
8 | parse() call returns. The client is then expected to provide more data and | ||
9 | call parse() again. | ||
10 | MIME-Version: 1.0 | ||
11 | Content-Type: text/plain; charset=UTF-8 | ||
12 | Content-Transfer-Encoding: 8bit | ||
13 | |||
14 | In extreme cases, this means that the bytes of a token may be parsed | ||
15 | many times: once for every buffer refill required before the full token | ||
16 | is present in the buffer. | ||
17 | |||
18 | Math: | ||
19 | Assume there's a token of T bytes | ||
20 | Assume the client fills the buffer in chunks of X bytes | ||
21 | We'll try to parse X, 2X, 3X, 4X ... until mX == T (technically >=) | ||
22 | That's (m²+m)X/2 = (T²/X+T)/2 bytes parsed (arithmetic progression) | ||
23 | While it is alleviated by larger refills, this amounts to O(T²) | ||
24 | |||
25 | Expat grows its internal buffer by doubling it when necessary, but has | ||
26 | no way to inform the client about how much space is available. Instead, | ||
27 | we add a heuristic that skips parsing when we've repeatedly stopped on | ||
28 | an incomplete token. Specifically: | ||
29 | |||
30 | * Only try to parse if we have a certain amount of data buffered | ||
31 | * Every time we stop on an incomplete token, double the threshold | ||
32 | * As soon as any token completes, the threshold is reset | ||
33 | |||
34 | This means that when we get stuck on an incomplete token, the threshold | ||
35 | grows exponentially, effectively making the client perform larger buffer | ||
36 | fills, limiting how many times we can end up re-parsing the same bytes. | ||
37 | |||
38 | Math: | ||
39 | Assume there's a token of T bytes | ||
40 | Assume the client fills the buffer in chunks of X bytes | ||
41 | We'll try to parse X, 2X, 4X, 8X ... until (2^k)X == T (or larger) | ||
42 | That's (2^(k+1)-1)X bytes parsed -- e.g. 15X if T = 8X | ||
43 | This is equal to 2T-X, which amounts to O(T) | ||
44 | |||
45 | We could've chosen a faster growth rate, e.g. 4 or 8. Those seem to | ||
46 | increase performance further, at the cost of further increasing the | ||
47 | risk of growing the buffer more than necessary. This can easily be | ||
48 | adjusted in the future, if desired. | ||
49 | |||
50 | This is all completely transparent to the client, except for: | ||
51 | 1. possible delay of some callbacks (when our heuristic overshoots) | ||
52 | 2. apps that never do isFinal=XML_TRUE could miss data at the end | ||
53 | |||
54 | For the affected testdata, this change shows a 100-400x speedup. | ||
55 | The recset.xml benchmark shows no clear change either way. | ||
56 | |||
57 | Before: | ||
58 | benchmark -n ../testdata/largefiles/recset.xml 65535 3 | ||
59 | 3 loops, with buffer size 65535. Average time per loop: 0.270223 | ||
60 | benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3 | ||
61 | 3 loops, with buffer size 4096. Average time per loop: 15.033048 | ||
62 | benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3 | ||
63 | 3 loops, with buffer size 4096. Average time per loop: 0.018027 | ||
64 | benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3 | ||
65 | 3 loops, with buffer size 4096. Average time per loop: 11.775362 | ||
66 | benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3 | ||
67 | 3 loops, with buffer size 4096. Average time per loop: 11.711414 | ||
68 | benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3 | ||
69 | 3 loops, with buffer size 4096. Average time per loop: 0.019362 | ||
70 | |||
71 | After: | ||
72 | ./run.sh benchmark -n ../testdata/largefiles/recset.xml 65535 3 | ||
73 | 3 loops, with buffer size 65535. Average time per loop: 0.269030 | ||
74 | ./run.sh benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3 | ||
75 | 3 loops, with buffer size 4096. Average time per loop: 0.044794 | ||
76 | ./run.sh benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3 | ||
77 | 3 loops, with buffer size 4096. Average time per loop: 0.016377 | ||
78 | ./run.sh benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3 | ||
79 | 3 loops, with buffer size 4096. Average time per loop: 0.027022 | ||
80 | ./run.sh benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3 | ||
81 | 3 loops, with buffer size 4096. Average time per loop: 0.099360 | ||
82 | ./run.sh benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3 | ||
83 | 3 loops, with buffer size 4096. Average time per loop: 0.017956 | ||
84 | |||
85 | CVE: CVE-2023-52425 | ||
86 | |||
87 | Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1] | ||
88 | |||
89 | Signed-off-by: Meenali Gupta <meenali.gupta@windriver.com> | ||
90 | --- | ||
91 | lib/xmlparse.c | 58 +++++++++++++++++++++++++++++++++----------------- | ||
92 | 1 file changed, 39 insertions(+), 19 deletions(-) | ||
93 | |||
94 | diff --git a/lib/xmlparse.c b/lib/xmlparse.c | ||
95 | index bbffcaa..5695417 100644 | ||
96 | --- a/lib/xmlparse.c | ||
97 | +++ b/lib/xmlparse.c | ||
98 | @@ -81,6 +81,7 @@ | ||
99 | # endif | ||
100 | #endif | ||
101 | |||
102 | +#include <stdbool.h> | ||
103 | #include <stddef.h> | ||
104 | #include <string.h> /* memset(), memcpy() */ | ||
105 | #include <assert.h> | ||
106 | @@ -629,6 +630,7 @@ struct XML_ParserStruct { | ||
107 | const char *m_bufferLim; | ||
108 | XML_Index m_parseEndByteIndex; | ||
109 | const char *m_parseEndPtr; | ||
110 | + size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ | ||
111 | XML_Char *m_dataBuf; | ||
112 | XML_Char *m_dataBufEnd; | ||
113 | XML_StartElementHandler m_startElementHandler; | ||
114 | @@ -960,6 +962,32 @@ get_hash_secret_salt(XML_Parser parser) { | ||
115 | return parser->m_hash_secret_salt; | ||
116 | } | ||
117 | |||
118 | +static enum XML_Error | ||
119 | +callProcessor(XML_Parser parser, const char *start, const char *end, | ||
120 | + const char **endPtr) { | ||
121 | + const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); | ||
122 | + | ||
123 | + if (! parser->m_parsingStatus.finalBuffer) { | ||
124 | + // Heuristic: don't try to parse a partial token again until the amount of | ||
125 | + // available data is at least double what the last fruitless attempt had. | ||
126 | + const size_t had_before = parser->m_partialTokenBytesBefore; | ||
127 | + const bool enough = (have_now >= 2 * had_before); | ||
128 | + | ||
129 | + if (! enough) { | ||
130 | + *endPtr = start; // nothing consumed; callers may expect this to be set | ||
131 | + return XML_ERROR_NONE; | ||
132 | + } | ||
133 | + } | ||
134 | + const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); | ||
135 | + // If nothing was consumed, remember how much we had; otherwise reset to 0. | ||
136 | + if (*endPtr == start) { | ||
137 | + parser->m_partialTokenBytesBefore = have_now; | ||
138 | + } else { | ||
139 | + parser->m_partialTokenBytesBefore = 0; | ||
140 | + } | ||
141 | + return ret; | ||
142 | +} | ||
143 | + | ||
144 | static XML_Bool /* only valid for root parser */ | ||
145 | startParsing(XML_Parser parser) { | ||
146 | /* hash functions must be initialized before setContext() is called */ | ||
147 | @@ -1141,6 +1169,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { | ||
148 | parser->m_bufferEnd = parser->m_buffer; | ||
149 | parser->m_parseEndByteIndex = 0; | ||
150 | parser->m_parseEndPtr = NULL; | ||
151 | + parser->m_partialTokenBytesBefore = 0; | ||
152 | parser->m_declElementType = NULL; | ||
153 | parser->m_declAttributeId = NULL; | ||
154 | parser->m_declEntity = NULL; | ||
155 | @@ -1872,29 +1901,20 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { | ||
156 | to detect errors based on that fact. | ||
157 | */ | ||
158 | parser->m_errorCode | ||
159 | - = parser->m_processor(parser, parser->m_bufferPtr, | ||
160 | - parser->m_parseEndPtr, &parser->m_bufferPtr); | ||
161 | + = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, | ||
162 | + &parser->m_bufferPtr); | ||
163 | |||
164 | if (parser->m_errorCode == XML_ERROR_NONE) { | ||
165 | switch (parser->m_parsingStatus.parsing) { | ||
166 | case XML_SUSPENDED: | ||
167 | - /* It is hard to be certain, but it seems that this case | ||
168 | - * cannot occur. This code is cleaning up a previous parse | ||
169 | - * with no new data (since len == 0). Changing the parsing | ||
170 | - * state requires getting to execute a handler function, and | ||
171 | - * there doesn't seem to be an opportunity for that while in | ||
172 | - * this circumstance. | ||
173 | - * | ||
174 | - * Given the uncertainty, we retain the code but exclude it | ||
175 | - * from coverage tests. | ||
176 | - * | ||
177 | - * LCOV_EXCL_START | ||
178 | - */ | ||
179 | + /* While we added no new data, the finalBuffer flag may have caused | ||
180 | + * us to parse previously-unparsed data in the internal buffer. | ||
181 | + * If that triggered a callback to the application, it would have | ||
182 | + * had an opportunity to suspend parsing. */ | ||
183 | XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, | ||
184 | parser->m_bufferPtr, &parser->m_position); | ||
185 | parser->m_positionPtr = parser->m_bufferPtr; | ||
186 | return XML_STATUS_SUSPENDED; | ||
187 | - /* LCOV_EXCL_STOP */ | ||
188 | case XML_INITIALIZED: | ||
189 | case XML_PARSING: | ||
190 | parser->m_parsingStatus.parsing = XML_FINISHED; | ||
191 | @@ -1924,7 +1944,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { | ||
192 | parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; | ||
193 | |||
194 | parser->m_errorCode | ||
195 | - = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); | ||
196 | + = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); | ||
197 | |||
198 | if (parser->m_errorCode != XML_ERROR_NONE) { | ||
199 | parser->m_eventEndPtr = parser->m_eventPtr; | ||
200 | @@ -2027,8 +2047,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { | ||
201 | parser->m_parseEndByteIndex += len; | ||
202 | parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; | ||
203 | |||
204 | - parser->m_errorCode = parser->m_processor( | ||
205 | - parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); | ||
206 | + parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, | ||
207 | + &parser->m_bufferPtr); | ||
208 | |||
209 | if (parser->m_errorCode != XML_ERROR_NONE) { | ||
210 | parser->m_eventEndPtr = parser->m_eventPtr; | ||
211 | @@ -2220,7 +2240,7 @@ XML_ResumeParser(XML_Parser parser) { | ||
212 | } | ||
213 | parser->m_parsingStatus.parsing = XML_PARSING; | ||
214 | |||
215 | - parser->m_errorCode = parser->m_processor( | ||
216 | + parser->m_errorCode = callProcessor( | ||
217 | parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr); | ||
218 | |||
219 | if (parser->m_errorCode != XML_ERROR_NONE) { | ||
220 | -- | ||
221 | 2.40.0 | ||
222 | |||