1 files changed, 222 insertions, 0 deletions
diff --git a/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch
new file mode 100644
index 0000000000..e5c3606e19
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch
@@ -0,0 +1,222 @@
+From 9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1 Mon Sep 17 00:00:00 2001
+From: Snild Dolkow <snild@sony.com>
+Date: Thu, 17 Aug 2023 16:25:26 +0200
+Subject: [PATCH] Skip parsing after repeated partials on the same token When
+ the parse buffer contains the starting bytes of a token but not all of them,
+ we cannot parse the token to completion. We call this a partial token.  When
+ this happens, the parse position is reset to the start of the token, and the
+ parse() call returns. The client is then expected to provide more data and
+ call parse() again.
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+In extreme cases, this means that the bytes of a token may be parsed
+many times: once for every buffer refill required before the full token
+is present in the buffer.
+Math:
+  Assume there's a token of T bytes
+  Assume the client fills the buffer in chunks of X bytes
+  We'll try to parse X, 2X, 3X, 4X ... until mX == T (technically >=)
+  That's (m²+m)X/2 = (T²/X+T)/2 bytes parsed (arithmetic progression)
+  While it is alleviated by larger refills, this amounts to O(T²)
+Expat grows its internal buffer by doubling it when necessary, but has
+no way to inform the client about how much space is available. Instead,
+we add a heuristic that skips parsing when we've repeatedly stopped on
+an incomplete token. Specifically:
+ * Only try to parse if we have a certain amount of data buffered
+ * Every time we stop on an incomplete token, double the threshold
+ * As soon as any token completes, the threshold is reset
+This means that when we get stuck on an incomplete token, the threshold
+grows exponentially, effectively making the client perform larger buffer
+fills, limiting how many times we can end up re-parsing the same bytes.
+Math:
+  Assume there's a token of T bytes
+  Assume the client fills the buffer in chunks of X bytes
+  We'll try to parse X, 2X, 4X, 8X ... until (2^k)X == T (or larger)
+  That's (2^(k+1)-1)X bytes parsed -- e.g. 15X if T = 8X
+  This is equal to 2T-X, which amounts to O(T)
+We could've chosen a faster growth rate, e.g. 4 or 8. Those seem to
+increase performance further, at the cost of further increasing the
+risk of growing the buffer more than necessary. This can easily be
+adjusted in the future, if desired.
+This is all completely transparent to the client, except for:
+1. possible delay of some callbacks (when our heuristic overshoots)
+2. apps that never do isFinal=XML_TRUE could miss data at the end
+For the affected testdata, this change shows a 100-400x speedup.
+The recset.xml benchmark shows no clear change either way.
+Before:
+benchmark -n ../testdata/largefiles/recset.xml 65535 3
+  3 loops, with buffer size 65535. Average time per loop: 0.270223
+benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 15.033048
+benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.018027
+benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 11.775362
+benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 11.711414
+benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.019362
+After:
+./run.sh benchmark -n ../testdata/largefiles/recset.xml 65535 3
+  3 loops, with buffer size 65535. Average time per loop: 0.269030
+./run.sh benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.044794
+./run.sh benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.016377
+./run.sh benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.027022
+./run.sh benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.099360
+./run.sh benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
+  3 loops, with buffer size 4096. Average time per loop: 0.017956
+CVE: CVE-2023-52425
+Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1]
+Signed-off-by: Meenali Gupta <meenali.gupta@windriver.com>
+---
+ lib/xmlparse.c | 58 +++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 39 insertions(+), 19 deletions(-)
+diff --git a/lib/xmlparse.c b/lib/xmlparse.c
+index bbffcaa..5695417 100644
+--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
+@@ -81,6 +81,7 @@
+ #  endif
+ #endif
+ 
+#include <stdbool.h>
+ #include <stddef.h>
+ #include <string.h> /* memset(), memcpy() */
+ #include <assert.h>
+@@ -629,6 +630,7 @@ struct XML_ParserStruct {
+   const char *m_bufferLim;
+   XML_Index m_parseEndByteIndex;
+   const char *m_parseEndPtr;
+  size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+   XML_Char *m_dataBuf;
+   XML_Char *m_dataBufEnd;
+   XML_StartElementHandler m_startElementHandler;
+@@ -960,6 +962,32 @@ get_hash_secret_salt(XML_Parser parser) {
+   return parser->m_hash_secret_salt;
+ }
+ 
+static enum XML_Error /* wraps m_processor; may defer re-parsing a partial token */
+callProcessor(XML_Parser parser, const char *start, const char *end,
+              const char **endPtr) {
+  const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); // presumably bytes in [start, end); macro defined elsewhere
+
+  if (! parser->m_parsingStatus.finalBuffer) {
+    // Heuristic (bypassed for the final buffer): don't re-parse a partial token
+    // until at least twice the bytes of the last fruitless attempt are available.
+    const size_t had_before = parser->m_partialTokenBytesBefore;
+    const bool enough = (have_now >= 2 * had_before); // always true when had_before is 0
+
+    if (! enough) {
+      *endPtr = start; // callers may expect this to be set; reports "nothing consumed"
+      return XML_ERROR_NONE; // deferred: the processor is not invoked on this call
+    }
+  }
+  const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+  // If nothing was consumed, remember how much data this attempt had to work with.
+  if (*endPtr == start) { // NOTE(review): read even when ret is an error; assumes the processor always sets *endPtr - confirm
+    parser->m_partialTokenBytesBefore = have_now;
+  } else {
+    parser->m_partialTokenBytesBefore = 0; // progress was made: the next attempt is never deferred
+  }
+  return ret;
+}
+
+ static XML_Bool /* only valid for root parser */
+ startParsing(XML_Parser parser) {
+   /* hash functions must be initialized before setContext() is called */
+@@ -1141,6 +1169,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
+   parser->m_bufferEnd = parser->m_buffer;
+   parser->m_parseEndByteIndex = 0;
+   parser->m_parseEndPtr = NULL;
+  parser->m_partialTokenBytesBefore = 0;
+   parser->m_declElementType = NULL;
+   parser->m_declAttributeId = NULL;
+   parser->m_declEntity = NULL;
+@@ -1872,29 +1901,20 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
+        to detect errors based on that fact.
+     */
+     parser->m_errorCode
+-        = parser->m_processor(parser, parser->m_bufferPtr,
+-                              parser->m_parseEndPtr, &parser->m_bufferPtr);
+        = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr,
+                        &parser->m_bufferPtr);
+ 
+     if (parser->m_errorCode == XML_ERROR_NONE) {
+       switch (parser->m_parsingStatus.parsing) {
+       case XML_SUSPENDED:
+-        /* It is hard to be certain, but it seems that this case
+-         * cannot occur.  This code is cleaning up a previous parse
+-         * with no new data (since len == 0).  Changing the parsing
+-         * state requires getting to execute a handler function, and
+-         * there doesn't seem to be an opportunity for that while in
+-         * this circumstance.
+-         *
+-         * Given the uncertainty, we retain the code but exclude it
+-         * from coverage tests.
+-         *
+-         * LCOV_EXCL_START
+-         */
+        /* While we added no new data, the finalBuffer flag may have caused
+         * us to parse previously-unparsed data in the internal buffer.
+         * If that triggered a callback to the application, it would have
+         * had an opportunity to suspend parsing. */
+         XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
+                           parser->m_bufferPtr, &parser->m_position);
+         parser->m_positionPtr = parser->m_bufferPtr;
+         return XML_STATUS_SUSPENDED;
+-        /* LCOV_EXCL_STOP */
+       case XML_INITIALIZED:
+       case XML_PARSING:
+         parser->m_parsingStatus.parsing = XML_FINISHED;
+@@ -1924,7 +1944,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
+     parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
+ 
+     parser->m_errorCode
+-        = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
+        = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
+ 
+     if (parser->m_errorCode != XML_ERROR_NONE) {
+       parser->m_eventEndPtr = parser->m_eventPtr;
+@@ -2027,8 +2047,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
+   parser->m_parseEndByteIndex += len;
+   parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
+ 
+-  parser->m_errorCode = parser->m_processor(
+-      parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
+  parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
+                                      &parser->m_bufferPtr);
+ 
+   if (parser->m_errorCode != XML_ERROR_NONE) {
+     parser->m_eventEndPtr = parser->m_eventPtr;
+@@ -2220,7 +2240,7 @@ XML_ResumeParser(XML_Parser parser) {
+   }
+   parser->m_parsingStatus.parsing = XML_PARSING;
+ 
+-  parser->m_errorCode = parser->m_processor(
+  parser->m_errorCode = callProcessor(
+       parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
+ 
+   if (parser->m_errorCode != XML_ERROR_NONE) {
+-- 
+2.40.0

diff --git a/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch new file mode 100644 index 0000000000..e5c3606e19 --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2023-52425-0003.patch
@@ -0,0 +1,222 @@
	1	From 9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1 Mon Sep 17 00:00:00 2001
	2	From: Snild Dolkow <snild@sony.com>
	3	Date: Thu, 17 Aug 2023 16:25:26 +0200
	4	Subject: [PATCH] Skip parsing after repeated partials on the same token When
	5	the parse buffer contains the starting bytes of a token but not all of them,
	6	we cannot parse the token to completion. We call this a partial token. When
	7	this happens, the parse position is reset to the start of the token, and the
	8	parse() call returns. The client is then expected to provide more data and
	9	call parse() again.
	10	MIME-Version: 1.0
	11	Content-Type: text/plain; charset=UTF-8
	12	Content-Transfer-Encoding: 8bit
	13
	14	In extreme cases, this means that the bytes of a token may be parsed
	15	many times: once for every buffer refill required before the full token
	16	is present in the buffer.
	17
	18	Math:
	19	Assume there's a token of T bytes
	20	Assume the client fills the buffer in chunks of X bytes
	21	We'll try to parse X, 2X, 3X, 4X ... until mX == T (technically >=)
	22	That's (m²+m)X/2 = (T²/X+T)/2 bytes parsed (arithmetic progression)
	23	While it is alleviated by larger refills, this amounts to O(T²)
	24
	25	Expat grows its internal buffer by doubling it when necessary, but has
	26	no way to inform the client about how much space is available. Instead,
	27	we add a heuristic that skips parsing when we've repeatedly stopped on
	28	an incomplete token. Specifically:
	29
	30	* Only try to parse if we have a certain amount of data buffered
	31	* Every time we stop on an incomplete token, double the threshold
	32	* As soon as any token completes, the threshold is reset
	33
	34	This means that when we get stuck on an incomplete token, the threshold
	35	grows exponentially, effectively making the client perform larger buffer
	36	fills, limiting how many times we can end up re-parsing the same bytes.
	37
	38	Math:
	39	Assume there's a token of T bytes
	40	Assume the client fills the buffer in chunks of X bytes
	41	We'll try to parse X, 2X, 4X, 8X ... until (2^k)X == T (or larger)
	42	That's (2^(k+1)-1)X bytes parsed -- e.g. 15X if T = 8X
	43	This is equal to 2T-X, which amounts to O(T)
	44
	45	We could've chosen a faster growth rate, e.g. 4 or 8. Those seem to
	46	increase performance further, at the cost of further increasing the
	47	risk of growing the buffer more than necessary. This can easily be
	48	adjusted in the future, if desired.
	49
	50	This is all completely transparent to the client, except for:
	51	1. possible delay of some callbacks (when our heuristic overshoots)
	52	2. apps that never do isFinal=XML_TRUE could miss data at the end
	53
	54	For the affected testdata, this change shows a 100-400x speedup.
	55	The recset.xml benchmark shows no clear change either way.
	56
	57	Before:
	58	benchmark -n ../testdata/largefiles/recset.xml 65535 3
	59	3 loops, with buffer size 65535. Average time per loop: 0.270223
	60	benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
	61	3 loops, with buffer size 4096. Average time per loop: 15.033048
	62	benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
	63	3 loops, with buffer size 4096. Average time per loop: 0.018027
	64	benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
	65	3 loops, with buffer size 4096. Average time per loop: 11.775362
	66	benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
	67	3 loops, with buffer size 4096. Average time per loop: 11.711414
	68	benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
	69	3 loops, with buffer size 4096. Average time per loop: 0.019362
	70
	71	After:
	72	./run.sh benchmark -n ../testdata/largefiles/recset.xml 65535 3
	73	3 loops, with buffer size 65535. Average time per loop: 0.269030
	74	./run.sh benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
	75	3 loops, with buffer size 4096. Average time per loop: 0.044794
	76	./run.sh benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
	77	3 loops, with buffer size 4096. Average time per loop: 0.016377
	78	./run.sh benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
	79	3 loops, with buffer size 4096. Average time per loop: 0.027022
	80	./run.sh benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
	81	3 loops, with buffer size 4096. Average time per loop: 0.099360
	82	./run.sh benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
	83	3 loops, with buffer size 4096. Average time per loop: 0.017956
	84
	85	CVE: CVE-2023-52425
	86
	87	Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1]
	88
	89	Signed-off-by: Meenali Gupta <meenali.gupta@windriver.com>
	90	---
	91	lib/xmlparse.c | 58 +++++++++++++++++++++++++++++++++-----------------
	92	1 file changed, 39 insertions(+), 19 deletions(-)
	93
	94	diff --git a/lib/xmlparse.c b/lib/xmlparse.c
	95	index bbffcaa..5695417 100644
	96	--- a/lib/xmlparse.c
	97	+++ b/lib/xmlparse.c
	98	@@ -81,6 +81,7 @@
	99	# endif
	100	#endif
	101
	102	+#include <stdbool.h>
	103	#include <stddef.h>
	104	#include <string.h> /* memset(), memcpy() */
	105	#include <assert.h>
	106	@@ -629,6 +630,7 @@ struct XML_ParserStruct {
	107	const char *m_bufferLim;
	108	XML_Index m_parseEndByteIndex;
	109	const char *m_parseEndPtr;
	110	+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
	111	XML_Char *m_dataBuf;
	112	XML_Char *m_dataBufEnd;
	113	XML_StartElementHandler m_startElementHandler;
	114	@@ -960,6 +962,32 @@ get_hash_secret_salt(XML_Parser parser) {
	115	return parser->m_hash_secret_salt;
	116	}
	117
	118	+static enum XML_Error
	119	+callProcessor(XML_Parser parser, const char *start, const char *end,
	120	+ const char **endPtr) {
	121	+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
	122	+
	123	+ if (! parser->m_parsingStatus.finalBuffer) {
	124	+ // Heuristic: don't try to parse a partial token again until the amount of
	125	+ // available data has increased significantly.
	126	+ const size_t had_before = parser->m_partialTokenBytesBefore;
	127	+ const bool enough = (have_now >= 2 * had_before);
	128	+
	129	+ if (! enough) {
	130	+ *endPtr = start; // callers may expect this to be set
	131	+ return XML_ERROR_NONE;
	132	+ }
	133	+ }
	134	+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
	135	+ // if we consumed nothing, remember what we had on this parse attempt.
	136	+ if (*endPtr == start) {
	137	+ parser->m_partialTokenBytesBefore = have_now;
	138	+ } else {
	139	+ parser->m_partialTokenBytesBefore = 0;
	140	+ }
	141	+ return ret;
	142	+}
	143	+
	144	static XML_Bool /* only valid for root parser */
	145	startParsing(XML_Parser parser) {
	146	/* hash functions must be initialized before setContext() is called */
	147	@@ -1141,6 +1169,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
	148	parser->m_bufferEnd = parser->m_buffer;
	149	parser->m_parseEndByteIndex = 0;
	150	parser->m_parseEndPtr = NULL;
	151	+ parser->m_partialTokenBytesBefore = 0;
	152	parser->m_declElementType = NULL;
	153	parser->m_declAttributeId = NULL;
	154	parser->m_declEntity = NULL;
	155	@@ -1872,29 +1901,20 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
	156	to detect errors based on that fact.
	157	*/
	158	parser->m_errorCode
	159	- = parser->m_processor(parser, parser->m_bufferPtr,
	160	- parser->m_parseEndPtr, &parser->m_bufferPtr);
	161	+ = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr,
	162	+ &parser->m_bufferPtr);
	163
	164	if (parser->m_errorCode == XML_ERROR_NONE) {
	165	switch (parser->m_parsingStatus.parsing) {
	166	case XML_SUSPENDED:
	167	- /* It is hard to be certain, but it seems that this case
	168	- * cannot occur. This code is cleaning up a previous parse
	169	- * with no new data (since len == 0). Changing the parsing
	170	- * state requires getting to execute a handler function, and
	171	- * there doesn't seem to be an opportunity for that while in
	172	- * this circumstance.
	173	- *
	174	- * Given the uncertainty, we retain the code but exclude it
	175	- * from coverage tests.
	176	- *
	177	- * LCOV_EXCL_START
	178	- */
	179	+ /* While we added no new data, the finalBuffer flag may have caused
	180	+ * us to parse previously-unparsed data in the internal buffer.
	181	+ * If that triggered a callback to the application, it would have
	182	+ * had an opportunity to suspend parsing. */
	183	XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
	184	parser->m_bufferPtr, &parser->m_position);
	185	parser->m_positionPtr = parser->m_bufferPtr;
	186	return XML_STATUS_SUSPENDED;
	187	- /* LCOV_EXCL_STOP */
	188	case XML_INITIALIZED:
	189	case XML_PARSING:
	190	parser->m_parsingStatus.parsing = XML_FINISHED;
	191	@@ -1924,7 +1944,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
	192	parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
	193
	194	parser->m_errorCode
	195	- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
	196	+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
	197
	198	if (parser->m_errorCode != XML_ERROR_NONE) {
	199	parser->m_eventEndPtr = parser->m_eventPtr;
	200	@@ -2027,8 +2047,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
	201	parser->m_parseEndByteIndex += len;
	202	parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
	203
	204	- parser->m_errorCode = parser->m_processor(
	205	- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
	206	+ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
	207	+ &parser->m_bufferPtr);
	208
	209	if (parser->m_errorCode != XML_ERROR_NONE) {
	210	parser->m_eventEndPtr = parser->m_eventPtr;
	211	@@ -2220,7 +2240,7 @@ XML_ResumeParser(XML_Parser parser) {
	212	}
	213	parser->m_parsingStatus.parsing = XML_PARSING;
	214
	215	- parser->m_errorCode = parser->m_processor(
	216	+ parser->m_errorCode = callProcessor(
	217	parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
	218
	219	if (parser->m_errorCode != XML_ERROR_NONE) {
	220	--
	221	2.40.0
	222