expat: fix CVE-2022-25235

Backport patch to fix CVE-2022-25235 for expat.

CVE: CVE-2022-25235

(From OE-Core rev: 60dd7d2deeda838346f30b6f8de28dfac7efac0d)

Signed-off-by: Kai Kang <kai.kang@windriver.com>
Signed-off-by: Anuj Mittal <anuj.mittal@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Kai Kang <kai.kang@windriver.com> 2022-03-11 21:58:34 +0800
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2022-03-22 22:18:51 +0000
commit: 37a86b19585fefbb83bc94572c5742c2a60c4490 (patch)
tree: d5da85ad5ca0f3637445a0f463722adc268fbcdc
parent: 02a3d2d460c8172251c783d52b00ca256d23f351 (diff)
download: poky-37a86b19585fefbb83bc94572c5742c2a60c4490.tar.gz
2 files changed, 262 insertions, 0 deletions
diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
new file mode 100644
index 0000000000..9febeae609
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
@@ -0,0 +1,261 @@
+Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/306b721]
+CVE: CVE-2022-25235
+
+The commit is a merge commit, and this patch is created by:
+
+$ git show -m -p --stat 306b72134f157bbfd1637b20a22cabf4acfa136a
+
+Remove modification for expat/Changes which fails to be applied.
+
+Signed-off-by: Kai Kang <kai.kang@windriver.com>
+
+commit 306b72134f157bbfd1637b20a22cabf4acfa136a (from 2cc97e875ef84da4bcf55156c83599116f7523b4)
+Merge: 2cc97e87 c16300f0
+Author: Sebastian Pipping <sebastian@pipping.org>
+Date:   Fri Feb 18 20:12:32 2022 +0100
+
+    Merge pull request #562 from libexpat/utf8-security
+    
+    [CVE-2022-25235] lib: Protect against malformed encoding (e.g. malformed UTF-8)
+---
+ expat/Changes           |   7 ++++
+ expat/lib/xmltok.c      |   5 ---
+ expat/lib/xmltok_impl.c |  18 ++++----
+ expat/tests/runtests.c  | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 127 insertions(+), 12 deletions(-)
+
+diff --git a/lib/xmltok.c b/lib/xmltok.c
+index a72200e8..3bddf125 100644
+--- a/lib/xmltok.c
+++ b/lib/xmltok.c
+@@ -98,11 +98,6 @@
+         + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)]                 \
+    & (1u << (((byte)[2]) & 0x1F)))
+ 
+-#define UTF8_GET_NAMING(pages, p, n)                                           \
+-  ((n) == 2                                                                    \
+-       ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p))                   \
+-       : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
+-
+ /* Detection of invalid UTF-8 sequences is based on Table 3.1B
+    of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
+    with the additional restriction of not allowing the Unicode
+diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
+index 0430591b..84ff35f9 100644
+--- a/lib/xmltok_impl.c
+++ b/lib/xmltok_impl.c
+@@ -69,7 +69,7 @@
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
+-    if (! IS_NAME_CHAR(enc, ptr, n)) {                                         \
++    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) {         \
+       *nextTokPtr = ptr;                                                       \
+       return XML_TOK_INVALID;                                                  \
+     }                                                                          \
+@@ -98,7 +98,7 @@
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
+-    if (! IS_NMSTRT_CHAR(enc, ptr, n)) {                                       \
++    if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) {       \
+       *nextTokPtr = ptr;                                                       \
+       return XML_TOK_INVALID;                                                  \
+     }                                                                          \
+@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
+   case BT_LEAD##n:                                                             \
+     if (end - ptr < n)                                                         \
+       return XML_TOK_PARTIAL_CHAR;                                             \
++    if (IS_INVALID_CHAR(enc, ptr, n)) {                                        \
++      *nextTokPtr = ptr;                                                       \
++      return XML_TOK_INVALID;                                                  \
++    }                                                                          \
+     if (IS_NMSTRT_CHAR(enc, ptr, n)) {                                         \
+       ptr += n;                                                                \
+       tok = XML_TOK_NAME;                                                      \
+@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     break;
+       LEAD_CASE(2)
+       LEAD_CASE(3)
+@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     break;
+       LEAD_CASE(2)
+       LEAD_CASE(3)
+@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
+       state = inName;                                                          \
+     }
+ #  define LEAD_CASE(n)                                                         \
+-  case BT_LEAD##n:                                                             \
++  case BT_LEAD##n: /* NOTE: The encoding has already been validated. */        \
+     START_NAME ptr += (n - MINBPC(enc));                                       \
+     break;
+       LEAD_CASE(2)
+@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     break;
+       LEAD_CASE(2)
+       LEAD_CASE(3)
+@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
+     switch (BYTE_TYPE(enc, ptr)) {
+ #  define LEAD_CASE(n)                                                         \
+   case BT_LEAD##n:                                                             \
+-    ptr += n;                                                                  \
++    ptr += n; /* NOTE: The encoding has already been validated. */             \
+     pos->columnNumber++;                                                       \
+     break;
+       LEAD_CASE(2)
+diff --git a/tests/runtests.c b/tests/runtests.c
+index bc5344b1..9b155b82 100644
+--- a/tests/runtests.c
+++ b/tests/runtests.c
+@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
+ }
+ END_TEST
+ 
++START_TEST(test_utf8_in_start_tags) {
++  struct test_case {
++    bool goodName;
++    bool goodNameStart;
++    const char *tagName;
++  };
++
++  // The idea with the tests below is this:
++  // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
++  // go to isNever and are hence not a concern.
++  //
++  // We start with a character that is a valid name character
++  // (or even name-start character, see XML 1.0r4 spec) and then we flip
++  // single bits at places where (1) the result leaves the UTF-8 encoding space
++  // and (2) we stay in the same n-byte sequence family.
++  //
++  // The flipped bits are highlighted in angle brackets in comments,
++  // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
++  // the most significant bit to 1 to leave UTF-8 encoding space.
++  struct test_case cases[] = {
++      // 1-byte UTF-8: [0xxx xxxx]
++      {true, true, "\x3A"},   // [0011 1010] = ASCII colon ':'
++      {false, false, "\xBA"}, // [<1>011 1010]
++      {true, false, "\x39"},  // [0011 1001] = ASCII nine '9'
++      {false, false, "\xB9"}, // [<1>011 1001]
++
++      // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
++      {true, true, "\xDB\xA5"},   // [1101 1011] [1010 0101] =
++                                  // Arabic small waw U+06E5
++      {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
++      {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
++      {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
++      {true, false, "\xCC\x81"},  // [1100 1100] [1000 0001] =
++                                  // combining char U+0301
++      {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
++      {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
++      {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
++
++      // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
++      {true, true, "\xE0\xA4\x85"},   // [1110 0000] [1010 0100] [1000 0101] =
++                                      // Devanagari Letter A U+0905
++      {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
++      {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
++      {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
++      {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
++      {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
++      {true, false, "\xE0\xA4\x81"},  // [1110 0000] [1010 0100] [1000 0001] =
++                                      // combining char U+0901
++      {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
++      {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
++      {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
++      {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
++      {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
++  };
++  const bool atNameStart[] = {true, false};
++
++  size_t i = 0;
++  char doc[1024];
++  size_t failCount = 0;
++
++  for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
++    size_t j = 0;
++    for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
++      const bool expectedSuccess
++          = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
++      sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
++      XML_Parser parser = XML_ParserCreate(NULL);
++
++      const enum XML_Status status
++          = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
++
++      bool success = true;
++      if ((status == XML_STATUS_OK) != expectedSuccess) {
++        success = false;
++      }
++      if ((status == XML_STATUS_ERROR)
++          && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
++        success = false;
++      }
++
++      if (! success) {
++        fprintf(
++            stderr,
++            "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
++            (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
++            (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
++        failCount++;
++      }
++
++      XML_ParserFree(parser);
++    }
++  }
++
++  if (failCount > 0) {
++    fail("UTF-8 regression detected");
++  }
++}
++END_TEST
++
+ /* Test trailing spaces in elements are accepted */
+ static void XMLCALL
+ record_element_end_handler(void *userData, const XML_Char *name) {
+@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
+ }
+ END_TEST
+ 
++START_TEST(test_bad_doctype_utf8) {
++  const char *text = "<!DOCTYPE \xDB\x25"
++                     "doc><doc/>"; // [1101 1011] [<0>010 0101]
++  expect_failure(text, XML_ERROR_INVALID_TOKEN,
++                 "Invalid UTF-8 in DOCTYPE not faulted");
++}
++END_TEST
++
+ START_TEST(test_bad_doctype_utf16) {
+   const char text[] =
+       /* <!DOCTYPE doc [ \x06f2 ]><doc/>
+@@ -11870,6 +11977,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
+   tcase_add_test(tc_basic, test_utf8_in_cdata_section);
+   tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
++  tcase_add_test(tc_basic, test_utf8_in_start_tags);
+   tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
+   tcase_add_test(tc_basic, test_utf16_attribute);
+   tcase_add_test(tc_basic, test_utf16_second_attr);
+@@ -11878,6 +11986,7 @@ make_suite(void) {
+   tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
+   tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
+   tcase_add_test(tc_basic, test_bad_doctype);
++  tcase_add_test(tc_basic, test_bad_doctype_utf8);
+   tcase_add_test(tc_basic, test_bad_doctype_utf16);
+   tcase_add_test(tc_basic, test_bad_doctype_plus);
+   tcase_add_test(tc_basic, test_bad_doctype_star);
diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb
index a851e54b2a..0b3331981c 100644
--- a/meta/recipes-core/expat/expat_2.2.10.bb
+++ b/meta/recipes-core/expat/expat_2.2.10.bb
@@ -17,6 +17,7 @@ SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TA
           file://CVE-2021-46143.patch \
           file://CVE-2022-23852.patch \
           file://CVE-2022-23990.patch \
+           file://CVE-2022-25235.patch \
           "
 UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"
author	Kai Kang <kai.kang@windriver.com>	2022-03-11 21:58:34 +0800
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2022-03-22 22:18:51 +0000
commit	37a86b19585fefbb83bc94572c5742c2a60c4490 (patch)
tree	d5da85ad5ca0f3637445a0f463722adc268fbcdc
parent	02a3d2d460c8172251c783d52b00ca256d23f351 (diff)
download	poky-37a86b19585fefbb83bc94572c5742c2a60c4490.tar.gz

diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch new file mode 100644 index 0000000000..9febeae609 --- /dev/null +++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
@@ -0,0 +1,261 @@
		1	Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/306b721]
		2	CVE: CVE-2022-25235
		3
		4	The commit is a merge commit, and this patch is created by:
		5
		6	$ git show -m -p --stat 306b72134f157bbfd1637b20a22cabf4acfa136a
		7
		8	Remove modification for expat/Changes which fails to be applied.
		9
		10	Signed-off-by: Kai Kang <kai.kang@windriver.com>
		11
		12	commit 306b72134f157bbfd1637b20a22cabf4acfa136a (from 2cc97e875ef84da4bcf55156c83599116f7523b4)
		13	Merge: 2cc97e87 c16300f0
		14	Author: Sebastian Pipping <sebastian@pipping.org>
		15	Date: Fri Feb 18 20:12:32 2022 +0100
		16
		17	Merge pull request #562 from libexpat/utf8-security
		18
		19	[CVE-2022-25235] lib: Protect against malformed encoding (e.g. malformed UTF-8)
		20	---
		21	expat/Changes | 7 ++++
		22	expat/lib/xmltok.c | 5 ---
		23	expat/lib/xmltok_impl.c | 18 ++++----
		24	expat/tests/runtests.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
		25	4 files changed, 127 insertions(+), 12 deletions(-)
		26
		27	diff --git a/lib/xmltok.c b/lib/xmltok.c
		28	index a72200e8..3bddf125 100644
		29	--- a/lib/xmltok.c
		30	+++ b/lib/xmltok.c
		31	@@ -98,11 +98,6 @@
		32	+ ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
		33	& (1u << (((byte)[2]) & 0x1F)))
		34
		35	-#define UTF8_GET_NAMING(pages, p, n) \
		36	- ((n) == 2 \
		37	- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
		38	- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
		39	-
		40	/* Detection of invalid UTF-8 sequences is based on Table 3.1B
		41	of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
		42	with the additional restriction of not allowing the Unicode
		43	diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
		44	index 0430591b..84ff35f9 100644
		45	--- a/lib/xmltok_impl.c
		46	+++ b/lib/xmltok_impl.c
		47	@@ -69,7 +69,7 @@
		48	case BT_LEAD##n: \
		49	if (end - ptr < n) \
		50	return XML_TOK_PARTIAL_CHAR; \
		51	- if (! IS_NAME_CHAR(enc, ptr, n)) { \
		52	+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
		53	*nextTokPtr = ptr; \
		54	return XML_TOK_INVALID; \
		55	} \
		56	@@ -98,7 +98,7 @@
		57	case BT_LEAD##n: \
		58	if (end - ptr < n) \
		59	return XML_TOK_PARTIAL_CHAR; \
		60	- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
		61	+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
		62	*nextTokPtr = ptr; \
		63	return XML_TOK_INVALID; \
		64	} \
		65	@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
		66	case BT_LEAD##n: \
		67	if (end - ptr < n) \
		68	return XML_TOK_PARTIAL_CHAR; \
		69	+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
		70	+ *nextTokPtr = ptr; \
		71	+ return XML_TOK_INVALID; \
		72	+ } \
		73	if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
		74	ptr += n; \
		75	tok = XML_TOK_NAME; \
		76	@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
		77	switch (BYTE_TYPE(enc, ptr)) {
		78	# define LEAD_CASE(n) \
		79	case BT_LEAD##n: \
		80	- ptr += n; \
		81	+ ptr += n; /* NOTE: The encoding has already been validated. */ \
		82	break;
		83	LEAD_CASE(2)
		84	LEAD_CASE(3)
		85	@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
		86	switch (BYTE_TYPE(enc, ptr)) {
		87	# define LEAD_CASE(n) \
		88	case BT_LEAD##n: \
		89	- ptr += n; \
		90	+ ptr += n; /* NOTE: The encoding has already been validated. */ \
		91	break;
		92	LEAD_CASE(2)
		93	LEAD_CASE(3)
		94	@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
		95	state = inName; \
		96	}
		97	# define LEAD_CASE(n) \
		98	- case BT_LEAD##n: \
		99	+ case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
		100	START_NAME ptr += (n - MINBPC(enc)); \
		101	break;
		102	LEAD_CASE(2)
		103	@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
		104	switch (BYTE_TYPE(enc, ptr)) {
		105	# define LEAD_CASE(n) \
		106	case BT_LEAD##n: \
		107	- ptr += n; \
		108	+ ptr += n; /* NOTE: The encoding has already been validated. */ \
		109	break;
		110	LEAD_CASE(2)
		111	LEAD_CASE(3)
		112	@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
		113	switch (BYTE_TYPE(enc, ptr)) {
		114	# define LEAD_CASE(n) \
		115	case BT_LEAD##n: \
		116	- ptr += n; \
		117	+ ptr += n; /* NOTE: The encoding has already been validated. */ \
		118	pos->columnNumber++; \
		119	break;
		120	LEAD_CASE(2)
		121	diff --git a/tests/runtests.c b/tests/runtests.c
		122	index bc5344b1..9b155b82 100644
		123	--- a/tests/runtests.c
		124	+++ b/tests/runtests.c
		125	@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
		126	}
		127	END_TEST
		128
		129	+START_TEST(test_utf8_in_start_tags) {
		130	+ struct test_case {
		131	+ bool goodName;
		132	+ bool goodNameStart;
		133	+ const char *tagName;
		134	+ };
		135	+
		136	+ // The idea with the tests below is this:
		137	+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
		138	+ // go to isNever and are hence not a concern.
		139	+ //
		140	+ // We start with a character that is a valid name character
		141	+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
		142	+ // single bits at places where (1) the result leaves the UTF-8 encoding space
		143	+ // and (2) we stay in the same n-byte sequence family.
		144	+ //
		145	+ // The flipped bits are highlighted in angle brackets in comments,
		146	+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
		147	+ // the most significant bit to 1 to leave UTF-8 encoding space.
		148	+ struct test_case cases[] = {
		149	+ // 1-byte UTF-8: [0xxx xxxx]
		150	+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
		151	+ {false, false, "\xBA"}, // [<1>011 1010]
		152	+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
		153	+ {false, false, "\xB9"}, // [<1>011 1001]
		154	+
		155	+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
		156	+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
		157	+ // Arabic small waw U+06E5
		158	+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
		159	+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
		160	+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
		161	+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
		162	+ // combining char U+0301
		163	+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
		164	+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
		165	+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
		166	+
		167	+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
		168	+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
		169	+ // Devanagari Letter A U+0905
		170	+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
		171	+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
		172	+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
		173	+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
		174	+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
		175	+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
		176	+ // combining char U+0901
		177	+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
		178	+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
		179	+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
		180	+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
		181	+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
		182	+ };
		183	+ const bool atNameStart[] = {true, false};
		184	+
		185	+ size_t i = 0;
		186	+ char doc[1024];
		187	+ size_t failCount = 0;
		188	+
		189	+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
		190	+ size_t j = 0;
		191	+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
		192	+ const bool expectedSuccess
		193	+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
		194	+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
		195	+ XML_Parser parser = XML_ParserCreate(NULL);
		196	+
		197	+ const enum XML_Status status
		198	+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
		199	+
		200	+ bool success = true;
		201	+ if ((status == XML_STATUS_OK) != expectedSuccess) {
		202	+ success = false;
		203	+ }
		204	+ if ((status == XML_STATUS_ERROR)
		205	+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
		206	+ success = false;
		207	+ }
		208	+
		209	+ if (! success) {
		210	+ fprintf(
		211	+ stderr,
		212	+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
		213	+ (unsigned)i + 1u, atNameStart[j] ? "    " : "not ",
		214	+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
		215	+ failCount++;
		216	+ }
		217	+
		218	+ XML_ParserFree(parser);
		219	+ }
		220	+ }
		221	+
		222	+ if (failCount > 0) {
		223	+ fail("UTF-8 regression detected");
		224	+ }
		225	+}
		226	+END_TEST
		227	+
		228	/* Test trailing spaces in elements are accepted */
		229	static void XMLCALL
		230	record_element_end_handler(void *userData, const XML_Char *name) {
		231	@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
		232	}
		233	END_TEST
		234
		235	+START_TEST(test_bad_doctype_utf8) {
		236	+ const char *text = "<!DOCTYPE \xDB\x25"
		237	+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
		238	+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
		239	+ "Invalid UTF-8 in DOCTYPE not faulted");
		240	+}
		241	+END_TEST
		242	+
		243	START_TEST(test_bad_doctype_utf16) {
		244	const char text[] =
		245	/* <!DOCTYPE doc [ \x06f2 ]><doc/>
		246	@@ -11870,6 +11977,7 @@ make_suite(void) {
		247	tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
		248	tcase_add_test(tc_basic, test_utf8_in_cdata_section);
		249	tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
		250	+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
		251	tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
		252	tcase_add_test(tc_basic, test_utf16_attribute);
		253	tcase_add_test(tc_basic, test_utf16_second_attr);
		254	@@ -11878,6 +11986,7 @@ make_suite(void) {
		255	tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
		256	tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
		257	tcase_add_test(tc_basic, test_bad_doctype);
		258	+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
		259	tcase_add_test(tc_basic, test_bad_doctype_utf16);
		260	tcase_add_test(tc_basic, test_bad_doctype_plus);
		261	tcase_add_test(tc_basic, test_bad_doctype_star);


diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb index a851e54b2a..0b3331981c 100644 --- a/meta/recipes-core/expat/expat_2.2.10.bb +++ b/meta/recipes-core/expat/expat_2.2.10.bb
@@ -17,6 +17,7 @@ SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TA
17	file://CVE-2021-46143.patch \	17	file://CVE-2021-46143.patch \
18	file://CVE-2022-23852.patch \	18	file://CVE-2022-23852.patch \
19	file://CVE-2022-23990.patch \	19	file://CVE-2022-23990.patch \
		20	file://CVE-2022-25235.patch \
20	"	21	"
21		22
22	UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"	23	UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"