summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Kai Kang <kai.kang@windriver.com> 2022-03-11 21:58:34 +0800
committer Richard Purdie <richard.purdie@linuxfoundation.org> 2022-03-22 22:18:51 +0000
commit 37a86b19585fefbb83bc94572c5742c2a60c4490 (patch)
tree d5da85ad5ca0f3637445a0f463722adc268fbcdc
parent 02a3d2d460c8172251c783d52b00ca256d23f351 (diff)
download poky-37a86b19585fefbb83bc94572c5742c2a60c4490.tar.gz
expat: fix CVE-2022-25235
Backport patch to fix CVE-2022-25235 for expat. CVE: CVE-2022-25235 (From OE-Core rev: 60dd7d2deeda838346f30b6f8de28dfac7efac0d) Signed-off-by: Kai Kang <kai.kang@windriver.com> Signed-off-by: Anuj Mittal <anuj.mittal@intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- meta/recipes-core/expat/expat/CVE-2022-25235.patch 261
-rw-r--r-- meta/recipes-core/expat/expat_2.2.10.bb 1
2 files changed, 262 insertions, 0 deletions
diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
new file mode 100644
index 0000000000..9febeae609
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
@@ -0,0 +1,261 @@
1Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/306b721]
2CVE: CVE-2022-25235
3
4The commit is a merge commit, and this patch is created by:
5
6$ git show -m -p --stat 306b72134f157bbfd1637b20a22cabf4acfa136a
7
8Remove modification for expat/Changes which fails to be applied.
9
10Signed-off-by: Kai Kang <kai.kang@windriver.com>
11
12commit 306b72134f157bbfd1637b20a22cabf4acfa136a (from 2cc97e875ef84da4bcf55156c83599116f7523b4)
13Merge: 2cc97e87 c16300f0
14Author: Sebastian Pipping <sebastian@pipping.org>
15Date: Fri Feb 18 20:12:32 2022 +0100
16
17 Merge pull request #562 from libexpat/utf8-security
18
19 [CVE-2022-25235] lib: Protect against malformed encoding (e.g. malformed UTF-8)
20---
21 expat/Changes | 7 ++++
22 expat/lib/xmltok.c | 5 ---
23 expat/lib/xmltok_impl.c | 18 ++++----
24 expat/tests/runtests.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
25 4 files changed, 127 insertions(+), 12 deletions(-)
26
27diff --git a/lib/xmltok.c b/lib/xmltok.c
28index a72200e8..3bddf125 100644
29--- a/lib/xmltok.c
30+++ b/lib/xmltok.c
31@@ -98,11 +98,6 @@
32 + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
33 & (1u << (((byte)[2]) & 0x1F)))
34
35-#define UTF8_GET_NAMING(pages, p, n) \
36- ((n) == 2 \
37- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
38- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
39-
40 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
41 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
42 with the additional restriction of not allowing the Unicode
43diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
44index 0430591b..84ff35f9 100644
45--- a/lib/xmltok_impl.c
46+++ b/lib/xmltok_impl.c
47@@ -69,7 +69,7 @@
48 case BT_LEAD##n: \
49 if (end - ptr < n) \
50 return XML_TOK_PARTIAL_CHAR; \
51- if (! IS_NAME_CHAR(enc, ptr, n)) { \
52+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
53 *nextTokPtr = ptr; \
54 return XML_TOK_INVALID; \
55 } \
56@@ -98,7 +98,7 @@
57 case BT_LEAD##n: \
58 if (end - ptr < n) \
59 return XML_TOK_PARTIAL_CHAR; \
60- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
61+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
62 *nextTokPtr = ptr; \
63 return XML_TOK_INVALID; \
64 } \
65@@ -1142,6 +1142,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
66 case BT_LEAD##n: \
67 if (end - ptr < n) \
68 return XML_TOK_PARTIAL_CHAR; \
69+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
70+ *nextTokPtr = ptr; \
71+ return XML_TOK_INVALID; \
72+ } \
73 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
74 ptr += n; \
75 tok = XML_TOK_NAME; \
76@@ -1270,7 +1274,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
77 switch (BYTE_TYPE(enc, ptr)) {
78 # define LEAD_CASE(n) \
79 case BT_LEAD##n: \
80- ptr += n; \
81+ ptr += n; /* NOTE: The encoding has already been validated. */ \
82 break;
83 LEAD_CASE(2)
84 LEAD_CASE(3)
85@@ -1339,7 +1343,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
86 switch (BYTE_TYPE(enc, ptr)) {
87 # define LEAD_CASE(n) \
88 case BT_LEAD##n: \
89- ptr += n; \
90+ ptr += n; /* NOTE: The encoding has already been validated. */ \
91 break;
92 LEAD_CASE(2)
93 LEAD_CASE(3)
94@@ -1518,7 +1522,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
95 state = inName; \
96 }
97 # define LEAD_CASE(n) \
98- case BT_LEAD##n: \
99+ case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
100 START_NAME ptr += (n - MINBPC(enc)); \
101 break;
102 LEAD_CASE(2)
103@@ -1730,7 +1734,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
104 switch (BYTE_TYPE(enc, ptr)) {
105 # define LEAD_CASE(n) \
106 case BT_LEAD##n: \
107- ptr += n; \
108+ ptr += n; /* NOTE: The encoding has already been validated. */ \
109 break;
110 LEAD_CASE(2)
111 LEAD_CASE(3)
112@@ -1775,7 +1779,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
113 switch (BYTE_TYPE(enc, ptr)) {
114 # define LEAD_CASE(n) \
115 case BT_LEAD##n: \
116- ptr += n; \
117+ ptr += n; /* NOTE: The encoding has already been validated. */ \
118 pos->columnNumber++; \
119 break;
120 LEAD_CASE(2)
121diff --git a/tests/runtests.c b/tests/runtests.c
122index bc5344b1..9b155b82 100644
123--- a/tests/runtests.c
124+++ b/tests/runtests.c
125@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
126 }
127 END_TEST
128
129+START_TEST(test_utf8_in_start_tags) {
130+ struct test_case {
131+ bool goodName;
132+ bool goodNameStart;
133+ const char *tagName;
134+ };
135+
136+ // The idea with the tests below is this:
137+ // We want to cover 1-, 2- and 3-byte sequences, 4-byte sequences
138+ // go to isNever and are hence not a concern.
139+ //
140+ // We start with a character that is a valid name character
141+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
142+ // single bits at places where (1) the result leaves the UTF-8 encoding space
143+ // and (2) we stay in the same n-byte sequence family.
144+ //
145+ // The flipped bits are highlighted in angle brackets in comments,
146+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
147+ // the most significant bit to 1 to leave UTF-8 encoding space.
148+ struct test_case cases[] = {
149+ // 1-byte UTF-8: [0xxx xxxx]
150+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
151+ {false, false, "\xBA"}, // [<1>011 1010]
152+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
153+ {false, false, "\xB9"}, // [<1>011 1001]
154+
155+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
156+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
157+ // Arabic small waw U+06E5
158+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
159+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
160+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
161+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
162+ // combining char U+0301
163+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
164+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
165+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
166+
167+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xxxxxx]
168+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
169+ // Devanagari Letter A U+0905
170+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
171+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
172+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
173+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
174+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
175+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
176+ // combining char U+0901
177+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
178+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
179+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
180+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
181+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
182+ };
183+ const bool atNameStart[] = {true, false};
184+
185+ size_t i = 0;
186+ char doc[1024];
187+ size_t failCount = 0;
188+
189+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
190+ size_t j = 0;
191+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
192+ const bool expectedSuccess
193+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
194+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
195+ XML_Parser parser = XML_ParserCreate(NULL);
196+
197+ const enum XML_Status status
198+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
199+
200+ bool success = true;
201+ if ((status == XML_STATUS_OK) != expectedSuccess) {
202+ success = false;
203+ }
204+ if ((status == XML_STATUS_ERROR)
205+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
206+ success = false;
207+ }
208+
209+ if (! success) {
210+ fprintf(
211+ stderr,
212+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
213+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
214+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
215+ failCount++;
216+ }
217+
218+ XML_ParserFree(parser);
219+ }
220+ }
221+
222+ if (failCount > 0) {
223+ fail("UTF-8 regression detected");
224+ }
225+}
226+END_TEST
227+
228 /* Test trailing spaces in elements are accepted */
229 static void XMLCALL
230 record_element_end_handler(void *userData, const XML_Char *name) {
231@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
232 }
233 END_TEST
234
235+START_TEST(test_bad_doctype_utf8) {
236+ const char *text = "<!DOCTYPE \xDB\x25"
237+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
238+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
239+ "Invalid UTF-8 in DOCTYPE not faulted");
240+}
241+END_TEST
242+
243 START_TEST(test_bad_doctype_utf16) {
244 const char text[] =
245 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
246@@ -11870,6 +11977,7 @@ make_suite(void) {
247 tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
248 tcase_add_test(tc_basic, test_utf8_in_cdata_section);
249 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
250+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
251 tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
252 tcase_add_test(tc_basic, test_utf16_attribute);
253 tcase_add_test(tc_basic, test_utf16_second_attr);
254@@ -11878,6 +11986,7 @@ make_suite(void) {
255 tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
256 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
257 tcase_add_test(tc_basic, test_bad_doctype);
258+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
259 tcase_add_test(tc_basic, test_bad_doctype_utf16);
260 tcase_add_test(tc_basic, test_bad_doctype_plus);
261 tcase_add_test(tc_basic, test_bad_doctype_star);
diff --git a/meta/recipes-core/expat/expat_2.2.10.bb b/meta/recipes-core/expat/expat_2.2.10.bb
index a851e54b2a..0b3331981c 100644
--- a/meta/recipes-core/expat/expat_2.2.10.bb
+++ b/meta/recipes-core/expat/expat_2.2.10.bb
@@ -17,6 +17,7 @@ SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TA
 file://CVE-2021-46143.patch \
 file://CVE-2022-23852.patch \
 file://CVE-2022-23990.patch \
+file://CVE-2022-25235.patch \
 "
 
 UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"