summary refs log tree commit diff stats
path: root/meta/recipes-core/expat/expat/CVE-2022-25235.patch
diff options
context:
space:
mode:
Diffstat (limited to 'meta/recipes-core/expat/expat/CVE-2022-25235.patch')
-rw-r--r-- meta/recipes-core/expat/expat/CVE-2022-25235.patch 283
1 files changed, 283 insertions, 0 deletions
diff --git a/meta/recipes-core/expat/expat/CVE-2022-25235.patch b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
new file mode 100644
index 0000000000..be9182a5c1
--- /dev/null
+++ b/meta/recipes-core/expat/expat/CVE-2022-25235.patch
@@ -0,0 +1,283 @@
1From ee2a5b50e7d1940ba8745715b62ceb9efd3a96da Mon Sep 17 00:00:00 2001
2From: Sebastian Pipping <sebastian@pipping.org>
3Date: Tue, 8 Feb 2022 17:37:14 +0100
4Subject: [PATCH] lib: Drop unused macro UTF8_GET_NAMING
5
6Upstream-Status: Backport
7https://github.com/libexpat/libexpat/pull/562/commits
8
9CVE: CVE-2022-25235
10
11Signed-off-by: Steve Sakoman <steve@sakoman.com>
12
13---
14 expat/lib/xmltok.c | 5 -----
15 1 file changed, 5 deletions(-)
16
17diff --git a/lib/xmltok.c b/lib/xmltok.c
18index a72200e8..3bddf125 100644
19--- a/lib/xmltok.c
20+++ b/lib/xmltok.c
21@@ -95,11 +95,6 @@
22 + ((((byte)[1]) & 3) << 1) + ((((byte)[2]) >> 5) & 1)] \
23 & (1u << (((byte)[2]) & 0x1F)))
24
25-#define UTF8_GET_NAMING(pages, p, n) \
26- ((n) == 2 \
27- ? UTF8_GET_NAMING2(pages, (const unsigned char *)(p)) \
28- : ((n) == 3 ? UTF8_GET_NAMING3(pages, (const unsigned char *)(p)) : 0))
29-
30 /* Detection of invalid UTF-8 sequences is based on Table 3.1B
31 of Unicode 3.2: http://www.unicode.org/unicode/reports/tr28/
32 with the additional restriction of not allowing the Unicode
33From 3f0a0cb644438d4d8e3294cd0b1245d0edb0c6c6 Mon Sep 17 00:00:00 2001
34From: Sebastian Pipping <sebastian@pipping.org>
35Date: Tue, 8 Feb 2022 04:32:20 +0100
36Subject: [PATCH] lib: Add missing validation of encoding (CVE-2022-25235)
37
38---
39 expat/lib/xmltok_impl.c | 8 ++++++--
40 1 file changed, 6 insertions(+), 2 deletions(-)
41
42diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
43index 0430591b4..64a3b2c15 100644
44--- a/lib/xmltok_impl.c
45+++ b/lib/xmltok_impl.c
46@@ -61,7 +61,7 @@
47 case BT_LEAD##n: \
48 if (end - ptr < n) \
49 return XML_TOK_PARTIAL_CHAR; \
50- if (! IS_NAME_CHAR(enc, ptr, n)) { \
51+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
52 *nextTokPtr = ptr; \
53 return XML_TOK_INVALID; \
54 } \
55@@ -90,7 +90,7 @@
56 case BT_LEAD##n: \
57 if (end - ptr < n) \
58 return XML_TOK_PARTIAL_CHAR; \
59- if (! IS_NMSTRT_CHAR(enc, ptr, n)) { \
60+ if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
61 *nextTokPtr = ptr; \
62 return XML_TOK_INVALID; \
63 } \
64@@ -1134,6 +1134,10 @@ PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
65 case BT_LEAD##n: \
66 if (end - ptr < n) \
67 return XML_TOK_PARTIAL_CHAR; \
68+ if (IS_INVALID_CHAR(enc, ptr, n)) { \
69+ *nextTokPtr = ptr; \
70+ return XML_TOK_INVALID; \
71+ } \
72 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
73 ptr += n; \
74 tok = XML_TOK_NAME; \
75From c85a3025e7a1be086dc34e7559fbc543914d047f Mon Sep 17 00:00:00 2001
76From: Sebastian Pipping <sebastian@pipping.org>
77Date: Wed, 9 Feb 2022 01:00:38 +0100
78Subject: [PATCH] lib: Add comments to BT_LEAD* cases where encoding has
79 already been validated
80
81---
82 expat/lib/xmltok_impl.c | 10 +++++-----
83 1 file changed, 5 insertions(+), 5 deletions(-)
84
85diff --git a/lib/xmltok_impl.c b/lib/xmltok_impl.c
86index 64a3b2c1..84ff35f9 100644
87--- a/lib/xmltok_impl.c
88+++ b/lib/xmltok_impl.c
89@@ -1266,7 +1266,7 @@ PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
90 switch (BYTE_TYPE(enc, ptr)) {
91 # define LEAD_CASE(n) \
92 case BT_LEAD##n: \
93- ptr += n; \
94+ ptr += n; /* NOTE: The encoding has already been validated. */ \
95 break;
96 LEAD_CASE(2)
97 LEAD_CASE(3)
98@@ -1335,7 +1335,7 @@ PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
99 switch (BYTE_TYPE(enc, ptr)) {
100 # define LEAD_CASE(n) \
101 case BT_LEAD##n: \
102- ptr += n; \
103+ ptr += n; /* NOTE: The encoding has already been validated. */ \
104 break;
105 LEAD_CASE(2)
106 LEAD_CASE(3)
107@@ -1514,7 +1514,7 @@ PREFIX(getAtts)(const ENCODING *enc, const char *ptr, int attsMax,
108 state = inName; \
109 }
110 # define LEAD_CASE(n) \
111- case BT_LEAD##n: \
112+ case BT_LEAD##n: /* NOTE: The encoding has already been validated. */ \
113 START_NAME ptr += (n - MINBPC(enc)); \
114 break;
115 LEAD_CASE(2)
116@@ -1726,7 +1726,7 @@ PREFIX(nameLength)(const ENCODING *enc, const char *ptr) {
117 switch (BYTE_TYPE(enc, ptr)) {
118 # define LEAD_CASE(n) \
119 case BT_LEAD##n: \
120- ptr += n; \
121+ ptr += n; /* NOTE: The encoding has already been validated. */ \
122 break;
123 LEAD_CASE(2)
124 LEAD_CASE(3)
125@@ -1771,7 +1771,7 @@ PREFIX(updatePosition)(const ENCODING *enc, const char *ptr, const char *end,
126 switch (BYTE_TYPE(enc, ptr)) {
127 # define LEAD_CASE(n) \
128 case BT_LEAD##n: \
129- ptr += n; \
130+ ptr += n; /* NOTE: The encoding has already been validated. */ \
131 break;
132 LEAD_CASE(2)
133 LEAD_CASE(3)
134From 6a5510bc6b7efe743356296724e0b38300f05379 Mon Sep 17 00:00:00 2001
135From: Sebastian Pipping <sebastian@pipping.org>
136Date: Tue, 8 Feb 2022 04:06:21 +0100
137Subject: [PATCH] tests: Cover missing validation of encoding (CVE-2022-25235)
138
139---
140 expat/tests/runtests.c | 109 +++++++++++++++++++++++++++++++++++++++++
141 1 file changed, 109 insertions(+)
142
143diff --git a/tests/runtests.c b/tests/runtests.c
144index bc5344b1..9b155b82 100644
145--- a/tests/runtests.c
146+++ b/tests/runtests.c
147@@ -5998,6 +5998,105 @@ START_TEST(test_utf8_in_cdata_section_2) {
148 }
149 END_TEST
150
151+START_TEST(test_utf8_in_start_tags) {
152+ struct test_case {
153+ bool goodName;
154+ bool goodNameStart;
155+ const char *tagName;
156+ };
157+
158+ // The idea with the tests below is this:
159+ // We want to cover 1-, 2- and 3-byte sequences; 4-byte sequences
160+ // go to isNever and are hence not a concern.
161+ //
162+ // We start with a character that is a valid name character
163+ // (or even name-start character, see XML 1.0r4 spec) and then we flip
164+ // single bits at places where (1) the result leaves the UTF-8 encoding space
165+ // and (2) we stay in the same n-byte sequence family.
166+ //
167+ // The flipped bits are highlighted in angle brackets in comments,
168+ // e.g. "[<1>011 1001]" means we had [0011 1001] but we now flipped
169+ // the most significant bit to 1 to leave UTF-8 encoding space.
170+ struct test_case cases[] = {
171+ // 1-byte UTF-8: [0xxx xxxx]
172+ {true, true, "\x3A"}, // [0011 1010] = ASCII colon ':'
173+ {false, false, "\xBA"}, // [<1>011 1010]
174+ {true, false, "\x39"}, // [0011 1001] = ASCII nine '9'
175+ {false, false, "\xB9"}, // [<1>011 1001]
176+
177+ // 2-byte UTF-8: [110x xxxx] [10xx xxxx]
178+ {true, true, "\xDB\xA5"}, // [1101 1011] [1010 0101] =
179+ // Arabic small waw U+06E5
180+ {false, false, "\x9B\xA5"}, // [1<0>01 1011] [1010 0101]
181+ {false, false, "\xDB\x25"}, // [1101 1011] [<0>010 0101]
182+ {false, false, "\xDB\xE5"}, // [1101 1011] [1<1>10 0101]
183+ {true, false, "\xCC\x81"}, // [1100 1100] [1000 0001] =
184+ // combining char U+0301
185+ {false, false, "\x8C\x81"}, // [1<0>00 1100] [1000 0001]
186+ {false, false, "\xCC\x01"}, // [1100 1100] [<0>000 0001]
187+ {false, false, "\xCC\xC1"}, // [1100 1100] [1<1>00 0001]
188+
189+ // 3-byte UTF-8: [1110 xxxx] [10xx xxxx] [10xx xxxx]
190+ {true, true, "\xE0\xA4\x85"}, // [1110 0000] [1010 0100] [1000 0101] =
191+ // Devanagari Letter A U+0905
192+ {false, false, "\xA0\xA4\x85"}, // [1<0>10 0000] [1010 0100] [1000 0101]
193+ {false, false, "\xE0\x24\x85"}, // [1110 0000] [<0>010 0100] [1000 0101]
194+ {false, false, "\xE0\xE4\x85"}, // [1110 0000] [1<1>10 0100] [1000 0101]
195+ {false, false, "\xE0\xA4\x05"}, // [1110 0000] [1010 0100] [<0>000 0101]
196+ {false, false, "\xE0\xA4\xC5"}, // [1110 0000] [1010 0100] [1<1>00 0101]
197+ {true, false, "\xE0\xA4\x81"}, // [1110 0000] [1010 0100] [1000 0001] =
198+ // combining char U+0901
199+ {false, false, "\xA0\xA4\x81"}, // [1<0>10 0000] [1010 0100] [1000 0001]
200+ {false, false, "\xE0\x24\x81"}, // [1110 0000] [<0>010 0100] [1000 0001]
201+ {false, false, "\xE0\xE4\x81"}, // [1110 0000] [1<1>10 0100] [1000 0001]
202+ {false, false, "\xE0\xA4\x01"}, // [1110 0000] [1010 0100] [<0>000 0001]
203+ {false, false, "\xE0\xA4\xC1"}, // [1110 0000] [1010 0100] [1<1>00 0001]
204+ };
205+ const bool atNameStart[] = {true, false};
206+
207+ size_t i = 0;
208+ char doc[1024];
209+ size_t failCount = 0;
210+
211+ for (; i < sizeof(cases) / sizeof(cases[0]); i++) {
212+ size_t j = 0;
213+ for (; j < sizeof(atNameStart) / sizeof(atNameStart[0]); j++) {
214+ const bool expectedSuccess
215+ = atNameStart[j] ? cases[i].goodNameStart : cases[i].goodName;
216+ sprintf(doc, "<%s%s><!--", atNameStart[j] ? "" : "a", cases[i].tagName);
217+ XML_Parser parser = XML_ParserCreate(NULL);
218+
219+ const enum XML_Status status
220+ = XML_Parse(parser, doc, (int)strlen(doc), /*isFinal=*/XML_FALSE);
221+
222+ bool success = true;
223+ if ((status == XML_STATUS_OK) != expectedSuccess) {
224+ success = false;
225+ }
226+ if ((status == XML_STATUS_ERROR)
227+ && (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)) {
228+ success = false;
229+ }
230+
231+ if (! success) {
232+ fprintf(
233+ stderr,
234+ "FAIL case %2u (%sat name start, %u-byte sequence, error code %d)\n",
235+ (unsigned)i + 1u, atNameStart[j] ? " " : "not ",
236+ (unsigned)strlen(cases[i].tagName), XML_GetErrorCode(parser));
237+ failCount++;
238+ }
239+
240+ XML_ParserFree(parser);
241+ }
242+ }
243+
244+ if (failCount > 0) {
245+ fail("UTF-8 regression detected");
246+ }
247+}
248+END_TEST
249+
250 /* Test trailing spaces in elements are accepted */
251 static void XMLCALL
252 record_element_end_handler(void *userData, const XML_Char *name) {
253@@ -6175,6 +6274,14 @@ START_TEST(test_bad_doctype) {
254 }
255 END_TEST
256
257+START_TEST(test_bad_doctype_utf8) {
258+ const char *text = "<!DOCTYPE \xDB\x25"
259+ "doc><doc/>"; // [1101 1011] [<0>010 0101]
260+ expect_failure(text, XML_ERROR_INVALID_TOKEN,
261+ "Invalid UTF-8 in DOCTYPE not faulted");
262+}
263+END_TEST
264+
265 START_TEST(test_bad_doctype_utf16) {
266 const char text[] =
267 /* <!DOCTYPE doc [ \x06f2 ]><doc/>
268@@ -11870,6 +11977,7 @@ make_suite(void) {
269 tcase_add_test(tc_basic, test_ext_entity_utf8_non_bom);
270 tcase_add_test(tc_basic, test_utf8_in_cdata_section);
271 tcase_add_test(tc_basic, test_utf8_in_cdata_section_2);
272+ tcase_add_test(tc_basic, test_utf8_in_start_tags);
273 tcase_add_test(tc_basic, test_trailing_spaces_in_elements);
274 tcase_add_test(tc_basic, test_utf16_attribute);
275 tcase_add_test(tc_basic, test_utf16_second_attr);
276@@ -11878,6 +11986,7 @@ make_suite(void) {
277 tcase_add_test(tc_basic, test_bad_attr_desc_keyword);
278 tcase_add_test(tc_basic, test_bad_attr_desc_keyword_utf16);
279 tcase_add_test(tc_basic, test_bad_doctype);
280+ tcase_add_test(tc_basic, test_bad_doctype_utf8);
281 tcase_add_test(tc_basic, test_bad_doctype_utf16);
282 tcase_add_test(tc_basic, test_bad_doctype_plus);
283 tcase_add_test(tc_basic, test_bad_doctype_star);