Upstream-Status: Backport [https://github.com/libexpat/libexpat/commit/f178826b] CVE: CVE-2022-25236 The commit is a merge commit, and this patch is created by: $ git show -m -p --stat f178826b Remove changes for expat/Changes and reference.html which fail to be applied. Signed-off-by: Kai Kang commit f178826bb1e9c8ee23202f1be55ad4ac7b649e84 (from c99e0e7f2b15b48848038992ecbb4480f957cfe9) Merge: c99e0e7f 9579f7ea Author: Sebastian Pipping Date: Fri Mar 4 18:43:39 2022 +0100 Merge pull request #577 from libexpat/namesep lib: Relax fix to CVE-2022-25236 with regard to RFC 3986 URI characters (fixes #572) --- expat/Changes | 16 ++++++ expat/doc/reference.html | 8 +++ expat/lib/expat.h | 11 ++++ expat/lib/xmlparse.c | 139 ++++++++++++++++++++++++++++++++++++++++++++--- expat/tests/runtests.c | 8 ++- 5 files changed, 171 insertions(+), 11 deletions(-) diff --git a/lib/expat.h b/lib/expat.h index 5ab493f7..181fc960 100644 --- a/lib/expat.h +++ b/lib/expat.h @@ -239,6 +239,17 @@ XML_ParserCreate(const XML_Char *encoding); and the local part will be concatenated without any separator. It is a programming error to use the separator '\0' with namespace triplets (see XML_SetReturnNSTriplet). + If a namespace separator is chosen that can be part of a URI or + part of an XML name, splitting an expanded name back into its + 1, 2 or 3 original parts on application level in the element handler + may end up vulnerable, so these are advised against; sane choices for + a namespace separator are e.g. '\n' (line feed) and '|' (pipe). + + Note that Expat does not validate namespace URIs (beyond encoding) + against RFC 3986 today (and is not required to do so with regard to + the XML 1.0 namespaces specification) but it may start doing that + in future releases. Before that, an application using Expat must + be ready to receive namespace URIs containing non-URI characters. 
*/ XMLPARSEAPI(XML_Parser) XML_ParserCreateNS(const XML_Char *encoding, XML_Char namespaceSeparator); diff --git a/lib/xmlparse.c b/lib/xmlparse.c index 59da19c8..6fe2cf1e 100644 --- a/lib/xmlparse.c +++ b/lib/xmlparse.c @@ -3705,6 +3705,117 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const char *attStr, return XML_ERROR_NONE; } +static XML_Bool +is_rfc3986_uri_char(XML_Char candidate) { + // For the RFC 3986 ABNF grammar see + // https://datatracker.ietf.org/doc/html/rfc3986#appendix-A + + switch (candidate) { + // From rule "ALPHA" (uppercase half) + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + + // From rule "ALPHA" (lowercase half) + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + + // From rule "DIGIT" + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + + // From rule "pct-encoded" + case '%': + + // From rule "unreserved" + case '-': + case '.': + case '_': + case '~': + + // From rule "gen-delims" + case ':': + case '/': + case '?': + case '#': + case '[': + case ']': + case '@': + + // From rule "sub-delims" + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return XML_TRUE; + + default: + return XML_FALSE; + } +} + /* addBinding() overwrites the value of prefix->binding without checking. Therefore one must keep track of the old value outside of addBinding(). 
*/ @@ -3763,14 +3874,26 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) isXMLNS = XML_FALSE; - // NOTE: While Expat does not validate namespace URIs against RFC 3986, - // we have to at least make sure that the XML processor on top of - // Expat (that is splitting tag names by namespace separator into - // 2- or 3-tuples (uri-local or uri-local-prefix)) cannot be confused - // by an attacker putting additional namespace separator characters - // into namespace declarations. That would be ambiguous and not to - // be expected. - if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator)) { + // NOTE: While Expat does not validate namespace URIs against RFC 3986 + // today (and is not REQUIRED to do so with regard to the XML 1.0 + // namespaces specification) we have to at least make sure that + // the application on top of Expat (that is likely splitting expanded + // element names ("qualified names") of form + // "[uri sep] local [sep prefix] '\0'" back into 1, 2 or 3 pieces + // in its element handler code) cannot be confused by an attacker + // putting additional namespace separator characters into namespace + // declarations. That would be ambiguous and not to be expected. + // + // While the HTML API docs of function XML_ParserCreateNS have been + // advising against use of a namespace separator character that can + // appear in a URI for >20 years now, some widespread applications + // are using URI characters (':' (colon) in particular) for a + // namespace separator, in practice. To keep these applications + // functional, we only reject namespace URIs containing the + // application-chosen namespace separator if the chosen separator + // is a non-URI character with regard to RFC 3986. + if (parser->m_ns && (uri[len] == parser->m_namespaceSeparator) + && ! 
is_rfc3986_uri_char(uri[len])) { return XML_ERROR_SYNTAX; } } diff --git a/tests/runtests.c b/tests/runtests.c index 60da868e..712706c4 100644 --- a/tests/runtests.c +++ b/tests/runtests.c @@ -7406,16 +7406,18 @@ START_TEST(test_ns_separator_in_uri) { struct test_case { enum XML_Status expectedStatus; const char *doc; + XML_Char namesep; }; struct test_case cases[] = { - {XML_STATUS_OK, ""}, - {XML_STATUS_ERROR, ""}, + {XML_STATUS_OK, "", XCS('\n')}, + {XML_STATUS_ERROR, "", XCS('\n')}, + {XML_STATUS_OK, "", XCS(':')}, }; size_t i = 0; size_t failCount = 0; for (; i < sizeof(cases) / sizeof(cases[0]); i++) { - XML_Parser parser = XML_ParserCreateNS(NULL, '\n'); + XML_Parser parser = XML_ParserCreateNS(NULL, cases[i].namesep); XML_SetElementHandler(parser, dummy_start_element, dummy_end_element); if (XML_Parse(parser, cases[i].doc, (int)strlen(cases[i].doc), /*isFinal*/ XML_TRUE)