From ecd456ab88d379514b116ef9293318b74e5ed3ee Mon Sep 17 00:00:00 2001 From: Martin Blech <78768+martinblech@users.noreply.github.com> Date: Thu, 4 Sep 2025 17:25:39 -0700 Subject: [PATCH] Prevent XML injection: reject '<'/'>' in element/attr names (incl. @xmlns) * Add tests for tag names, attribute names, and @xmlns prefixes; confirm attr values are escaped. CVE: CVE-2025-9375 Upstream-Status: Backport https://github.com/martinblech/xmltodict/commit/ecd456ab88d379514b116ef9293318b74e5ed3ee https://git.launchpad.net/ubuntu/+source/python-xmltodict/commit/?id=e8110a20e00d80db31d5fc9f8f4577328385d6b6 Signed-off-by: Saravanan --- tests/test_dicttoxml.py | 32 ++++++++++++++++++++++++++++++++ xmltodict.py | 20 +++++++++++++++++++- 2 files changed, 51 insertions(+), 1 deletion(-) Index: python-xmltodict-0.13.0/tests/test_dicttoxml.py =================================================================== --- python-xmltodict-0.13.0.orig/tests/test_dicttoxml.py +++ python-xmltodict-0.13.0/tests/test_dicttoxml.py @@ -213,3 +213,35 @@ xmlns:b="http://b.com/"> expected_xml = '<?xml version="1.0" encoding="utf-8"?>\n<x>false</x>' xml = unparse(dict(x=False)) self.assertEqual(xml, expected_xml) + + def test_rejects_tag_name_with_angle_brackets(self): + # Minimal guard: disallow '<' or '>' to prevent breaking tag context + with self.assertRaises(ValueError): + unparse({"m><tag>content</tag": "unsafe"}, full_document=False) + + def test_rejects_attribute_name_with_angle_brackets(self): + # Now we expect bad attribute names to be rejected + with self.assertRaises(ValueError): + unparse( + {"a": {"@m><tag>content</tag": "unsafe", "#text": "x"}}, + full_document=False, + ) + + def test_rejects_malicious_xmlns_prefix(self): + # xmlns prefixes go under @xmlns mapping; reject angle brackets in prefix + with self.assertRaises(ValueError): + unparse( + { + "a": { + "@xmlns": {"m><bad": "http://example.com/"}, + "#text": "x", + } + }, + full_document=False, + ) + + def test_attribute_values_with_angle_brackets_are_escaped(self): + # Attribute values should be escaped by XMLGenerator + xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False) + # The generated XML should contain escaped '<' and '>' within the attribute value + self.assertIn('attr="1&lt;middle&gt;2"', xml) Index: python-xmltodict-0.13.0/xmltodict.py =================================================================== --- python-xmltodict-0.13.0.orig/xmltodict.py +++ python-xmltodict-0.13.0/xmltodict.py @@ -379,6 +379,14 @@ def parse(xml_input, encoding=None, expa return handler.item +def _has_angle_brackets(value): + """Return True if value (a str) contains '<' or '>'. + + Non-string values return False. Uses fast substring checks implemented in C.
+ """ + return isinstance(value, str) and ("<" in value or ">" in value) + + def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'): if not namespaces: return name @@ -412,6 +420,9 @@ def _emit(key, value, content_handler, if result is None: return key, value = result + # Minimal validation to avoid breaking out of tag context + if _has_angle_brackets(key): + raise ValueError('Invalid element name: "<" or ">" not allowed') if (not hasattr(value, '__iter__') or isinstance(value, _basestring) or isinstance(value, dict)): @@ -445,12 +456,19 @@ def _emit(key, value, content_handler, attr_prefix) if ik == '@xmlns' and isinstance(iv, dict): for k, v in iv.items(): + if _has_angle_brackets(k): + raise ValueError( + 'Invalid attribute name: "<" or ">" not allowed' + ) attr = 'xmlns{}'.format(':{}'.format(k) if k else '') attrs[attr] = _unicode(v) continue if not isinstance(iv, _unicode): iv = _unicode(iv) - attrs[ik[len(attr_prefix):]] = iv + attr_name = ik[len(attr_prefix) :] + if _has_angle_brackets(attr_name): + raise ValueError('Invalid attribute name: "<" or ">" not allowed') + attrs[attr_name] = iv continue children.append((ik, iv)) if pretty: