1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
From f98c90f071228ed73df997807298e1df4f790c33 Mon Sep 17 00:00:00 2001
From: Martin Blech <78768+martinblech@users.noreply.github.com>
Date: Mon, 8 Sep 2025 11:18:33 -0700
Subject: [PATCH] Enhance unparse() XML name validation with stricter rules and tests
Extend existing validation (previously only for "<" and ">") to also
reject element, attribute, and xmlns prefix names that are non-string,
start with "?" or "!", or contain "/", spaces, tabs, or newlines.
Update _emit and namespace handling to use _validate_name. Add tests
covering these new invalid name cases.
CVE: CVE-2025-9375
Upstream-Status: Backport
https://github.com/martinblech/xmltodict/commit/f98c90f071228ed73df997807298e1df4f790c33
https://git.launchpad.net/ubuntu/+source/python-xmltodict/commit/?id=e8110a20e00d80db31d5fc9f8f4577328385d6b6
Signed-off-by: Saravanan <saravanan.kadambathursubramaniyam@windriver.com>
---
tests/test_dicttoxml.py | 60 +++++++++++++++++++++++++++++++++++++++++
xmltodict.py | 48 ++++++++++++++++++++++++++-------
2 files changed, 99 insertions(+), 9 deletions(-)
Index: python-xmltodict-0.13.0/tests/test_dicttoxml.py
===================================================================
--- python-xmltodict-0.13.0.orig/tests/test_dicttoxml.py
+++ python-xmltodict-0.13.0/tests/test_dicttoxml.py
@@ -245,3 +245,63 @@ xmlns:b="http://b.com/"><x a:attr="val">
xml = unparse({"a": {"@attr": "1<middle>2", "#text": "x"}}, full_document=False)
# The generated XML should contain escaped '<' and '>' within the attribute value
self.assertIn('attr="1&lt;middle&gt;2"', xml)
+
+ def test_rejects_tag_name_starting_with_question(self):
+ with self.assertRaises(ValueError):
+ unparse({"?pi": "data"}, full_document=False)
+
+ def test_rejects_tag_name_starting_with_bang(self):
+ with self.assertRaises(ValueError):
+ unparse({"!decl": "data"}, full_document=False)
+
+ def test_rejects_attribute_name_starting_with_question(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@?weird": "x"}}, full_document=False)
+
+ def test_rejects_attribute_name_starting_with_bang(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@!weird": "x"}}, full_document=False)
+
+ def test_rejects_xmlns_prefix_starting_with_question_or_bang(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"?p": "http://e/"}}}, full_document=False)
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"!p": "http://e/"}}}, full_document=False)
+
+ def test_rejects_non_string_names(self):
+ class Weird:
+ def __str__(self):
+ return "bad>name"
+
+ # Non-string element key
+ with self.assertRaises(ValueError):
+ unparse({Weird(): "x"}, full_document=False)
+ # Non-string attribute key
+ with self.assertRaises(ValueError):
+ unparse({"a": {Weird(): "x"}}, full_document=False)
+
+ def test_rejects_tag_name_with_slash(self):
+ with self.assertRaises(ValueError):
+ unparse({"bad/name": "x"}, full_document=False)
+
+ def test_rejects_tag_name_with_whitespace(self):
+ for name in ["bad name", "bad\tname", "bad\nname"]:
+ with self.assertRaises(ValueError):
+ unparse({name: "x"}, full_document=False)
+
+ def test_rejects_attribute_name_with_slash(self):
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@bad/name": "x"}}, full_document=False)
+
+ def test_rejects_attribute_name_with_whitespace(self):
+ for name in ["@bad name", "@bad\tname", "@bad\nname"]:
+ with self.assertRaises(ValueError):
+ unparse({"a": {name: "x"}}, full_document=False)
+
+ def test_rejects_xmlns_prefix_with_slash_or_whitespace(self):
+ # Slash
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"bad/prefix": "http://e/"}}}, full_document=False)
+ # Whitespace
+ with self.assertRaises(ValueError):
+ unparse({"a": {"@xmlns": {"bad prefix": "http://e/"}}}, full_document=False)
Index: python-xmltodict-0.13.0/xmltodict.py
===================================================================
--- python-xmltodict-0.13.0.orig/xmltodict.py
+++ python-xmltodict-0.13.0/xmltodict.py
@@ -387,7 +387,42 @@ def _has_angle_brackets(value):
return isinstance(value, str) and ("<" in value or ">" in value)
+def _has_invalid_name_chars(value):
+ """Return True if value (a str) contains any disallowed name characters.
+
+ Disallowed: '<', '>', '/', or any whitespace character.
+ Non-string values return False.
+ """
+ if not isinstance(value, str):
+ return False
+ if "<" in value or ">" in value or "/" in value:
+ return True
+ # Check for any whitespace (spaces, tabs, newlines, etc.)
+ return any(ch.isspace() for ch in value)
+
+
+def _validate_name(value, kind):
+ """Validate an element/attribute name for XML safety.
+
+ Raises ValueError with a specific reason when invalid.
+
+ kind: 'element' or 'attribute' (used in error messages)
+ """
+ if not isinstance(value, str):
+ raise ValueError(f"{kind} name must be a string")
+ if value.startswith("?") or value.startswith("!"):
+ raise ValueError(f'Invalid {kind} name: cannot start with "?" or "!"')
+ if "<" in value or ">" in value:
+ raise ValueError(f'Invalid {kind} name: "<" or ">" not allowed')
+ if "/" in value:
+ raise ValueError(f'Invalid {kind} name: "/" not allowed')
+ if any(ch.isspace() for ch in value):
+ raise ValueError(f"Invalid {kind} name: whitespace not allowed")
+
+
def _process_namespace(name, namespaces, ns_sep=':', attr_prefix='@'):
+ if not isinstance(name, str):
+ return name
if not namespaces:
return name
try:
@@ -421,8 +456,7 @@ def _emit(key, value, content_handler,
return
key, value = result
# Minimal validation to avoid breaking out of tag context
- if _has_angle_brackets(key):
- raise ValueError('Invalid element name: "<" or ">" not allowed')
+ _validate_name(key, "element")
if (not hasattr(value, '__iter__')
or isinstance(value, _basestring)
or isinstance(value, dict)):
@@ -451,23 +485,19 @@ def _emit(key, value, content_handler,
if ik == cdata_key:
cdata = iv
continue
- if ik.startswith(attr_prefix):
+ if isinstance(ik, str) and ik.startswith(attr_prefix):
ik = _process_namespace(ik, namespaces, namespace_separator,
attr_prefix)
if ik == '@xmlns' and isinstance(iv, dict):
for k, v in iv.items():
- if _has_angle_brackets(k):
- raise ValueError(
- 'Invalid attribute name: "<" or ">" not allowed'
- )
+ _validate_name(k, "attribute")
attr = 'xmlns{}'.format(':{}'.format(k) if k else '')
attrs[attr] = _unicode(v)
continue
if not isinstance(iv, _unicode):
iv = _unicode(iv)
attr_name = ik[len(attr_prefix) :]
- if _has_angle_brackets(attr_name):
- raise ValueError('Invalid attribute name: "<" or ">" not allowed')
+ _validate_name(attr_name, "attribute")
attrs[attr_name] = iv
continue
children.append((ik, iv))
|