Diffstat (limited to 'bitbake/lib/bs4/tests/test_soup.py')
-rw-r--r-- | bitbake/lib/bs4/tests/test_soup.py | 479
1 file changed, 0 insertions, 479 deletions
diff --git a/bitbake/lib/bs4/tests/test_soup.py b/bitbake/lib/bs4/tests/test_soup.py
deleted file mode 100644
index 6ad3cb3765..0000000000
--- a/bitbake/lib/bs4/tests/test_soup.py
+++ /dev/null
@@ -1,479 +0,0 @@
# -*- coding: utf-8 -*-
"""Tests of Beautiful Soup as a whole."""

import logging
import unittest
import sys
import tempfile

from bs4 import BeautifulSoup
from bs4.element import (
    CharsetMetaAttributeValue,
    ContentMetaAttributeValue,
    SoupStrainer,
    NamespacedAttribute,
)
import bs4.dammit
from bs4.dammit import (
    EntitySubstitution,
    UnicodeDammit,
    EncodingDetector,
)
from bs4.testing import (
    SoupTest,
    skipIf,
)
import warnings

try:
    from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
    LXML_PRESENT = True
except ImportError as e:
    LXML_PRESENT = False

PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))

class TestConstructor(SoupTest):

    def test_short_unicode_input(self):
        data = "<h1>éé</h1>"
        soup = self.soup(data)
        self.assertEqual("éé", soup.h1.string)

    def test_embedded_null(self):
        data = "<h1>foo\0bar</h1>"
        soup = self.soup(data)
        self.assertEqual("foo\0bar", soup.h1.string)

    def test_exclude_encodings(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
        self.assertEqual("windows-1252", soup.original_encoding)


class TestWarnings(SoupTest):

    def _assert_no_parser_specified(self, s, is_there=True):
        v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
        self.assertTrue(v)

    def test_warning_if_no_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>")
        msg = str(w[0].message)
        self._assert_no_parser_specified(msg)

    def test_warning_if_parser_specified_too_vague(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", "html")
        msg = str(w[0].message)
        self._assert_no_parser_specified(msg)

    def test_no_warning_if_explicit_parser_specified(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", "html.parser")
        self.assertEqual([], w)

    def test_parseOnlyThese_renamed_to_parse_only(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
        msg = str(w[0].message)
        self.assertTrue("parseOnlyThese" in msg)
        self.assertTrue("parse_only" in msg)
        self.assertEqual(b"<b></b>", soup.encode())

    def test_fromEncoding_renamed_to_from_encoding(self):
        with warnings.catch_warnings(record=True) as w:
            utf8 = b"\xc3\xa9"
            soup = self.soup(utf8, fromEncoding="utf8")
        msg = str(w[0].message)
        self.assertTrue("fromEncoding" in msg)
        self.assertTrue("from_encoding" in msg)
        self.assertEqual("utf8", soup.original_encoding)

    def test_unrecognized_keyword_argument(self):
        self.assertRaises(
            TypeError, self.soup, "<a>", no_such_argument=True)

    def test_disk_file_warning(self):
        filehandle = tempfile.NamedTemporaryFile()
        filename = filehandle.name
        try:
            with warnings.catch_warnings(record=True) as w:
                soup = self.soup(filename)
            msg = str(w[0].message)
            self.assertTrue("looks like a filename" in msg)
        finally:
            filehandle.close()

        # The file no longer exists, so Beautiful Soup will no longer issue the warning.
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup(filename)
        self.assertEqual(0, len(w))

    def test_url_warning(self):
        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("http://www.crummy.com/")
        msg = str(w[0].message)
        self.assertTrue("looks like a URL" in msg)

        with warnings.catch_warnings(record=True) as w:
            soup = self.soup("http://www.crummy.com/ is great")
        self.assertEqual(0, len(w))

class TestSelectiveParsing(SoupTest):

    def test_parse_with_soupstrainer(self):
        markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
        strainer = SoupStrainer("b")
        soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")


class TestEntitySubstitution(unittest.TestCase):
    """Standalone tests of the EntitySubstitution class."""
    def setUp(self):
        self.sub = EntitySubstitution

    def test_simple_html_substitution(self):
        # Unicode characters corresponding to named HTML entities
        # are substituted, and no others.
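        # (\u2200 becomes &forall; and \u00f5 becomes &otilde;, while
        # \N{SNOWMAN} passes through untouched because HTML defines no
        # named entity for it.)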
        s = "foo\u2200\N{SNOWMAN}\u00f5bar"
        self.assertEqual(self.sub.substitute_html(s),
                         "foo&forall;\N{SNOWMAN}&otilde;bar")

    def test_smart_quote_substitution(self):
        # MS smart quotes are a common source of frustration, so we
        # give them a special test.
        quotes = b"\x91\x92foo\x93\x94"
        dammit = UnicodeDammit(quotes)
        self.assertEqual(self.sub.substitute_html(dammit.markup),
                         "&lsquo;&rsquo;foo&ldquo;&rdquo;")

    def test_xml_conversion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
        s = 'Welcome to "my bar"'
        self.assertEqual(self.sub.substitute_xml(s, False), s)

    def test_xml_attribute_quoting_normally_uses_double_quotes(self):
        self.assertEqual(self.sub.substitute_xml("Welcome", True),
                         '"Welcome"')
        self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
                         '"Bob\'s Bar"')

    def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
        s = 'Welcome to "my bar"'
        self.assertEqual(self.sub.substitute_xml(s, True),
                         "'Welcome to \"my bar\"'")

    def test_xml_attribute_quoting_escapes_double_quotes_when_value_contains_both_single_and_double_quotes(self):
        s = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(
            self.sub.substitute_xml(s, True),
            '"Welcome to &quot;Bob\'s Bar&quot;"')

    def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
        quoted = 'Welcome to "Bob\'s Bar"'
        self.assertEqual(self.sub.substitute_xml(quoted), quoted)

    def test_xml_quoting_handles_angle_brackets(self):
        self.assertEqual(
            self.sub.substitute_xml("foo<bar>"),
            "foo&lt;bar&gt;")

    def test_xml_quoting_handles_ampersands(self):
        self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")

    def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
        self.assertEqual(
            self.sub.substitute_xml("&Aacute;T&T"),
            "&amp;Aacute;T&amp;T")

    def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
        self.assertEqual(
            self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
            "&Aacute;T&amp;T")

    def test_quotes_not_html_substituted(self):
        """There's no need to do this except inside attribute values."""
        text = 'Bob\'s "bar"'
        self.assertEqual(self.sub.substitute_html(text), text)


class TestEncodingConversion(SoupTest):
    # Test Beautiful Soup's ability to decode and encode from various
    # encodings.

    def setUp(self):
        super(TestEncodingConversion, self).setUp()
        self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
        self.utf8_data = self.unicode_data.encode("utf-8")
        # Just so you know what it looks like.
        self.assertEqual(
            self.utf8_data,
            b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')

    def test_ascii_in_unicode_out(self):
        # ASCII input is converted to Unicode. The original_encoding
        # attribute is set to 'utf-8', a superset of ASCII.
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            # Disable chardet, which will realize that the ASCII is ASCII.
            bs4.dammit.chardet_dammit = noop
            ascii = b"<foo>a</foo>"
            soup_from_ascii = self.soup(ascii)
            unicode_output = soup_from_ascii.decode()
            self.assertTrue(isinstance(unicode_output, str))
            self.assertEqual(unicode_output, self.document_for(ascii.decode()))
            self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet

    def test_unicode_in_unicode_out(self):
        # Unicode input is left alone. The original_encoding attribute
        # is not set.
        soup_from_unicode = self.soup(self.unicode_data)
        self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
        self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
        self.assertEqual(soup_from_unicode.original_encoding, None)

    def test_utf8_in_unicode_out(self):
        # UTF-8 input is converted to Unicode. The original_encoding
        # attribute is set.
        soup_from_utf8 = self.soup(self.utf8_data)
        self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
        self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')

    def test_utf8_out(self):
        # The internal data structures can be encoded as UTF-8.
        soup_from_unicode = self.soup(self.unicode_data)
        self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)

    @skipIf(
        PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
        "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
    def test_attribute_name_containing_unicode_characters(self):
        markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
        self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))

class TestUnicodeDammit(unittest.TestCase):
    """Standalone tests of UnicodeDammit."""

    def test_unicode_input(self):
        markup = "I'm already Unicode! \N{SNOWMAN}"
        dammit = UnicodeDammit(markup)
        self.assertEqual(dammit.unicode_markup, markup)

    def test_smart_quotes_to_unicode(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup)
        self.assertEqual(
            dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>")

    def test_smart_quotes_to_xml_entities(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="xml")
        self.assertEqual(
            dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")

    def test_smart_quotes_to_html_entities(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="html")
        self.assertEqual(
            dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")

    def test_smart_quotes_to_ascii(self):
        markup = b"<foo>\x91\x92\x93\x94</foo>"
        dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
        self.assertEqual(
            dammit.unicode_markup, """<foo>''""</foo>""")

    def test_detect_utf8(self):
        utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
        dammit = UnicodeDammit(utf8)
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
        self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')


    def test_convert_hebrew(self):
        hebrew = b"\xed\xe5\xec\xf9"
        dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
        self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')

    def test_dont_see_smart_quotes_where_there_are_none(self):
        utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
        dammit = UnicodeDammit(utf_8)
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
        self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)

    def test_ignore_inappropriate_codecs(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_ignore_invalid_codecs(self):
        utf8_data = "Räksmörgås".encode("utf-8")
        for bad_encoding in ['.utf8', '...', 'utF---16.!']:
            dammit = UnicodeDammit(utf8_data, [bad_encoding])
            self.assertEqual(dammit.original_encoding.lower(), 'utf-8')

    def test_exclude_encodings(self):
        # This is UTF-8.
        utf8_data = "Räksmörgås".encode("utf-8")

        # But if we exclude UTF-8 from consideration, the guess is
        # Windows-1252.
        dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
        self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')

        # And if we exclude that, there is no valid guess at all.
        dammit = UnicodeDammit(
            utf8_data, exclude_encodings=["utf-8", "windows-1252"])
        self.assertEqual(dammit.original_encoding, None)

    def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
        detected = EncodingDetector(
            b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
        encodings = list(detected.encodings)
        assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings

    def test_detect_html5_style_meta_tag(self):

        for data in (
            b'<html><meta charset="euc-jp" /></html>',
            b"<html><meta charset='euc-jp' /></html>",
            b"<html><meta charset=euc-jp /></html>",
            b"<html><meta charset=euc-jp/></html>"):
            dammit = UnicodeDammit(data, is_html=True)
            self.assertEqual(
                "euc-jp", dammit.original_encoding)

    def test_last_ditch_entity_replacement(self):
        # This is a UTF-8 document that contains bytestrings
        # completely incompatible with UTF-8 (i.e. encoded with some other
        # encoding).
        #
        # Since there is no consistent encoding for the document,
        # Unicode, Dammit will eventually encode the document as UTF-8
        # and encode the incompatible characters as REPLACEMENT
        # CHARACTER.
        #
        # If chardet is installed, it will detect that the document
        # can be converted into ISO-8859-1 without errors. This happens
        # to be the wrong encoding, but it is a consistent encoding, so the
        # code we're testing here won't run.
        #
        # So we temporarily disable chardet if it's present.
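        # (Roughly speaking, the last-ditch conversion has the same effect
        # as decoding with errors="replace"; for example,
        # b"\310\322".decode("utf-8", "replace") gives "\ufffd\ufffd".)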
        doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
<html><b>\330\250\330\252\330\261</b>
<i>\310\322\321\220\312\321\355\344</i></html>"""
        chardet = bs4.dammit.chardet_dammit
        logging.disable(logging.WARNING)
        try:
            def noop(str):
                return None
            bs4.dammit.chardet_dammit = noop
            dammit = UnicodeDammit(doc)
            self.assertEqual(True, dammit.contains_replacement_characters)
            self.assertTrue("\ufffd" in dammit.unicode_markup)

            soup = BeautifulSoup(doc, "html.parser")
            self.assertTrue(soup.contains_replacement_characters)
        finally:
            logging.disable(logging.NOTSET)
            bs4.dammit.chardet_dammit = chardet

    def test_byte_order_mark_removed(self):
        # A document written in UTF-16LE will have its byte order marker stripped.
        data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
        dammit = UnicodeDammit(data)
        self.assertEqual("<a>áé</a>", dammit.unicode_markup)
        self.assertEqual("utf-16le", dammit.original_encoding)

    def test_detwingle(self):
        # Here's a UTF8 document.
        utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")

        # Here's a Windows-1252 document.
        windows_1252 = (
            "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
            "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")

        # Through some unholy alchemy, they've been stuck together.
        doc = utf8 + windows_1252 + utf8

        # The document can't be turned into UTF-8:
        self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")

        # Unicode, Dammit thinks the whole document is Windows-1252,
        # and decodes it into "â˜ƒâ˜ƒâ˜ƒ“Hi, I like Windows!”â˜ƒâ˜ƒâ˜ƒ"

        # But if we run it through detwingle(), it's fixed:

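        # (Each UTF-8 snowman is the three bytes \xe2\x98\x83; Windows-1252
        # reads them as the characters "â", "˜" and "ƒ", which is where
        # the "â˜ƒ" garbage above comes from.)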
        fixed = UnicodeDammit.detwingle(doc)
        self.assertEqual(
            "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))

    def test_detwingle_ignores_multibyte_characters(self):
        # Each of these characters has a UTF-8 representation ending
        # in \x93. \x93 is a smart quote if interpreted as
        # Windows-1252. But our code knows to skip over multibyte
        # UTF-8 characters, so they'll survive the process unscathed.
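        # (By contrast, a bare b"\x93" that is not part of a valid UTF-8
        # sequence is exactly the kind of byte detwingle() rewrites, turning
        # it into the UTF-8 encoding of the corresponding smart quote.)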
        for tricky_unicode_char in (
            "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
            "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
            "\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
            ):
            input = tricky_unicode_char.encode("utf8")
            self.assertTrue(input.endswith(b'\x93'))
            output = UnicodeDammit.detwingle(input)
            self.assertEqual(output, input)

class TestNamespacedAttribute(SoupTest):

    def test_name_may_be_none(self):
        a = NamespacedAttribute("xmlns", None)
        self.assertEqual(a, "xmlns")

    def test_attribute_is_equivalent_to_colon_separated_string(self):
        a = NamespacedAttribute("a", "b")
        self.assertEqual("a:b", a)

    def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
        a = NamespacedAttribute("a", "b", "c")
        b = NamespacedAttribute("a", "b", "c")
        self.assertEqual(a, b)

        # The actual namespace is not considered.
        c = NamespacedAttribute("a", "b", None)
        self.assertEqual(a, c)

        # But name and prefix are important.
        d = NamespacedAttribute("a", "z", "c")
        self.assertNotEqual(a, d)

        e = NamespacedAttribute("z", "b", "c")
        self.assertNotEqual(a, e)


class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):

    def test_charset_meta_attribute_value(self):
        value = CharsetMetaAttributeValue("euc-jp")
        self.assertEqual("euc-jp", value)
        self.assertEqual("euc-jp", value.original_value)
        self.assertEqual("utf8", value.encode("utf8"))


    def test_content_meta_attribute_value(self):
        value = ContentMetaAttributeValue("text/html; charset=euc-jp")
        self.assertEqual("text/html; charset=euc-jp", value)
        self.assertEqual("text/html; charset=euc-jp", value.original_value)
        self.assertEqual("text/html; charset=utf8", value.encode("utf8"))