summary refs log tree commit diff stats
path: root/bitbake/lib/bs4/testing.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bs4/testing.py')
-rw-r--r-- bitbake/lib/bs4/testing.py 129
1 files changed, 112 insertions, 17 deletions
diff --git a/bitbake/lib/bs4/testing.py b/bitbake/lib/bs4/testing.py
index fd4495ac58..3a2f260e24 100644
--- a/bitbake/lib/bs4/testing.py
+++ b/bitbake/lib/bs4/testing.py
@@ -1,5 +1,8 @@
1"""Helper classes for tests.""" 1"""Helper classes for tests."""
2 2
3__license__ = "MIT"
4
5import pickle
3import copy 6import copy
4import functools 7import functools
5import unittest 8import unittest
@@ -43,6 +46,16 @@ class SoupTest(unittest.TestCase):
43 46
44 self.assertEqual(obj.decode(), self.document_for(compare_parsed_to)) 47 self.assertEqual(obj.decode(), self.document_for(compare_parsed_to))
45 48
49 def assertConnectedness(self, element):
50 """Ensure that next_element and previous_element are properly
51 set for all descendants of the given element.
52 """
53 earlier = None
54 for e in element.descendants:
55 if earlier:
56 self.assertEqual(e, earlier.next_element)
57 self.assertEqual(earlier, e.previous_element)
58 earlier = e
46 59
47class HTMLTreeBuilderSmokeTest(object): 60class HTMLTreeBuilderSmokeTest(object):
48 61
@@ -54,6 +67,15 @@ class HTMLTreeBuilderSmokeTest(object):
54 markup in these tests, there's not much room for interpretation. 67 markup in these tests, there's not much room for interpretation.
55 """ 68 """
56 69
70 def test_pickle_and_unpickle_identity(self):
71 # Pickling a tree, then unpickling it, yields a tree identical
72 # to the original.
73 tree = self.soup("<a><b>foo</a>")
74 dumped = pickle.dumps(tree, 2)
75 loaded = pickle.loads(dumped)
76 self.assertEqual(loaded.__class__, BeautifulSoup)
77 self.assertEqual(loaded.decode(), tree.decode())
78
57 def assertDoctypeHandled(self, doctype_fragment): 79 def assertDoctypeHandled(self, doctype_fragment):
58 """Assert that a given doctype string is handled correctly.""" 80 """Assert that a given doctype string is handled correctly."""
59 doctype_str, soup = self._document_with_doctype(doctype_fragment) 81 doctype_str, soup = self._document_with_doctype(doctype_fragment)
@@ -114,6 +136,11 @@ class HTMLTreeBuilderSmokeTest(object):
114 soup.encode("utf-8").replace(b"\n", b""), 136 soup.encode("utf-8").replace(b"\n", b""),
115 markup.replace(b"\n", b"")) 137 markup.replace(b"\n", b""))
116 138
139 def test_processing_instruction(self):
140 markup = b"""<?PITarget PIContent?>"""
141 soup = self.soup(markup)
142 self.assertEqual(markup, soup.encode("utf8"))
143
117 def test_deepcopy(self): 144 def test_deepcopy(self):
118 """Make sure you can copy the tree builder. 145 """Make sure you can copy the tree builder.
119 146
@@ -155,6 +182,23 @@ class HTMLTreeBuilderSmokeTest(object):
155 def test_nested_formatting_elements(self): 182 def test_nested_formatting_elements(self):
156 self.assertSoupEquals("<em><em></em></em>") 183 self.assertSoupEquals("<em><em></em></em>")
157 184
185 def test_double_head(self):
186 html = '''<!DOCTYPE html>
187<html>
188<head>
189<title>Ordinary HEAD element test</title>
190</head>
191<script type="text/javascript">
192alert("Help!");
193</script>
194<body>
195Hello, world!
196</body>
197</html>
198'''
199 soup = self.soup(html)
200 self.assertEqual("text/javascript", soup.find('script')['type'])
201
158 def test_comment(self): 202 def test_comment(self):
159 # Comments are represented as Comment objects. 203 # Comments are represented as Comment objects.
160 markup = "<p>foo<!--foobar-->baz</p>" 204 markup = "<p>foo<!--foobar-->baz</p>"
@@ -221,18 +265,26 @@ class HTMLTreeBuilderSmokeTest(object):
221 soup = self.soup(markup) 265 soup = self.soup(markup)
222 self.assertEqual(["css"], soup.div.div['class']) 266 self.assertEqual(["css"], soup.div.div['class'])
223 267
268 def test_multivalued_attribute_on_html(self):
269 # html5lib uses a different API to set the attributes of the
270 # <html> tag. This has caused problems with multivalued
271 # attributes.
272 markup = '<html class="a b"></html>'
273 soup = self.soup(markup)
274 self.assertEqual(["a", "b"], soup.html['class'])
275
224 def test_angle_brackets_in_attribute_values_are_escaped(self): 276 def test_angle_brackets_in_attribute_values_are_escaped(self):
225 self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>') 277 self.assertSoupEquals('<a b="<a>"></a>', '<a b="&lt;a&gt;"></a>')
226 278
227 def test_entities_in_attributes_converted_to_unicode(self): 279 def test_entities_in_attributes_converted_to_unicode(self):
228 expect = u'<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>' 280 expect = '<p id="pi\N{LATIN SMALL LETTER N WITH TILDE}ata"></p>'
229 self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect) 281 self.assertSoupEquals('<p id="pi&#241;ata"></p>', expect)
230 self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect) 282 self.assertSoupEquals('<p id="pi&#xf1;ata"></p>', expect)
231 self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect) 283 self.assertSoupEquals('<p id="pi&#Xf1;ata"></p>', expect)
232 self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect) 284 self.assertSoupEquals('<p id="pi&ntilde;ata"></p>', expect)
233 285
234 def test_entities_in_text_converted_to_unicode(self): 286 def test_entities_in_text_converted_to_unicode(self):
235 expect = u'<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>' 287 expect = '<p>pi\N{LATIN SMALL LETTER N WITH TILDE}ata</p>'
236 self.assertSoupEquals("<p>pi&#241;ata</p>", expect) 288 self.assertSoupEquals("<p>pi&#241;ata</p>", expect)
237 self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect) 289 self.assertSoupEquals("<p>pi&#xf1;ata</p>", expect)
238 self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect) 290 self.assertSoupEquals("<p>pi&#Xf1;ata</p>", expect)
@@ -243,7 +295,7 @@ class HTMLTreeBuilderSmokeTest(object):
243 '<p>I said "good day!"</p>') 295 '<p>I said "good day!"</p>')
244 296
245 def test_out_of_range_entity(self): 297 def test_out_of_range_entity(self):
246 expect = u"\N{REPLACEMENT CHARACTER}" 298 expect = "\N{REPLACEMENT CHARACTER}"
247 self.assertSoupEquals("&#10000000000000;", expect) 299 self.assertSoupEquals("&#10000000000000;", expect)
248 self.assertSoupEquals("&#x10000000000000;", expect) 300 self.assertSoupEquals("&#x10000000000000;", expect)
249 self.assertSoupEquals("&#1000000000;", expect) 301 self.assertSoupEquals("&#1000000000;", expect)
@@ -253,6 +305,35 @@ class HTMLTreeBuilderSmokeTest(object):
253 soup = self.soup("<html><h2>\nfoo</h2><p></p></html>") 305 soup = self.soup("<html><h2>\nfoo</h2><p></p></html>")
254 self.assertEqual("p", soup.h2.string.next_element.name) 306 self.assertEqual("p", soup.h2.string.next_element.name)
255 self.assertEqual("p", soup.p.name) 307 self.assertEqual("p", soup.p.name)
308 self.assertConnectedness(soup)
309
310 def test_head_tag_between_head_and_body(self):
311 "Prevent recurrence of a bug in the html5lib treebuilder."
312 content = """<html><head></head>
313 <link></link>
314 <body>foo</body>
315</html>
316"""
317 soup = self.soup(content)
318 self.assertNotEqual(None, soup.html.body)
319 self.assertConnectedness(soup)
320
321 def test_multiple_copies_of_a_tag(self):
322 "Prevent recurrence of a bug in the html5lib treebuilder."
323 content = """<!DOCTYPE html>
324<html>
325 <body>
326 <article id="a" >
327 <div><a href="1"></div>
328 <footer>
329 <a href="2"></a>
330 </footer>
331 </article>
332 </body>
333</html>
334"""
335 soup = self.soup(content)
336 self.assertConnectedness(soup.article)
256 337
257 def test_basic_namespaces(self): 338 def test_basic_namespaces(self):
258 """Parsers don't need to *understand* namespaces, but at the 339 """Parsers don't need to *understand* namespaces, but at the
@@ -285,9 +366,9 @@ class HTMLTreeBuilderSmokeTest(object):
285 # A seemingly innocuous document... but it's in Unicode! And 366 # A seemingly innocuous document... but it's in Unicode! And
286 # it contains characters that can't be represented in the 367 # it contains characters that can't be represented in the
287 # encoding found in the declaration! The horror! 368 # encoding found in the declaration! The horror!
288 markup = u'<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>' 369 markup = '<html><head><meta encoding="euc-jp"></head><body>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</body>'
289 soup = self.soup(markup) 370 soup = self.soup(markup)
290 self.assertEqual(u'Sacr\xe9 bleu!', soup.body.string) 371 self.assertEqual('Sacr\xe9 bleu!', soup.body.string)
291 372
292 def test_soupstrainer(self): 373 def test_soupstrainer(self):
293 """Parsers should be able to work with SoupStrainers.""" 374 """Parsers should be able to work with SoupStrainers."""
@@ -327,7 +408,7 @@ class HTMLTreeBuilderSmokeTest(object):
327 # Both XML and HTML entities are converted to Unicode characters 408 # Both XML and HTML entities are converted to Unicode characters
328 # during parsing. 409 # during parsing.
329 text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>" 410 text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
330 expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>" 411 expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>"
331 self.assertSoupEquals(text, expected) 412 self.assertSoupEquals(text, expected)
332 413
333 def test_smart_quotes_converted_on_the_way_in(self): 414 def test_smart_quotes_converted_on_the_way_in(self):
@@ -337,15 +418,15 @@ class HTMLTreeBuilderSmokeTest(object):
337 soup = self.soup(quote) 418 soup = self.soup(quote)
338 self.assertEqual( 419 self.assertEqual(
339 soup.p.string, 420 soup.p.string,
340 u"\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}") 421 "\N{LEFT SINGLE QUOTATION MARK}Foo\N{RIGHT SINGLE QUOTATION MARK}")
341 422
342 def test_non_breaking_spaces_converted_on_the_way_in(self): 423 def test_non_breaking_spaces_converted_on_the_way_in(self):
343 soup = self.soup("<a>&nbsp;&nbsp;</a>") 424 soup = self.soup("<a>&nbsp;&nbsp;</a>")
344 self.assertEqual(soup.a.string, u"\N{NO-BREAK SPACE}" * 2) 425 self.assertEqual(soup.a.string, "\N{NO-BREAK SPACE}" * 2)
345 426
346 def test_entities_converted_on_the_way_out(self): 427 def test_entities_converted_on_the_way_out(self):
347 text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>" 428 text = "<p>&lt;&lt;sacr&eacute;&#32;bleu!&gt;&gt;</p>"
348 expected = u"<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8") 429 expected = "<p>&lt;&lt;sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</p>".encode("utf-8")
349 soup = self.soup(text) 430 soup = self.soup(text)
350 self.assertEqual(soup.p.encode("utf-8"), expected) 431 self.assertEqual(soup.p.encode("utf-8"), expected)
351 432
@@ -354,7 +435,7 @@ class HTMLTreeBuilderSmokeTest(object):
354 # easy-to-understand document. 435 # easy-to-understand document.
355 436
356 # Here it is in Unicode. Note that it claims to be in ISO-Latin-1. 437 # Here it is in Unicode. Note that it claims to be in ISO-Latin-1.
357 unicode_html = u'<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>' 438 unicode_html = '<html><head><meta content="text/html; charset=ISO-Latin-1" http-equiv="Content-type"/></head><body><p>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</p></body></html>'
358 439
359 # That's because we're going to encode it into ISO-Latin-1, and use 440 # That's because we're going to encode it into ISO-Latin-1, and use
360 # that to test. 441 # that to test.
@@ -463,11 +544,25 @@ class HTMLTreeBuilderSmokeTest(object):
463 544
464class XMLTreeBuilderSmokeTest(object): 545class XMLTreeBuilderSmokeTest(object):
465 546
547 def test_pickle_and_unpickle_identity(self):
548 # Pickling a tree, then unpickling it, yields a tree identical
549 # to the original.
550 tree = self.soup("<a><b>foo</a>")
551 dumped = pickle.dumps(tree, 2)
552 loaded = pickle.loads(dumped)
553 self.assertEqual(loaded.__class__, BeautifulSoup)
554 self.assertEqual(loaded.decode(), tree.decode())
555
466 def test_docstring_generated(self): 556 def test_docstring_generated(self):
467 soup = self.soup("<root/>") 557 soup = self.soup("<root/>")
468 self.assertEqual( 558 self.assertEqual(
469 soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>') 559 soup.encode(), b'<?xml version="1.0" encoding="utf-8"?>\n<root/>')
470 560
561 def test_xml_declaration(self):
562 markup = b"""<?xml version="1.0" encoding="utf8"?>\n<foo/>"""
563 soup = self.soup(markup)
564 self.assertEqual(markup, soup.encode("utf8"))
565
471 def test_real_xhtml_document(self): 566 def test_real_xhtml_document(self):
472 """A real XHTML document should come out *exactly* the same as it went in.""" 567 """A real XHTML document should come out *exactly* the same as it went in."""
473 markup = b"""<?xml version="1.0" encoding="utf-8"?> 568 markup = b"""<?xml version="1.0" encoding="utf-8"?>
@@ -485,7 +580,7 @@ class XMLTreeBuilderSmokeTest(object):
485 <script type="text/javascript"> 580 <script type="text/javascript">
486 </script> 581 </script>
487""" 582"""
488 soup = BeautifulSoup(doc, "xml") 583 soup = BeautifulSoup(doc, "lxml-xml")
489 # lxml would have stripped this while parsing, but we can add 584 # lxml would have stripped this while parsing, but we can add
490 # it later. 585 # it later.
491 soup.script.string = 'console.log("< < hey > > ");' 586 soup.script.string = 'console.log("< < hey > > ");'
@@ -493,15 +588,15 @@ class XMLTreeBuilderSmokeTest(object):
493 self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded) 588 self.assertTrue(b"&lt; &lt; hey &gt; &gt;" in encoded)
494 589
495 def test_can_parse_unicode_document(self): 590 def test_can_parse_unicode_document(self):
496 markup = u'<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>' 591 markup = '<?xml version="1.0" encoding="euc-jp"><root>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</root>'
497 soup = self.soup(markup) 592 soup = self.soup(markup)
498 self.assertEqual(u'Sacr\xe9 bleu!', soup.root.string) 593 self.assertEqual('Sacr\xe9 bleu!', soup.root.string)
499 594
500 def test_popping_namespaced_tag(self): 595 def test_popping_namespaced_tag(self):
501 markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>' 596 markup = '<rss xmlns:dc="foo"><dc:creator>b</dc:creator><dc:date>2012-07-02T20:33:42Z</dc:date><dc:rights>c</dc:rights><image>d</image></rss>'
502 soup = self.soup(markup) 597 soup = self.soup(markup)
503 self.assertEqual( 598 self.assertEqual(
504 unicode(soup.rss), markup) 599 str(soup.rss), markup)
505 600
506 def test_docstring_includes_correct_encoding(self): 601 def test_docstring_includes_correct_encoding(self):
507 soup = self.soup("<root/>") 602 soup = self.soup("<root/>")
@@ -532,17 +627,17 @@ class XMLTreeBuilderSmokeTest(object):
532 def test_closing_namespaced_tag(self): 627 def test_closing_namespaced_tag(self):
533 markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>' 628 markup = '<p xmlns:dc="http://purl.org/dc/elements/1.1/"><dc:date>20010504</dc:date></p>'
534 soup = self.soup(markup) 629 soup = self.soup(markup)
535 self.assertEqual(unicode(soup.p), markup) 630 self.assertEqual(str(soup.p), markup)
536 631
537 def test_namespaced_attributes(self): 632 def test_namespaced_attributes(self):
538 markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>' 633 markup = '<foo xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><bar xsi:schemaLocation="http://www.example.com"/></foo>'
539 soup = self.soup(markup) 634 soup = self.soup(markup)
540 self.assertEqual(unicode(soup.foo), markup) 635 self.assertEqual(str(soup.foo), markup)
541 636
542 def test_namespaced_attributes_xml_namespace(self): 637 def test_namespaced_attributes_xml_namespace(self):
543 markup = '<foo xml:lang="fr">bar</foo>' 638 markup = '<foo xml:lang="fr">bar</foo>'
544 soup = self.soup(markup) 639 soup = self.soup(markup)
545 self.assertEqual(unicode(soup.foo), markup) 640 self.assertEqual(str(soup.foo), markup)
546 641
547class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest): 642class HTML5TreeBuilderSmokeTest(HTMLTreeBuilderSmokeTest):
548 """Smoke test for a tree builder that supports HTML5.""" 643 """Smoke test for a tree builder that supports HTML5."""