diff options
| author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-05-06 09:06:51 +0100 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-06-02 08:24:02 +0100 |
| commit | 822eabf32dd69346071bd25fc3639db252d2f346 (patch) | |
| tree | edac6d1d0d5114a4e3c72fea5589c069453b72d2 /bitbake/lib/bs4/tests | |
| parent | 4f8959324df3b89487973bd4e8de21debb0a12ef (diff) | |
| download | poky-822eabf32dd69346071bd25fc3639db252d2f346.tar.gz | |
bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version)
Upgrade to 4.4.1, which has been run through 2to3 as per the maintainer's
recommendation for Python 3 use.
(Bitbake rev: 2f4b98af93c971a8c466ffaf3c09cca0edb6e3ad)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bs4/tests')
| -rw-r--r-- | bitbake/lib/bs4/tests/test_builder_registry.py | 14 | ||||
| -rw-r--r-- | bitbake/lib/bs4/tests/test_html5lib.py | 19 | ||||
| -rw-r--r-- | bitbake/lib/bs4/tests/test_htmlparser.py | 13 | ||||
| -rw-r--r-- | bitbake/lib/bs4/tests/test_lxml.py | 19 | ||||
| -rw-r--r-- | bitbake/lib/bs4/tests/test_soup.py | 107 | ||||
| -rw-r--r-- | bitbake/lib/bs4/tests/test_tree.py | 294 |
6 files changed, 357 insertions(+), 109 deletions(-)
diff --git a/bitbake/lib/bs4/tests/test_builder_registry.py b/bitbake/lib/bs4/tests/test_builder_registry.py index 92ad10fb04..90cad82933 100644 --- a/bitbake/lib/bs4/tests/test_builder_registry.py +++ b/bitbake/lib/bs4/tests/test_builder_registry.py | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | """Tests of the builder registry.""" | 1 | """Tests of the builder registry.""" |
| 2 | 2 | ||
| 3 | import unittest | 3 | import unittest |
| 4 | import warnings | ||
| 4 | 5 | ||
| 5 | from bs4 import BeautifulSoup | 6 | from bs4 import BeautifulSoup |
| 6 | from bs4.builder import ( | 7 | from bs4.builder import ( |
| @@ -67,10 +68,15 @@ class BuiltInRegistryTest(unittest.TestCase): | |||
| 67 | HTMLParserTreeBuilder) | 68 | HTMLParserTreeBuilder) |
| 68 | 69 | ||
| 69 | def test_beautifulsoup_constructor_does_lookup(self): | 70 | def test_beautifulsoup_constructor_does_lookup(self): |
| 70 | # You can pass in a string. | 71 | |
| 71 | BeautifulSoup("", features="html") | 72 | with warnings.catch_warnings(record=True) as w: |
| 72 | # Or a list of strings. | 73 | # This will create a warning about not explicitly |
| 73 | BeautifulSoup("", features=["html", "fast"]) | 74 | # specifying a parser, but we'll ignore it. |
| 75 | |||
| 76 | # You can pass in a string. | ||
| 77 | BeautifulSoup("", features="html") | ||
| 78 | # Or a list of strings. | ||
| 79 | BeautifulSoup("", features=["html", "fast"]) | ||
| 74 | 80 | ||
| 75 | # You'll get an exception if BS can't find an appropriate | 81 | # You'll get an exception if BS can't find an appropriate |
| 76 | # builder. | 82 | # builder. |
diff --git a/bitbake/lib/bs4/tests/test_html5lib.py b/bitbake/lib/bs4/tests/test_html5lib.py index 594c3e1f26..a7494ca5ba 100644 --- a/bitbake/lib/bs4/tests/test_html5lib.py +++ b/bitbake/lib/bs4/tests/test_html5lib.py | |||
| @@ -5,7 +5,7 @@ import warnings | |||
| 5 | try: | 5 | try: |
| 6 | from bs4.builder import HTML5TreeBuilder | 6 | from bs4.builder import HTML5TreeBuilder |
| 7 | HTML5LIB_PRESENT = True | 7 | HTML5LIB_PRESENT = True |
| 8 | except ImportError, e: | 8 | except ImportError as e: |
| 9 | HTML5LIB_PRESENT = False | 9 | HTML5LIB_PRESENT = False |
| 10 | from bs4.element import SoupStrainer | 10 | from bs4.element import SoupStrainer |
| 11 | from bs4.testing import ( | 11 | from bs4.testing import ( |
| @@ -74,12 +74,25 @@ class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest): | |||
| 74 | def test_reparented_markup(self): | 74 | def test_reparented_markup(self): |
| 75 | markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>' | 75 | markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>' |
| 76 | soup = self.soup(markup) | 76 | soup = self.soup(markup) |
| 77 | self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode()) | 77 | self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode()) |
| 78 | self.assertEqual(2, len(soup.find_all('p'))) | 78 | self.assertEqual(2, len(soup.find_all('p'))) |
| 79 | 79 | ||
| 80 | 80 | ||
| 81 | def test_reparented_markup_ends_with_whitespace(self): | 81 | def test_reparented_markup_ends_with_whitespace(self): |
| 82 | markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n' | 82 | markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n' |
| 83 | soup = self.soup(markup) | 83 | soup = self.soup(markup) |
| 84 | self.assertEqual(u"<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) | 84 | self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode()) |
| 85 | self.assertEqual(2, len(soup.find_all('p'))) | 85 | self.assertEqual(2, len(soup.find_all('p'))) |
| 86 | |||
| 87 | def test_processing_instruction(self): | ||
| 88 | """Processing instructions become comments.""" | ||
| 89 | markup = b"""<?PITarget PIContent?>""" | ||
| 90 | soup = self.soup(markup) | ||
| 91 | assert str(soup).startswith("<!--?PITarget PIContent?-->") | ||
| 92 | |||
| 93 | def test_cloned_multivalue_node(self): | ||
| 94 | markup = b"""<a class="my_class"><p></a>""" | ||
| 95 | soup = self.soup(markup) | ||
| 96 | a1, a2 = soup.find_all('a') | ||
| 97 | self.assertEqual(a1, a2) | ||
| 98 | assert a1 is not a2 | ||
diff --git a/bitbake/lib/bs4/tests/test_htmlparser.py b/bitbake/lib/bs4/tests/test_htmlparser.py index bcb5ed232f..b45e35f999 100644 --- a/bitbake/lib/bs4/tests/test_htmlparser.py +++ b/bitbake/lib/bs4/tests/test_htmlparser.py | |||
| @@ -1,6 +1,8 @@ | |||
| 1 | """Tests to ensure that the html.parser tree builder generates good | 1 | """Tests to ensure that the html.parser tree builder generates good |
| 2 | trees.""" | 2 | trees.""" |
| 3 | 3 | ||
| 4 | from pdb import set_trace | ||
| 5 | import pickle | ||
| 4 | from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest | 6 | from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest |
| 5 | from bs4.builder import HTMLParserTreeBuilder | 7 | from bs4.builder import HTMLParserTreeBuilder |
| 6 | 8 | ||
| @@ -17,3 +19,14 @@ class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): | |||
| 17 | def test_namespaced_public_doctype(self): | 19 | def test_namespaced_public_doctype(self): |
| 18 | # html.parser can't handle namespaced doctypes, so skip this one. | 20 | # html.parser can't handle namespaced doctypes, so skip this one. |
| 19 | pass | 21 | pass |
| 22 | |||
| 23 | def test_builder_is_pickled(self): | ||
| 24 | """Unlike most tree builders, HTMLParserTreeBuilder and will | ||
| 25 | be restored after pickling. | ||
| 26 | """ | ||
| 27 | tree = self.soup("<a><b>foo</a>") | ||
| 28 | dumped = pickle.dumps(tree, 2) | ||
| 29 | loaded = pickle.loads(dumped) | ||
| 30 | self.assertTrue(isinstance(loaded.builder, type(tree.builder))) | ||
| 31 | |||
| 32 | |||
diff --git a/bitbake/lib/bs4/tests/test_lxml.py b/bitbake/lib/bs4/tests/test_lxml.py index 2b2e9b7e78..6c2a1d73eb 100644 --- a/bitbake/lib/bs4/tests/test_lxml.py +++ b/bitbake/lib/bs4/tests/test_lxml.py | |||
| @@ -7,7 +7,7 @@ try: | |||
| 7 | import lxml.etree | 7 | import lxml.etree |
| 8 | LXML_PRESENT = True | 8 | LXML_PRESENT = True |
| 9 | LXML_VERSION = lxml.etree.LXML_VERSION | 9 | LXML_VERSION = lxml.etree.LXML_VERSION |
| 10 | except ImportError, e: | 10 | except ImportError as e: |
| 11 | LXML_PRESENT = False | 11 | LXML_PRESENT = False |
| 12 | LXML_VERSION = (0,) | 12 | LXML_VERSION = (0,) |
| 13 | 13 | ||
| @@ -62,24 +62,9 @@ class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest): | |||
| 62 | # if one is installed. | 62 | # if one is installed. |
| 63 | with warnings.catch_warnings(record=True) as w: | 63 | with warnings.catch_warnings(record=True) as w: |
| 64 | soup = BeautifulStoneSoup("<b />") | 64 | soup = BeautifulStoneSoup("<b />") |
| 65 | self.assertEqual(u"<b/>", unicode(soup.b)) | 65 | self.assertEqual("<b/>", str(soup.b)) |
| 66 | self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) | 66 | self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message)) |
| 67 | 67 | ||
| 68 | def test_real_xhtml_document(self): | ||
| 69 | """lxml strips the XML definition from an XHTML doc, which is fine.""" | ||
| 70 | markup = b"""<?xml version="1.0" encoding="utf-8"?> | ||
| 71 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"> | ||
| 72 | <html xmlns="http://www.w3.org/1999/xhtml"> | ||
| 73 | <head><title>Hello.</title></head> | ||
| 74 | <body>Goodbye.</body> | ||
| 75 | </html>""" | ||
| 76 | soup = self.soup(markup) | ||
| 77 | self.assertEqual( | ||
| 78 | soup.encode("utf-8").replace(b"\n", b''), | ||
| 79 | markup.replace(b'\n', b'').replace( | ||
| 80 | b'<?xml version="1.0" encoding="utf-8"?>', b'')) | ||
| 81 | |||
| 82 | |||
| 83 | @skipIf( | 68 | @skipIf( |
| 84 | not LXML_PRESENT, | 69 | not LXML_PRESENT, |
| 85 | "lxml seems not to be present, not testing its XML tree builder.") | 70 | "lxml seems not to be present, not testing its XML tree builder.") |
diff --git a/bitbake/lib/bs4/tests/test_soup.py b/bitbake/lib/bs4/tests/test_soup.py index 47ac245f99..f87949e3d3 100644 --- a/bitbake/lib/bs4/tests/test_soup.py +++ b/bitbake/lib/bs4/tests/test_soup.py | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | # -*- coding: utf-8 -*- | 1 | # -*- coding: utf-8 -*- |
| 2 | """Tests of Beautiful Soup as a whole.""" | 2 | """Tests of Beautiful Soup as a whole.""" |
| 3 | 3 | ||
| 4 | from pdb import set_trace | ||
| 4 | import logging | 5 | import logging |
| 5 | import unittest | 6 | import unittest |
| 6 | import sys | 7 | import sys |
| @@ -20,6 +21,7 @@ import bs4.dammit | |||
| 20 | from bs4.dammit import ( | 21 | from bs4.dammit import ( |
| 21 | EntitySubstitution, | 22 | EntitySubstitution, |
| 22 | UnicodeDammit, | 23 | UnicodeDammit, |
| 24 | EncodingDetector, | ||
| 23 | ) | 25 | ) |
| 24 | from bs4.testing import ( | 26 | from bs4.testing import ( |
| 25 | SoupTest, | 27 | SoupTest, |
| @@ -30,7 +32,7 @@ import warnings | |||
| 30 | try: | 32 | try: |
| 31 | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML | 33 | from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML |
| 32 | LXML_PRESENT = True | 34 | LXML_PRESENT = True |
| 33 | except ImportError, e: | 35 | except ImportError as e: |
| 34 | LXML_PRESENT = False | 36 | LXML_PRESENT = False |
| 35 | 37 | ||
| 36 | PYTHON_2_PRE_2_7 = (sys.version_info < (2,7)) | 38 | PYTHON_2_PRE_2_7 = (sys.version_info < (2,7)) |
| @@ -39,17 +41,43 @@ PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2)) | |||
| 39 | class TestConstructor(SoupTest): | 41 | class TestConstructor(SoupTest): |
| 40 | 42 | ||
| 41 | def test_short_unicode_input(self): | 43 | def test_short_unicode_input(self): |
| 42 | data = u"<h1>éé</h1>" | 44 | data = "<h1>éé</h1>" |
| 43 | soup = self.soup(data) | 45 | soup = self.soup(data) |
| 44 | self.assertEqual(u"éé", soup.h1.string) | 46 | self.assertEqual("éé", soup.h1.string) |
| 45 | 47 | ||
| 46 | def test_embedded_null(self): | 48 | def test_embedded_null(self): |
| 47 | data = u"<h1>foo\0bar</h1>" | 49 | data = "<h1>foo\0bar</h1>" |
| 48 | soup = self.soup(data) | 50 | soup = self.soup(data) |
| 49 | self.assertEqual(u"foo\0bar", soup.h1.string) | 51 | self.assertEqual("foo\0bar", soup.h1.string) |
| 50 | 52 | ||
| 53 | def test_exclude_encodings(self): | ||
| 54 | utf8_data = "Räksmörgås".encode("utf-8") | ||
| 55 | soup = self.soup(utf8_data, exclude_encodings=["utf-8"]) | ||
| 56 | self.assertEqual("windows-1252", soup.original_encoding) | ||
| 51 | 57 | ||
| 52 | class TestDeprecatedConstructorArguments(SoupTest): | 58 | |
| 59 | class TestWarnings(SoupTest): | ||
| 60 | |||
| 61 | def _no_parser_specified(self, s, is_there=True): | ||
| 62 | v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80]) | ||
| 63 | self.assertTrue(v) | ||
| 64 | |||
| 65 | def test_warning_if_no_parser_specified(self): | ||
| 66 | with warnings.catch_warnings(record=True) as w: | ||
| 67 | soup = self.soup("<a><b></b></a>") | ||
| 68 | msg = str(w[0].message) | ||
| 69 | self._assert_no_parser_specified(msg) | ||
| 70 | |||
| 71 | def test_warning_if_parser_specified_too_vague(self): | ||
| 72 | with warnings.catch_warnings(record=True) as w: | ||
| 73 | soup = self.soup("<a><b></b></a>", "html") | ||
| 74 | msg = str(w[0].message) | ||
| 75 | self._assert_no_parser_specified(msg) | ||
| 76 | |||
| 77 | def test_no_warning_if_explicit_parser_specified(self): | ||
| 78 | with warnings.catch_warnings(record=True) as w: | ||
| 79 | soup = self.soup("<a><b></b></a>", "html.parser") | ||
| 80 | self.assertEqual([], w) | ||
| 53 | 81 | ||
| 54 | def test_parseOnlyThese_renamed_to_parse_only(self): | 82 | def test_parseOnlyThese_renamed_to_parse_only(self): |
| 55 | with warnings.catch_warnings(record=True) as w: | 83 | with warnings.catch_warnings(record=True) as w: |
| @@ -117,9 +145,9 @@ class TestEntitySubstitution(unittest.TestCase): | |||
| 117 | def test_simple_html_substitution(self): | 145 | def test_simple_html_substitution(self): |
| 118 | # Unicode characters corresponding to named HTML entites | 146 | # Unicode characters corresponding to named HTML entites |
| 119 | # are substituted, and no others. | 147 | # are substituted, and no others. |
| 120 | s = u"foo\u2200\N{SNOWMAN}\u00f5bar" | 148 | s = "foo\u2200\N{SNOWMAN}\u00f5bar" |
| 121 | self.assertEqual(self.sub.substitute_html(s), | 149 | self.assertEqual(self.sub.substitute_html(s), |
| 122 | u"foo∀\N{SNOWMAN}õbar") | 150 | "foo∀\N{SNOWMAN}õbar") |
| 123 | 151 | ||
| 124 | def test_smart_quote_substitution(self): | 152 | def test_smart_quote_substitution(self): |
| 125 | # MS smart quotes are a common source of frustration, so we | 153 | # MS smart quotes are a common source of frustration, so we |
| @@ -184,7 +212,7 @@ class TestEncodingConversion(SoupTest): | |||
| 184 | 212 | ||
| 185 | def setUp(self): | 213 | def setUp(self): |
| 186 | super(TestEncodingConversion, self).setUp() | 214 | super(TestEncodingConversion, self).setUp() |
| 187 | self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>' | 215 | self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>' |
| 188 | self.utf8_data = self.unicode_data.encode("utf-8") | 216 | self.utf8_data = self.unicode_data.encode("utf-8") |
| 189 | # Just so you know what it looks like. | 217 | # Just so you know what it looks like. |
| 190 | self.assertEqual( | 218 | self.assertEqual( |
| @@ -204,7 +232,7 @@ class TestEncodingConversion(SoupTest): | |||
| 204 | ascii = b"<foo>a</foo>" | 232 | ascii = b"<foo>a</foo>" |
| 205 | soup_from_ascii = self.soup(ascii) | 233 | soup_from_ascii = self.soup(ascii) |
| 206 | unicode_output = soup_from_ascii.decode() | 234 | unicode_output = soup_from_ascii.decode() |
| 207 | self.assertTrue(isinstance(unicode_output, unicode)) | 235 | self.assertTrue(isinstance(unicode_output, str)) |
| 208 | self.assertEqual(unicode_output, self.document_for(ascii.decode())) | 236 | self.assertEqual(unicode_output, self.document_for(ascii.decode())) |
| 209 | self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8") | 237 | self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8") |
| 210 | finally: | 238 | finally: |
| @@ -216,7 +244,7 @@ class TestEncodingConversion(SoupTest): | |||
| 216 | # is not set. | 244 | # is not set. |
| 217 | soup_from_unicode = self.soup(self.unicode_data) | 245 | soup_from_unicode = self.soup(self.unicode_data) |
| 218 | self.assertEqual(soup_from_unicode.decode(), self.unicode_data) | 246 | self.assertEqual(soup_from_unicode.decode(), self.unicode_data) |
| 219 | self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!') | 247 | self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!') |
| 220 | self.assertEqual(soup_from_unicode.original_encoding, None) | 248 | self.assertEqual(soup_from_unicode.original_encoding, None) |
| 221 | 249 | ||
| 222 | def test_utf8_in_unicode_out(self): | 250 | def test_utf8_in_unicode_out(self): |
| @@ -224,7 +252,7 @@ class TestEncodingConversion(SoupTest): | |||
| 224 | # attribute is set. | 252 | # attribute is set. |
| 225 | soup_from_utf8 = self.soup(self.utf8_data) | 253 | soup_from_utf8 = self.soup(self.utf8_data) |
| 226 | self.assertEqual(soup_from_utf8.decode(), self.unicode_data) | 254 | self.assertEqual(soup_from_utf8.decode(), self.unicode_data) |
| 227 | self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!') | 255 | self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!') |
| 228 | 256 | ||
| 229 | def test_utf8_out(self): | 257 | def test_utf8_out(self): |
| 230 | # The internal data structures can be encoded as UTF-8. | 258 | # The internal data structures can be encoded as UTF-8. |
| @@ -235,14 +263,14 @@ class TestEncodingConversion(SoupTest): | |||
| 235 | PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2, | 263 | PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2, |
| 236 | "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") | 264 | "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.") |
| 237 | def test_attribute_name_containing_unicode_characters(self): | 265 | def test_attribute_name_containing_unicode_characters(self): |
| 238 | markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>' | 266 | markup = '<div><a \N{SNOWMAN}="snowman"></a></div>' |
| 239 | self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8")) | 267 | self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8")) |
| 240 | 268 | ||
| 241 | class TestUnicodeDammit(unittest.TestCase): | 269 | class TestUnicodeDammit(unittest.TestCase): |
| 242 | """Standalone tests of UnicodeDammit.""" | 270 | """Standalone tests of UnicodeDammit.""" |
| 243 | 271 | ||
| 244 | def test_unicode_input(self): | 272 | def test_unicode_input(self): |
| 245 | markup = u"I'm already Unicode! \N{SNOWMAN}" | 273 | markup = "I'm already Unicode! \N{SNOWMAN}" |
| 246 | dammit = UnicodeDammit(markup) | 274 | dammit = UnicodeDammit(markup) |
| 247 | self.assertEqual(dammit.unicode_markup, markup) | 275 | self.assertEqual(dammit.unicode_markup, markup) |
| 248 | 276 | ||
| @@ -250,7 +278,7 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 250 | markup = b"<foo>\x91\x92\x93\x94</foo>" | 278 | markup = b"<foo>\x91\x92\x93\x94</foo>" |
| 251 | dammit = UnicodeDammit(markup) | 279 | dammit = UnicodeDammit(markup) |
| 252 | self.assertEqual( | 280 | self.assertEqual( |
| 253 | dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>") | 281 | dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>") |
| 254 | 282 | ||
| 255 | def test_smart_quotes_to_xml_entities(self): | 283 | def test_smart_quotes_to_xml_entities(self): |
| 256 | markup = b"<foo>\x91\x92\x93\x94</foo>" | 284 | markup = b"<foo>\x91\x92\x93\x94</foo>" |
| @@ -271,16 +299,17 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 271 | dammit.unicode_markup, """<foo>''""</foo>""") | 299 | dammit.unicode_markup, """<foo>''""</foo>""") |
| 272 | 300 | ||
| 273 | def test_detect_utf8(self): | 301 | def test_detect_utf8(self): |
| 274 | utf8 = b"\xc3\xa9" | 302 | utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83" |
| 275 | dammit = UnicodeDammit(utf8) | 303 | dammit = UnicodeDammit(utf8) |
| 276 | self.assertEqual(dammit.unicode_markup, u'\xe9') | ||
| 277 | self.assertEqual(dammit.original_encoding.lower(), 'utf-8') | 304 | self.assertEqual(dammit.original_encoding.lower(), 'utf-8') |
| 305 | self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}') | ||
| 306 | |||
| 278 | 307 | ||
| 279 | def test_convert_hebrew(self): | 308 | def test_convert_hebrew(self): |
| 280 | hebrew = b"\xed\xe5\xec\xf9" | 309 | hebrew = b"\xed\xe5\xec\xf9" |
| 281 | dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) | 310 | dammit = UnicodeDammit(hebrew, ["iso-8859-8"]) |
| 282 | self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8') | 311 | self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8') |
| 283 | self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9') | 312 | self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9') |
| 284 | 313 | ||
| 285 | def test_dont_see_smart_quotes_where_there_are_none(self): | 314 | def test_dont_see_smart_quotes_where_there_are_none(self): |
| 286 | utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" | 315 | utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch" |
| @@ -289,16 +318,36 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 289 | self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8) | 318 | self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8) |
| 290 | 319 | ||
| 291 | def test_ignore_inappropriate_codecs(self): | 320 | def test_ignore_inappropriate_codecs(self): |
| 292 | utf8_data = u"Räksmörgås".encode("utf-8") | 321 | utf8_data = "Räksmörgås".encode("utf-8") |
| 293 | dammit = UnicodeDammit(utf8_data, ["iso-8859-8"]) | 322 | dammit = UnicodeDammit(utf8_data, ["iso-8859-8"]) |
| 294 | self.assertEqual(dammit.original_encoding.lower(), 'utf-8') | 323 | self.assertEqual(dammit.original_encoding.lower(), 'utf-8') |
| 295 | 324 | ||
| 296 | def test_ignore_invalid_codecs(self): | 325 | def test_ignore_invalid_codecs(self): |
| 297 | utf8_data = u"Räksmörgås".encode("utf-8") | 326 | utf8_data = "Räksmörgås".encode("utf-8") |
| 298 | for bad_encoding in ['.utf8', '...', 'utF---16.!']: | 327 | for bad_encoding in ['.utf8', '...', 'utF---16.!']: |
| 299 | dammit = UnicodeDammit(utf8_data, [bad_encoding]) | 328 | dammit = UnicodeDammit(utf8_data, [bad_encoding]) |
| 300 | self.assertEqual(dammit.original_encoding.lower(), 'utf-8') | 329 | self.assertEqual(dammit.original_encoding.lower(), 'utf-8') |
| 301 | 330 | ||
| 331 | def test_exclude_encodings(self): | ||
| 332 | # This is UTF-8. | ||
| 333 | utf8_data = "Räksmörgås".encode("utf-8") | ||
| 334 | |||
| 335 | # But if we exclude UTF-8 from consideration, the guess is | ||
| 336 | # Windows-1252. | ||
| 337 | dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"]) | ||
| 338 | self.assertEqual(dammit.original_encoding.lower(), 'windows-1252') | ||
| 339 | |||
| 340 | # And if we exclude that, there is no valid guess at all. | ||
| 341 | dammit = UnicodeDammit( | ||
| 342 | utf8_data, exclude_encodings=["utf-8", "windows-1252"]) | ||
| 343 | self.assertEqual(dammit.original_encoding, None) | ||
| 344 | |||
| 345 | def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self): | ||
| 346 | detected = EncodingDetector( | ||
| 347 | b'<?xml version="1.0" encoding="UTF-\xdb" ?>') | ||
| 348 | encodings = list(detected.encodings) | ||
| 349 | assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings | ||
| 350 | |||
| 302 | def test_detect_html5_style_meta_tag(self): | 351 | def test_detect_html5_style_meta_tag(self): |
| 303 | 352 | ||
| 304 | for data in ( | 353 | for data in ( |
| @@ -337,7 +386,7 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 337 | bs4.dammit.chardet_dammit = noop | 386 | bs4.dammit.chardet_dammit = noop |
| 338 | dammit = UnicodeDammit(doc) | 387 | dammit = UnicodeDammit(doc) |
| 339 | self.assertEqual(True, dammit.contains_replacement_characters) | 388 | self.assertEqual(True, dammit.contains_replacement_characters) |
| 340 | self.assertTrue(u"\ufffd" in dammit.unicode_markup) | 389 | self.assertTrue("\ufffd" in dammit.unicode_markup) |
| 341 | 390 | ||
| 342 | soup = BeautifulSoup(doc, "html.parser") | 391 | soup = BeautifulSoup(doc, "html.parser") |
| 343 | self.assertTrue(soup.contains_replacement_characters) | 392 | self.assertTrue(soup.contains_replacement_characters) |
| @@ -349,17 +398,17 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 349 | # A document written in UTF-16LE will have its byte order marker stripped. | 398 | # A document written in UTF-16LE will have its byte order marker stripped. |
| 350 | data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' | 399 | data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00' |
| 351 | dammit = UnicodeDammit(data) | 400 | dammit = UnicodeDammit(data) |
| 352 | self.assertEqual(u"<a>áé</a>", dammit.unicode_markup) | 401 | self.assertEqual("<a>áé</a>", dammit.unicode_markup) |
| 353 | self.assertEqual("utf-16le", dammit.original_encoding) | 402 | self.assertEqual("utf-16le", dammit.original_encoding) |
| 354 | 403 | ||
| 355 | def test_detwingle(self): | 404 | def test_detwingle(self): |
| 356 | # Here's a UTF8 document. | 405 | # Here's a UTF8 document. |
| 357 | utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8") | 406 | utf8 = ("\N{SNOWMAN}" * 3).encode("utf8") |
| 358 | 407 | ||
| 359 | # Here's a Windows-1252 document. | 408 | # Here's a Windows-1252 document. |
| 360 | windows_1252 = ( | 409 | windows_1252 = ( |
| 361 | u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" | 410 | "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!" |
| 362 | u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") | 411 | "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252") |
| 363 | 412 | ||
| 364 | # Through some unholy alchemy, they've been stuck together. | 413 | # Through some unholy alchemy, they've been stuck together. |
| 365 | doc = utf8 + windows_1252 + utf8 | 414 | doc = utf8 + windows_1252 + utf8 |
| @@ -374,7 +423,7 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 374 | 423 | ||
| 375 | fixed = UnicodeDammit.detwingle(doc) | 424 | fixed = UnicodeDammit.detwingle(doc) |
| 376 | self.assertEqual( | 425 | self.assertEqual( |
| 377 | u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8")) | 426 | "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8")) |
| 378 | 427 | ||
| 379 | def test_detwingle_ignores_multibyte_characters(self): | 428 | def test_detwingle_ignores_multibyte_characters(self): |
| 380 | # Each of these characters has a UTF-8 representation ending | 429 | # Each of these characters has a UTF-8 representation ending |
| @@ -382,9 +431,9 @@ class TestUnicodeDammit(unittest.TestCase): | |||
| 382 | # Windows-1252. But our code knows to skip over multibyte | 431 | # Windows-1252. But our code knows to skip over multibyte |
| 383 | # UTF-8 characters, so they'll survive the process unscathed. | 432 | # UTF-8 characters, so they'll survive the process unscathed. |
| 384 | for tricky_unicode_char in ( | 433 | for tricky_unicode_char in ( |
| 385 | u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' | 434 | "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93' |
| 386 | u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' | 435 | "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93' |
| 387 | u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one. | 436 | "\xf0\x90\x90\x93", # This is a CJK character, not sure which one. |
| 388 | ): | 437 | ): |
| 389 | input = tricky_unicode_char.encode("utf8") | 438 | input = tricky_unicode_char.encode("utf8") |
| 390 | self.assertTrue(input.endswith(b'\x93')) | 439 | self.assertTrue(input.endswith(b'\x93')) |
diff --git a/bitbake/lib/bs4/tests/test_tree.py b/bitbake/lib/bs4/tests/test_tree.py index f8515c0ea1..6d3e67f311 100644 --- a/bitbake/lib/bs4/tests/test_tree.py +++ b/bitbake/lib/bs4/tests/test_tree.py | |||
| @@ -9,6 +9,7 @@ same markup, but all Beautiful Soup trees can be traversed with the | |||
| 9 | methods tested here. | 9 | methods tested here. |
| 10 | """ | 10 | """ |
| 11 | 11 | ||
| 12 | from pdb import set_trace | ||
| 12 | import copy | 13 | import copy |
| 13 | import pickle | 14 | import pickle |
| 14 | import re | 15 | import re |
| @@ -19,8 +20,10 @@ from bs4.builder import ( | |||
| 19 | HTMLParserTreeBuilder, | 20 | HTMLParserTreeBuilder, |
| 20 | ) | 21 | ) |
| 21 | from bs4.element import ( | 22 | from bs4.element import ( |
| 23 | PY3K, | ||
| 22 | CData, | 24 | CData, |
| 23 | Comment, | 25 | Comment, |
| 26 | Declaration, | ||
| 24 | Doctype, | 27 | Doctype, |
| 25 | NavigableString, | 28 | NavigableString, |
| 26 | SoupStrainer, | 29 | SoupStrainer, |
| @@ -67,8 +70,14 @@ class TestFind(TreeTest): | |||
| 67 | self.assertEqual(soup.find("b").string, "2") | 70 | self.assertEqual(soup.find("b").string, "2") |
| 68 | 71 | ||
| 69 | def test_unicode_text_find(self): | 72 | def test_unicode_text_find(self): |
| 70 | soup = self.soup(u'<h1>Räksmörgås</h1>') | 73 | soup = self.soup('<h1>Räksmörgås</h1>') |
| 71 | self.assertEqual(soup.find(text=u'Räksmörgås'), u'Räksmörgås') | 74 | self.assertEqual(soup.find(string='Räksmörgås'), 'Räksmörgås') |
| 75 | |||
| 76 | def test_unicode_attribute_find(self): | ||
| 77 | soup = self.soup('<h1 id="Räksmörgås">here it is</h1>') | ||
| 78 | str(soup) | ||
| 79 | self.assertEqual("here it is", soup.find(id='Räksmörgås').text) | ||
| 80 | |||
| 72 | 81 | ||
| 73 | def test_find_everything(self): | 82 | def test_find_everything(self): |
| 74 | """Test an optimization that finds all tags.""" | 83 | """Test an optimization that finds all tags.""" |
| @@ -87,16 +96,17 @@ class TestFindAll(TreeTest): | |||
| 87 | """You can search the tree for text nodes.""" | 96 | """You can search the tree for text nodes.""" |
| 88 | soup = self.soup("<html>Foo<b>bar</b>\xbb</html>") | 97 | soup = self.soup("<html>Foo<b>bar</b>\xbb</html>") |
| 89 | # Exact match. | 98 | # Exact match. |
| 90 | self.assertEqual(soup.find_all(text="bar"), [u"bar"]) | 99 | self.assertEqual(soup.find_all(string="bar"), ["bar"]) |
| 100 | self.assertEqual(soup.find_all(text="bar"), ["bar"]) | ||
| 91 | # Match any of a number of strings. | 101 | # Match any of a number of strings. |
| 92 | self.assertEqual( | 102 | self.assertEqual( |
| 93 | soup.find_all(text=["Foo", "bar"]), [u"Foo", u"bar"]) | 103 | soup.find_all(text=["Foo", "bar"]), ["Foo", "bar"]) |
| 94 | # Match a regular expression. | 104 | # Match a regular expression. |
| 95 | self.assertEqual(soup.find_all(text=re.compile('.*')), | 105 | self.assertEqual(soup.find_all(text=re.compile('.*')), |
| 96 | [u"Foo", u"bar", u'\xbb']) | 106 | ["Foo", "bar", '\xbb']) |
| 97 | # Match anything. | 107 | # Match anything. |
| 98 | self.assertEqual(soup.find_all(text=True), | 108 | self.assertEqual(soup.find_all(text=True), |
| 99 | [u"Foo", u"bar", u'\xbb']) | 109 | ["Foo", "bar", '\xbb']) |
| 100 | 110 | ||
| 101 | def test_find_all_limit(self): | 111 | def test_find_all_limit(self): |
| 102 | """You can limit the number of items returned by find_all.""" | 112 | """You can limit the number of items returned by find_all.""" |
| @@ -227,8 +237,8 @@ class TestFindAllByAttribute(TreeTest): | |||
| 227 | ["Matching a.", "Matching b."]) | 237 | ["Matching a.", "Matching b."]) |
| 228 | 238 | ||
| 229 | def test_find_all_by_utf8_attribute_value(self): | 239 | def test_find_all_by_utf8_attribute_value(self): |
| 230 | peace = u"םולש".encode("utf8") | 240 | peace = "םולש".encode("utf8") |
| 231 | data = u'<a title="םולש"></a>'.encode("utf8") | 241 | data = '<a title="םולש"></a>'.encode("utf8") |
| 232 | soup = self.soup(data) | 242 | soup = self.soup(data) |
| 233 | self.assertEqual([soup.a], soup.find_all(title=peace)) | 243 | self.assertEqual([soup.a], soup.find_all(title=peace)) |
| 234 | self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) | 244 | self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8"))) |
| @@ -688,7 +698,7 @@ class TestTagCreation(SoupTest): | |||
| 688 | 698 | ||
| 689 | def test_tag_inherits_self_closing_rules_from_builder(self): | 699 | def test_tag_inherits_self_closing_rules_from_builder(self): |
| 690 | if XML_BUILDER_PRESENT: | 700 | if XML_BUILDER_PRESENT: |
| 691 | xml_soup = BeautifulSoup("", "xml") | 701 | xml_soup = BeautifulSoup("", "lxml-xml") |
| 692 | xml_br = xml_soup.new_tag("br") | 702 | xml_br = xml_soup.new_tag("br") |
| 693 | xml_p = xml_soup.new_tag("p") | 703 | xml_p = xml_soup.new_tag("p") |
| 694 | 704 | ||
| @@ -697,7 +707,7 @@ class TestTagCreation(SoupTest): | |||
| 697 | self.assertEqual(b"<br/>", xml_br.encode()) | 707 | self.assertEqual(b"<br/>", xml_br.encode()) |
| 698 | self.assertEqual(b"<p/>", xml_p.encode()) | 708 | self.assertEqual(b"<p/>", xml_p.encode()) |
| 699 | 709 | ||
| 700 | html_soup = BeautifulSoup("", "html") | 710 | html_soup = BeautifulSoup("", "html.parser") |
| 701 | html_br = html_soup.new_tag("br") | 711 | html_br = html_soup.new_tag("br") |
| 702 | html_p = html_soup.new_tag("p") | 712 | html_p = html_soup.new_tag("p") |
| 703 | 713 | ||
| @@ -773,6 +783,14 @@ class TestTreeModification(SoupTest): | |||
| 773 | new_a = a.unwrap() | 783 | new_a = a.unwrap() |
| 774 | self.assertEqual(a, new_a) | 784 | self.assertEqual(a, new_a) |
| 775 | 785 | ||
| 786 | def test_replace_with_and_unwrap_give_useful_exception_when_tag_has_no_parent(self): | ||
| 787 | soup = self.soup("<a><b>Foo</b></a><c>Bar</c>") | ||
| 788 | a = soup.a | ||
| 789 | a.extract() | ||
| 790 | self.assertEqual(None, a.parent) | ||
| 791 | self.assertRaises(ValueError, a.unwrap) | ||
| 792 | self.assertRaises(ValueError, a.replace_with, soup.c) | ||
| 793 | |||
| 776 | def test_replace_tag_with_itself(self): | 794 | def test_replace_tag_with_itself(self): |
| 777 | text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>" | 795 | text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>" |
| 778 | soup = self.soup(text) | 796 | soup = self.soup(text) |
| @@ -1067,6 +1085,31 @@ class TestTreeModification(SoupTest): | |||
| 1067 | self.assertEqual(foo_2, soup.a.string) | 1085 | self.assertEqual(foo_2, soup.a.string) |
| 1068 | self.assertEqual(bar_2, soup.b.string) | 1086 | self.assertEqual(bar_2, soup.b.string) |
| 1069 | 1087 | ||
| 1088 | def test_extract_multiples_of_same_tag(self): | ||
| 1089 | soup = self.soup(""" | ||
| 1090 | <html> | ||
| 1091 | <head> | ||
| 1092 | <script>foo</script> | ||
| 1093 | </head> | ||
| 1094 | <body> | ||
| 1095 | <script>bar</script> | ||
| 1096 | <a></a> | ||
| 1097 | </body> | ||
| 1098 | <script>baz</script> | ||
| 1099 | </html>""") | ||
| 1100 | [soup.script.extract() for i in soup.find_all("script")] | ||
| 1101 | self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body)) | ||
| 1102 | |||
| 1103 | |||
| 1104 | def test_extract_works_when_element_is_surrounded_by_identical_strings(self): | ||
| 1105 | soup = self.soup( | ||
| 1106 | '<html>\n' | ||
| 1107 | '<body>hi</body>\n' | ||
| 1108 | '</html>') | ||
| 1109 | soup.find('body').extract() | ||
| 1110 | self.assertEqual(None, soup.find('body')) | ||
| 1111 | |||
| 1112 | |||
| 1070 | def test_clear(self): | 1113 | def test_clear(self): |
| 1071 | """Tag.clear()""" | 1114 | """Tag.clear()""" |
| 1072 | soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>") | 1115 | soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>") |
| @@ -1287,27 +1330,72 @@ class TestPersistence(SoupTest): | |||
| 1287 | 1330 | ||
| 1288 | def test_unicode_pickle(self): | 1331 | def test_unicode_pickle(self): |
| 1289 | # A tree containing Unicode characters can be pickled. | 1332 | # A tree containing Unicode characters can be pickled. |
| 1290 | html = u"<b>\N{SNOWMAN}</b>" | 1333 | html = "<b>\N{SNOWMAN}</b>" |
| 1291 | soup = self.soup(html) | 1334 | soup = self.soup(html) |
| 1292 | dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) | 1335 | dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL) |
| 1293 | loaded = pickle.loads(dumped) | 1336 | loaded = pickle.loads(dumped) |
| 1294 | self.assertEqual(loaded.decode(), soup.decode()) | 1337 | self.assertEqual(loaded.decode(), soup.decode()) |
| 1295 | 1338 | ||
| 1339 | def test_copy_navigablestring_is_not_attached_to_tree(self): | ||
| 1340 | html = "<b>Foo<a></a></b><b>Bar</b>" | ||
| 1341 | soup = self.soup(html) | ||
| 1342 | s1 = soup.find(string="Foo") | ||
| 1343 | s2 = copy.copy(s1) | ||
| 1344 | self.assertEqual(s1, s2) | ||
| 1345 | self.assertEqual(None, s2.parent) | ||
| 1346 | self.assertEqual(None, s2.next_element) | ||
| 1347 | self.assertNotEqual(None, s1.next_sibling) | ||
| 1348 | self.assertEqual(None, s2.next_sibling) | ||
| 1349 | self.assertEqual(None, s2.previous_element) | ||
| 1350 | |||
| 1351 | def test_copy_navigablestring_subclass_has_same_type(self): | ||
| 1352 | html = "<b><!--Foo--></b>" | ||
| 1353 | soup = self.soup(html) | ||
| 1354 | s1 = soup.string | ||
| 1355 | s2 = copy.copy(s1) | ||
| 1356 | self.assertEqual(s1, s2) | ||
| 1357 | self.assertTrue(isinstance(s2, Comment)) | ||
| 1358 | |||
| 1359 | def test_copy_entire_soup(self): | ||
| 1360 | html = "<div><b>Foo<a></a></b><b>Bar</b></div>end" | ||
| 1361 | soup = self.soup(html) | ||
| 1362 | soup_copy = copy.copy(soup) | ||
| 1363 | self.assertEqual(soup, soup_copy) | ||
| 1364 | |||
| 1365 | def test_copy_tag_copies_contents(self): | ||
| 1366 | html = "<div><b>Foo<a></a></b><b>Bar</b></div>end" | ||
| 1367 | soup = self.soup(html) | ||
| 1368 | div = soup.div | ||
| 1369 | div_copy = copy.copy(div) | ||
| 1370 | |||
| 1371 | # The two tags look the same, and evaluate to equal. | ||
| 1372 | self.assertEqual(str(div), str(div_copy)) | ||
| 1373 | self.assertEqual(div, div_copy) | ||
| 1374 | |||
| 1375 | # But they're not the same object. | ||
| 1376 | self.assertFalse(div is div_copy) | ||
| 1377 | |||
| 1378 | # And they don't have the same relation to the parse tree. The | ||
| 1379 | # copy is not associated with a parse tree at all. | ||
| 1380 | self.assertEqual(None, div_copy.parent) | ||
| 1381 | self.assertEqual(None, div_copy.previous_element) | ||
| 1382 | self.assertEqual(None, div_copy.find(string='Bar').next_element) | ||
| 1383 | self.assertNotEqual(None, div.find(string='Bar').next_element) | ||
| 1296 | 1384 | ||
| 1297 | class TestSubstitutions(SoupTest): | 1385 | class TestSubstitutions(SoupTest): |
| 1298 | 1386 | ||
| 1299 | def test_default_formatter_is_minimal(self): | 1387 | def test_default_formatter_is_minimal(self): |
| 1300 | markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" | 1388 | markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" |
| 1301 | soup = self.soup(markup) | 1389 | soup = self.soup(markup) |
| 1302 | decoded = soup.decode(formatter="minimal") | 1390 | decoded = soup.decode(formatter="minimal") |
| 1303 | # The < is converted back into < but the e-with-acute is left alone. | 1391 | # The < is converted back into < but the e-with-acute is left alone. |
| 1304 | self.assertEqual( | 1392 | self.assertEqual( |
| 1305 | decoded, | 1393 | decoded, |
| 1306 | self.document_for( | 1394 | self.document_for( |
| 1307 | u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) | 1395 | "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) |
| 1308 | 1396 | ||
| 1309 | def test_formatter_html(self): | 1397 | def test_formatter_html(self): |
| 1310 | markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" | 1398 | markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" |
| 1311 | soup = self.soup(markup) | 1399 | soup = self.soup(markup) |
| 1312 | decoded = soup.decode(formatter="html") | 1400 | decoded = soup.decode(formatter="html") |
| 1313 | self.assertEqual( | 1401 | self.assertEqual( |
| @@ -1315,49 +1403,49 @@ class TestSubstitutions(SoupTest): | |||
| 1315 | self.document_for("<b><<Sacré bleu!>></b>")) | 1403 | self.document_for("<b><<Sacré bleu!>></b>")) |
| 1316 | 1404 | ||
| 1317 | def test_formatter_minimal(self): | 1405 | def test_formatter_minimal(self): |
| 1318 | markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" | 1406 | markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" |
| 1319 | soup = self.soup(markup) | 1407 | soup = self.soup(markup) |
| 1320 | decoded = soup.decode(formatter="minimal") | 1408 | decoded = soup.decode(formatter="minimal") |
| 1321 | # The < is converted back into < but the e-with-acute is left alone. | 1409 | # The < is converted back into < but the e-with-acute is left alone. |
| 1322 | self.assertEqual( | 1410 | self.assertEqual( |
| 1323 | decoded, | 1411 | decoded, |
| 1324 | self.document_for( | 1412 | self.document_for( |
| 1325 | u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) | 1413 | "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) |
| 1326 | 1414 | ||
| 1327 | def test_formatter_null(self): | 1415 | def test_formatter_null(self): |
| 1328 | markup = u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" | 1416 | markup = "<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>" |
| 1329 | soup = self.soup(markup) | 1417 | soup = self.soup(markup) |
| 1330 | decoded = soup.decode(formatter=None) | 1418 | decoded = soup.decode(formatter=None) |
| 1331 | # Neither the angle brackets nor the e-with-acute are converted. | 1419 | # Neither the angle brackets nor the e-with-acute are converted. |
| 1332 | # This is not valid HTML, but it's what the user wanted. | 1420 | # This is not valid HTML, but it's what the user wanted. |
| 1333 | self.assertEqual(decoded, | 1421 | self.assertEqual(decoded, |
| 1334 | self.document_for(u"<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) | 1422 | self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>")) |
| 1335 | 1423 | ||
| 1336 | def test_formatter_custom(self): | 1424 | def test_formatter_custom(self): |
| 1337 | markup = u"<b><foo></b><b>bar</b>" | 1425 | markup = "<b><foo></b><b>bar</b>" |
| 1338 | soup = self.soup(markup) | 1426 | soup = self.soup(markup) |
| 1339 | decoded = soup.decode(formatter = lambda x: x.upper()) | 1427 | decoded = soup.decode(formatter = lambda x: x.upper()) |
| 1340 | # Instead of normal entity conversion code, the custom | 1428 | # Instead of normal entity conversion code, the custom |
| 1341 | # callable is called on every string. | 1429 | # callable is called on every string. |
| 1342 | self.assertEqual( | 1430 | self.assertEqual( |
| 1343 | decoded, | 1431 | decoded, |
| 1344 | self.document_for(u"<b><FOO></b><b>BAR</b>")) | 1432 | self.document_for("<b><FOO></b><b>BAR</b>")) |
| 1345 | 1433 | ||
| 1346 | def test_formatter_is_run_on_attribute_values(self): | 1434 | def test_formatter_is_run_on_attribute_values(self): |
| 1347 | markup = u'<a href="http://a.com?a=b&c=é">e</a>' | 1435 | markup = '<a href="http://a.com?a=b&c=é">e</a>' |
| 1348 | soup = self.soup(markup) | 1436 | soup = self.soup(markup) |
| 1349 | a = soup.a | 1437 | a = soup.a |
| 1350 | 1438 | ||
| 1351 | expect_minimal = u'<a href="http://a.com?a=b&c=é">e</a>' | 1439 | expect_minimal = '<a href="http://a.com?a=b&c=é">e</a>' |
| 1352 | 1440 | ||
| 1353 | self.assertEqual(expect_minimal, a.decode()) | 1441 | self.assertEqual(expect_minimal, a.decode()) |
| 1354 | self.assertEqual(expect_minimal, a.decode(formatter="minimal")) | 1442 | self.assertEqual(expect_minimal, a.decode(formatter="minimal")) |
| 1355 | 1443 | ||
| 1356 | expect_html = u'<a href="http://a.com?a=b&c=é">e</a>' | 1444 | expect_html = '<a href="http://a.com?a=b&c=é">e</a>' |
| 1357 | self.assertEqual(expect_html, a.decode(formatter="html")) | 1445 | self.assertEqual(expect_html, a.decode(formatter="html")) |
| 1358 | 1446 | ||
| 1359 | self.assertEqual(markup, a.decode(formatter=None)) | 1447 | self.assertEqual(markup, a.decode(formatter=None)) |
| 1360 | expect_upper = u'<a href="HTTP://A.COM?A=B&C=É">E</a>' | 1448 | expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>' |
| 1361 | self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) | 1449 | self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper())) |
| 1362 | 1450 | ||
| 1363 | def test_formatter_skips_script_tag_for_html_documents(self): | 1451 | def test_formatter_skips_script_tag_for_html_documents(self): |
| @@ -1366,7 +1454,7 @@ class TestSubstitutions(SoupTest): | |||
| 1366 | console.log("< < hey > > "); | 1454 | console.log("< < hey > > "); |
| 1367 | </script> | 1455 | </script> |
| 1368 | """ | 1456 | """ |
| 1369 | encoded = BeautifulSoup(doc).encode() | 1457 | encoded = BeautifulSoup(doc, 'html.parser').encode() |
| 1370 | self.assertTrue(b"< < hey > >" in encoded) | 1458 | self.assertTrue(b"< < hey > >" in encoded) |
| 1371 | 1459 | ||
| 1372 | def test_formatter_skips_style_tag_for_html_documents(self): | 1460 | def test_formatter_skips_style_tag_for_html_documents(self): |
| @@ -1375,7 +1463,7 @@ class TestSubstitutions(SoupTest): | |||
| 1375 | console.log("< < hey > > "); | 1463 | console.log("< < hey > > "); |
| 1376 | </style> | 1464 | </style> |
| 1377 | """ | 1465 | """ |
| 1378 | encoded = BeautifulSoup(doc).encode() | 1466 | encoded = BeautifulSoup(doc, 'html.parser').encode() |
| 1379 | self.assertTrue(b"< < hey > >" in encoded) | 1467 | self.assertTrue(b"< < hey > >" in encoded) |
| 1380 | 1468 | ||
| 1381 | def test_prettify_leaves_preformatted_text_alone(self): | 1469 | def test_prettify_leaves_preformatted_text_alone(self): |
| @@ -1383,24 +1471,24 @@ class TestSubstitutions(SoupTest): | |||
| 1383 | # Everything outside the <pre> tag is reformatted, but everything | 1471 | # Everything outside the <pre> tag is reformatted, but everything |
| 1384 | # inside is left alone. | 1472 | # inside is left alone. |
| 1385 | self.assertEqual( | 1473 | self.assertEqual( |
| 1386 | u'<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>', | 1474 | '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>', |
| 1387 | soup.div.prettify()) | 1475 | soup.div.prettify()) |
| 1388 | 1476 | ||
| 1389 | def test_prettify_accepts_formatter(self): | 1477 | def test_prettify_accepts_formatter(self): |
| 1390 | soup = BeautifulSoup("<html><body>foo</body></html>") | 1478 | soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser') |
| 1391 | pretty = soup.prettify(formatter = lambda x: x.upper()) | 1479 | pretty = soup.prettify(formatter = lambda x: x.upper()) |
| 1392 | self.assertTrue("FOO" in pretty) | 1480 | self.assertTrue("FOO" in pretty) |
| 1393 | 1481 | ||
| 1394 | def test_prettify_outputs_unicode_by_default(self): | 1482 | def test_prettify_outputs_unicode_by_default(self): |
| 1395 | soup = self.soup("<a></a>") | 1483 | soup = self.soup("<a></a>") |
| 1396 | self.assertEqual(unicode, type(soup.prettify())) | 1484 | self.assertEqual(str, type(soup.prettify())) |
| 1397 | 1485 | ||
| 1398 | def test_prettify_can_encode_data(self): | 1486 | def test_prettify_can_encode_data(self): |
| 1399 | soup = self.soup("<a></a>") | 1487 | soup = self.soup("<a></a>") |
| 1400 | self.assertEqual(bytes, type(soup.prettify("utf-8"))) | 1488 | self.assertEqual(bytes, type(soup.prettify("utf-8"))) |
| 1401 | 1489 | ||
| 1402 | def test_html_entity_substitution_off_by_default(self): | 1490 | def test_html_entity_substitution_off_by_default(self): |
| 1403 | markup = u"<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>" | 1491 | markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>" |
| 1404 | soup = self.soup(markup) | 1492 | soup = self.soup(markup) |
| 1405 | encoded = soup.b.encode("utf-8") | 1493 | encoded = soup.b.encode("utf-8") |
| 1406 | self.assertEqual(encoded, markup.encode('utf-8')) | 1494 | self.assertEqual(encoded, markup.encode('utf-8')) |
| @@ -1444,45 +1532,53 @@ class TestEncoding(SoupTest): | |||
| 1444 | """Test the ability to encode objects into strings.""" | 1532 | """Test the ability to encode objects into strings.""" |
| 1445 | 1533 | ||
| 1446 | def test_unicode_string_can_be_encoded(self): | 1534 | def test_unicode_string_can_be_encoded(self): |
| 1447 | html = u"<b>\N{SNOWMAN}</b>" | 1535 | html = "<b>\N{SNOWMAN}</b>" |
| 1448 | soup = self.soup(html) | 1536 | soup = self.soup(html) |
| 1449 | self.assertEqual(soup.b.string.encode("utf-8"), | 1537 | self.assertEqual(soup.b.string.encode("utf-8"), |
| 1450 | u"\N{SNOWMAN}".encode("utf-8")) | 1538 | "\N{SNOWMAN}".encode("utf-8")) |
| 1451 | 1539 | ||
| 1452 | def test_tag_containing_unicode_string_can_be_encoded(self): | 1540 | def test_tag_containing_unicode_string_can_be_encoded(self): |
| 1453 | html = u"<b>\N{SNOWMAN}</b>" | 1541 | html = "<b>\N{SNOWMAN}</b>" |
| 1454 | soup = self.soup(html) | 1542 | soup = self.soup(html) |
| 1455 | self.assertEqual( | 1543 | self.assertEqual( |
| 1456 | soup.b.encode("utf-8"), html.encode("utf-8")) | 1544 | soup.b.encode("utf-8"), html.encode("utf-8")) |
| 1457 | 1545 | ||
| 1458 | def test_encoding_substitutes_unrecognized_characters_by_default(self): | 1546 | def test_encoding_substitutes_unrecognized_characters_by_default(self): |
| 1459 | html = u"<b>\N{SNOWMAN}</b>" | 1547 | html = "<b>\N{SNOWMAN}</b>" |
| 1460 | soup = self.soup(html) | 1548 | soup = self.soup(html) |
| 1461 | self.assertEqual(soup.b.encode("ascii"), b"<b>☃</b>") | 1549 | self.assertEqual(soup.b.encode("ascii"), b"<b>☃</b>") |
| 1462 | 1550 | ||
| 1463 | def test_encoding_can_be_made_strict(self): | 1551 | def test_encoding_can_be_made_strict(self): |
| 1464 | html = u"<b>\N{SNOWMAN}</b>" | 1552 | html = "<b>\N{SNOWMAN}</b>" |
| 1465 | soup = self.soup(html) | 1553 | soup = self.soup(html) |
| 1466 | self.assertRaises( | 1554 | self.assertRaises( |
| 1467 | UnicodeEncodeError, soup.encode, "ascii", errors="strict") | 1555 | UnicodeEncodeError, soup.encode, "ascii", errors="strict") |
| 1468 | 1556 | ||
| 1469 | def test_decode_contents(self): | 1557 | def test_decode_contents(self): |
| 1470 | html = u"<b>\N{SNOWMAN}</b>" | 1558 | html = "<b>\N{SNOWMAN}</b>" |
| 1471 | soup = self.soup(html) | 1559 | soup = self.soup(html) |
| 1472 | self.assertEqual(u"\N{SNOWMAN}", soup.b.decode_contents()) | 1560 | self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents()) |
| 1473 | 1561 | ||
| 1474 | def test_encode_contents(self): | 1562 | def test_encode_contents(self): |
| 1475 | html = u"<b>\N{SNOWMAN}</b>" | 1563 | html = "<b>\N{SNOWMAN}</b>" |
| 1476 | soup = self.soup(html) | 1564 | soup = self.soup(html) |
| 1477 | self.assertEqual( | 1565 | self.assertEqual( |
| 1478 | u"\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( | 1566 | "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents( |
| 1479 | encoding="utf8")) | 1567 | encoding="utf8")) |
| 1480 | 1568 | ||
| 1481 | def test_deprecated_renderContents(self): | 1569 | def test_deprecated_renderContents(self): |
| 1482 | html = u"<b>\N{SNOWMAN}</b>" | 1570 | html = "<b>\N{SNOWMAN}</b>" |
| 1483 | soup = self.soup(html) | 1571 | soup = self.soup(html) |
| 1484 | self.assertEqual( | 1572 | self.assertEqual( |
| 1485 | u"\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) | 1573 | "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents()) |
| 1574 | |||
| 1575 | def test_repr(self): | ||
| 1576 | html = "<b>\N{SNOWMAN}</b>" | ||
| 1577 | soup = self.soup(html) | ||
| 1578 | if PY3K: | ||
| 1579 | self.assertEqual(html, repr(soup)) | ||
| 1580 | else: | ||
| 1581 | self.assertEqual(b'<b>\\u2603</b>', repr(soup)) | ||
| 1486 | 1582 | ||
| 1487 | class TestNavigableStringSubclasses(SoupTest): | 1583 | class TestNavigableStringSubclasses(SoupTest): |
| 1488 | 1584 | ||
| @@ -1522,6 +1618,9 @@ class TestNavigableStringSubclasses(SoupTest): | |||
| 1522 | soup.insert(1, doctype) | 1618 | soup.insert(1, doctype) |
| 1523 | self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n") | 1619 | self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n") |
| 1524 | 1620 | ||
| 1621 | def test_declaration(self): | ||
| 1622 | d = Declaration("foo") | ||
| 1623 | self.assertEqual("<?foo?>", d.output_ready()) | ||
| 1525 | 1624 | ||
| 1526 | class TestSoupSelector(TreeTest): | 1625 | class TestSoupSelector(TreeTest): |
| 1527 | 1626 | ||
| @@ -1534,7 +1633,7 @@ class TestSoupSelector(TreeTest): | |||
| 1534 | <link rel="stylesheet" href="blah.css" type="text/css" id="l1"> | 1633 | <link rel="stylesheet" href="blah.css" type="text/css" id="l1"> |
| 1535 | </head> | 1634 | </head> |
| 1536 | <body> | 1635 | <body> |
| 1537 | 1636 | <custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag> | |
| 1538 | <div id="main" class="fancy"> | 1637 | <div id="main" class="fancy"> |
| 1539 | <div id="inner"> | 1638 | <div id="inner"> |
| 1540 | <h1 id="header1">An H1</h1> | 1639 | <h1 id="header1">An H1</h1> |
| @@ -1552,8 +1651,18 @@ class TestSoupSelector(TreeTest): | |||
| 1552 | <a href="#" id="s2a1">span2a1</a> | 1651 | <a href="#" id="s2a1">span2a1</a> |
| 1553 | </span> | 1652 | </span> |
| 1554 | <span class="span3"></span> | 1653 | <span class="span3"></span> |
| 1654 | <custom-dashed-tag class="dashed" id="dash2"/> | ||
| 1655 | <div data-tag="dashedvalue" id="data1"/> | ||
| 1555 | </span> | 1656 | </span> |
| 1556 | </div> | 1657 | </div> |
| 1658 | <x id="xid"> | ||
| 1659 | <z id="zida"/> | ||
| 1660 | <z id="zidab"/> | ||
| 1661 | <z id="zidac"/> | ||
| 1662 | </x> | ||
| 1663 | <y id="yid"> | ||
| 1664 | <z id="zidb"/> | ||
| 1665 | </y> | ||
| 1557 | <p lang="en" id="lang-en">English</p> | 1666 | <p lang="en" id="lang-en">English</p> |
| 1558 | <p lang="en-gb" id="lang-en-gb">English UK</p> | 1667 | <p lang="en-gb" id="lang-en-gb">English UK</p> |
| 1559 | <p lang="en-us" id="lang-en-us">English US</p> | 1668 | <p lang="en-us" id="lang-en-us">English US</p> |
| @@ -1565,7 +1674,7 @@ class TestSoupSelector(TreeTest): | |||
| 1565 | """ | 1674 | """ |
| 1566 | 1675 | ||
| 1567 | def setUp(self): | 1676 | def setUp(self): |
| 1568 | self.soup = BeautifulSoup(self.HTML) | 1677 | self.soup = BeautifulSoup(self.HTML, 'html.parser') |
| 1569 | 1678 | ||
| 1570 | def assertSelects(self, selector, expected_ids): | 1679 | def assertSelects(self, selector, expected_ids): |
| 1571 | el_ids = [el['id'] for el in self.soup.select(selector)] | 1680 | el_ids = [el['id'] for el in self.soup.select(selector)] |
| @@ -1587,21 +1696,29 @@ class TestSoupSelector(TreeTest): | |||
| 1587 | els = self.soup.select('title') | 1696 | els = self.soup.select('title') |
| 1588 | self.assertEqual(len(els), 1) | 1697 | self.assertEqual(len(els), 1) |
| 1589 | self.assertEqual(els[0].name, 'title') | 1698 | self.assertEqual(els[0].name, 'title') |
| 1590 | self.assertEqual(els[0].contents, [u'The title']) | 1699 | self.assertEqual(els[0].contents, ['The title']) |
| 1591 | 1700 | ||
| 1592 | def test_one_tag_many(self): | 1701 | def test_one_tag_many(self): |
| 1593 | els = self.soup.select('div') | 1702 | els = self.soup.select('div') |
| 1594 | self.assertEqual(len(els), 3) | 1703 | self.assertEqual(len(els), 4) |
| 1595 | for div in els: | 1704 | for div in els: |
| 1596 | self.assertEqual(div.name, 'div') | 1705 | self.assertEqual(div.name, 'div') |
| 1597 | 1706 | ||
| 1707 | el = self.soup.select_one('div') | ||
| 1708 | self.assertEqual('main', el['id']) | ||
| 1709 | |||
| 1710 | def test_select_one_returns_none_if_no_match(self): | ||
| 1711 | match = self.soup.select_one('nonexistenttag') | ||
| 1712 | self.assertEqual(None, match) | ||
| 1713 | |||
| 1714 | |||
| 1598 | def test_tag_in_tag_one(self): | 1715 | def test_tag_in_tag_one(self): |
| 1599 | els = self.soup.select('div div') | 1716 | els = self.soup.select('div div') |
| 1600 | self.assertSelects('div div', ['inner']) | 1717 | self.assertSelects('div div', ['inner', 'data1']) |
| 1601 | 1718 | ||
| 1602 | def test_tag_in_tag_many(self): | 1719 | def test_tag_in_tag_many(self): |
| 1603 | for selector in ('html div', 'html body div', 'body div'): | 1720 | for selector in ('html div', 'html body div', 'body div'): |
| 1604 | self.assertSelects(selector, ['main', 'inner', 'footer']) | 1721 | self.assertSelects(selector, ['data1', 'main', 'inner', 'footer']) |
| 1605 | 1722 | ||
| 1606 | def test_tag_no_match(self): | 1723 | def test_tag_no_match(self): |
| 1607 | self.assertEqual(len(self.soup.select('del')), 0) | 1724 | self.assertEqual(len(self.soup.select('del')), 0) |
| @@ -1609,6 +1726,20 @@ class TestSoupSelector(TreeTest): | |||
| 1609 | def test_invalid_tag(self): | 1726 | def test_invalid_tag(self): |
| 1610 | self.assertRaises(ValueError, self.soup.select, 'tag%t') | 1727 | self.assertRaises(ValueError, self.soup.select, 'tag%t') |
| 1611 | 1728 | ||
| 1729 | def test_select_dashed_tag_ids(self): | ||
| 1730 | self.assertSelects('custom-dashed-tag', ['dash1', 'dash2']) | ||
| 1731 | |||
| 1732 | def test_select_dashed_by_id(self): | ||
| 1733 | dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]') | ||
| 1734 | self.assertEqual(dashed[0].name, 'custom-dashed-tag') | ||
| 1735 | self.assertEqual(dashed[0]['id'], 'dash2') | ||
| 1736 | |||
| 1737 | def test_dashed_tag_text(self): | ||
| 1738 | self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.') | ||
| 1739 | |||
| 1740 | def test_select_dashed_matches_find_all(self): | ||
| 1741 | self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag')) | ||
| 1742 | |||
| 1612 | def test_header_tags(self): | 1743 | def test_header_tags(self): |
| 1613 | self.assertSelectMultiple( | 1744 | self.assertSelectMultiple( |
| 1614 | ('h1', ['header1']), | 1745 | ('h1', ['header1']), |
| @@ -1709,6 +1840,7 @@ class TestSoupSelector(TreeTest): | |||
| 1709 | ('[id^="m"]', ['me', 'main']), | 1840 | ('[id^="m"]', ['me', 'main']), |
| 1710 | ('div[id^="m"]', ['main']), | 1841 | ('div[id^="m"]', ['main']), |
| 1711 | ('a[id^="m"]', ['me']), | 1842 | ('a[id^="m"]', ['me']), |
| 1843 | ('div[data-tag^="dashed"]', ['data1']) | ||
| 1712 | ) | 1844 | ) |
| 1713 | 1845 | ||
| 1714 | def test_attribute_endswith(self): | 1846 | def test_attribute_endswith(self): |
| @@ -1716,8 +1848,8 @@ class TestSoupSelector(TreeTest): | |||
| 1716 | ('[href$=".css"]', ['l1']), | 1848 | ('[href$=".css"]', ['l1']), |
| 1717 | ('link[href$=".css"]', ['l1']), | 1849 | ('link[href$=".css"]', ['l1']), |
| 1718 | ('link[id$="1"]', ['l1']), | 1850 | ('link[id$="1"]', ['l1']), |
| 1719 | ('[id$="1"]', ['l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1']), | 1851 | ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']), |
| 1720 | ('div[id$="1"]', []), | 1852 | ('div[id$="1"]', ['data1']), |
| 1721 | ('[id$="noending"]', []), | 1853 | ('[id$="noending"]', []), |
| 1722 | ) | 1854 | ) |
| 1723 | 1855 | ||
| @@ -1730,7 +1862,6 @@ class TestSoupSelector(TreeTest): | |||
| 1730 | ('[rel*="notstyle"]', []), | 1862 | ('[rel*="notstyle"]', []), |
| 1731 | ('link[rel*="notstyle"]', []), | 1863 | ('link[rel*="notstyle"]', []), |
| 1732 | ('link[href*="bla"]', ['l1']), | 1864 | ('link[href*="bla"]', ['l1']), |
| 1733 | ('a[href*="http://"]', ['bob', 'me']), | ||
| 1734 | ('[href*="http://"]', ['bob', 'me']), | 1865 | ('[href*="http://"]', ['bob', 'me']), |
| 1735 | ('[id*="p"]', ['pmulti', 'p1']), | 1866 | ('[id*="p"]', ['pmulti', 'p1']), |
| 1736 | ('div[id*="m"]', ['main']), | 1867 | ('div[id*="m"]', ['main']), |
| @@ -1739,8 +1870,8 @@ class TestSoupSelector(TreeTest): | |||
| 1739 | ('[href*=".css"]', ['l1']), | 1870 | ('[href*=".css"]', ['l1']), |
| 1740 | ('link[href*=".css"]', ['l1']), | 1871 | ('link[href*=".css"]', ['l1']), |
| 1741 | ('link[id*="1"]', ['l1']), | 1872 | ('link[id*="1"]', ['l1']), |
| 1742 | ('[id*="1"]', ['l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1']), | 1873 | ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']), |
| 1743 | ('div[id*="1"]', []), | 1874 | ('div[id*="1"]', ['data1']), |
| 1744 | ('[id*="noending"]', []), | 1875 | ('[id*="noending"]', []), |
| 1745 | # New for this test | 1876 | # New for this test |
| 1746 | ('[href*="."]', ['bob', 'me', 'l1']), | 1877 | ('[href*="."]', ['bob', 'me', 'l1']), |
| @@ -1748,6 +1879,7 @@ class TestSoupSelector(TreeTest): | |||
| 1748 | ('link[href*="."]', ['l1']), | 1879 | ('link[href*="."]', ['l1']), |
| 1749 | ('div[id*="n"]', ['main', 'inner']), | 1880 | ('div[id*="n"]', ['main', 'inner']), |
| 1750 | ('div[id*="nn"]', ['inner']), | 1881 | ('div[id*="nn"]', ['inner']), |
| 1882 | ('div[data-tag*="edval"]', ['data1']) | ||
| 1751 | ) | 1883 | ) |
| 1752 | 1884 | ||
| 1753 | def test_attribute_exact_or_hypen(self): | 1885 | def test_attribute_exact_or_hypen(self): |
| @@ -1767,18 +1899,27 @@ class TestSoupSelector(TreeTest): | |||
| 1767 | ('p[class]', ['p1', 'pmulti']), | 1899 | ('p[class]', ['p1', 'pmulti']), |
| 1768 | ('[blah]', []), | 1900 | ('[blah]', []), |
| 1769 | ('p[blah]', []), | 1901 | ('p[blah]', []), |
| 1902 | ('div[data-tag]', ['data1']) | ||
| 1770 | ) | 1903 | ) |
| 1771 | 1904 | ||
| 1905 | def test_unsupported_pseudoclass(self): | ||
| 1906 | self.assertRaises( | ||
| 1907 | NotImplementedError, self.soup.select, "a:no-such-pseudoclass") | ||
| 1908 | |||
| 1909 | self.assertRaises( | ||
| 1910 | NotImplementedError, self.soup.select, "a:nth-of-type(a)") | ||
| 1911 | |||
| 1912 | |||
| 1772 | def test_nth_of_type(self): | 1913 | def test_nth_of_type(self): |
| 1773 | # Try to select first paragraph | 1914 | # Try to select first paragraph |
| 1774 | els = self.soup.select('div#inner p:nth-of-type(1)') | 1915 | els = self.soup.select('div#inner p:nth-of-type(1)') |
| 1775 | self.assertEqual(len(els), 1) | 1916 | self.assertEqual(len(els), 1) |
| 1776 | self.assertEqual(els[0].string, u'Some text') | 1917 | self.assertEqual(els[0].string, 'Some text') |
| 1777 | 1918 | ||
| 1778 | # Try to select third paragraph | 1919 | # Try to select third paragraph |
| 1779 | els = self.soup.select('div#inner p:nth-of-type(3)') | 1920 | els = self.soup.select('div#inner p:nth-of-type(3)') |
| 1780 | self.assertEqual(len(els), 1) | 1921 | self.assertEqual(len(els), 1) |
| 1781 | self.assertEqual(els[0].string, u'Another') | 1922 | self.assertEqual(els[0].string, 'Another') |
| 1782 | 1923 | ||
| 1783 | # Try to select (non-existent!) fourth paragraph | 1924 | # Try to select (non-existent!) fourth paragraph |
| 1784 | els = self.soup.select('div#inner p:nth-of-type(4)') | 1925 | els = self.soup.select('div#inner p:nth-of-type(4)') |
| @@ -1791,7 +1932,7 @@ class TestSoupSelector(TreeTest): | |||
| 1791 | def test_nth_of_type_direct_descendant(self): | 1932 | def test_nth_of_type_direct_descendant(self): |
| 1792 | els = self.soup.select('div#inner > p:nth-of-type(1)') | 1933 | els = self.soup.select('div#inner > p:nth-of-type(1)') |
| 1793 | self.assertEqual(len(els), 1) | 1934 | self.assertEqual(len(els), 1) |
| 1794 | self.assertEqual(els[0].string, u'Some text') | 1935 | self.assertEqual(els[0].string, 'Some text') |
| 1795 | 1936 | ||
| 1796 | def test_id_child_selector_nth_of_type(self): | 1937 | def test_id_child_selector_nth_of_type(self): |
| 1797 | self.assertSelects('#inner > p:nth-of-type(2)', ['p1']) | 1938 | self.assertSelects('#inner > p:nth-of-type(2)', ['p1']) |
| @@ -1803,7 +1944,7 @@ class TestSoupSelector(TreeTest): | |||
| 1803 | selected = inner.select("div") | 1944 | selected = inner.select("div") |
| 1804 | # The <div id="inner"> tag was selected. The <div id="footer"> | 1945 | # The <div id="inner"> tag was selected. The <div id="footer"> |
| 1805 | # tag was not. | 1946 | # tag was not. |
| 1806 | self.assertSelectsIDs(selected, ['inner']) | 1947 | self.assertSelectsIDs(selected, ['inner', 'data1']) |
| 1807 | 1948 | ||
| 1808 | def test_overspecified_child_id(self): | 1949 | def test_overspecified_child_id(self): |
| 1809 | self.assertSelects(".fancy #inner", ['inner']) | 1950 | self.assertSelects(".fancy #inner", ['inner']) |
| @@ -1827,3 +1968,44 @@ class TestSoupSelector(TreeTest): | |||
| 1827 | 1968 | ||
| 1828 | def test_sibling_combinator_wont_select_same_tag_twice(self): | 1969 | def test_sibling_combinator_wont_select_same_tag_twice(self): |
| 1829 | self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr']) | 1970 | self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr']) |
| 1971 | |||
| 1972 | # Test the selector grouping operator (the comma) | ||
| 1973 | def test_multiple_select(self): | ||
| 1974 | self.assertSelects('x, y', ['xid', 'yid']) | ||
| 1975 | |||
| 1976 | def test_multiple_select_with_no_space(self): | ||
| 1977 | self.assertSelects('x,y', ['xid', 'yid']) | ||
| 1978 | |||
| 1979 | def test_multiple_select_with_more_space(self): | ||
| 1980 | self.assertSelects('x, y', ['xid', 'yid']) | ||
| 1981 | |||
| 1982 | def test_multiple_select_duplicated(self): | ||
| 1983 | self.assertSelects('x, x', ['xid']) | ||
| 1984 | |||
| 1985 | def test_multiple_select_sibling(self): | ||
| 1986 | self.assertSelects('x, y ~ p[lang=fr]', ['xid', 'lang-fr']) | ||
| 1987 | |||
| 1988 | def test_multiple_select_tag_and_direct_descendant(self): | ||
| 1989 | self.assertSelects('x, y > z', ['xid', 'zidb']) | ||
| 1990 | |||
| 1991 | def test_multiple_select_direct_descendant_and_tags(self): | ||
| 1992 | self.assertSelects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']) | ||
| 1993 | |||
| 1994 | def test_multiple_select_indirect_descendant(self): | ||
| 1995 | self.assertSelects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac']) | ||
| 1996 | |||
| 1997 | def test_invalid_multiple_select(self): | ||
| 1998 | self.assertRaises(ValueError, self.soup.select, ',x, y') | ||
| 1999 | self.assertRaises(ValueError, self.soup.select, 'x,,y') | ||
| 2000 | |||
| 2001 | def test_multiple_select_attrs(self): | ||
| 2002 | self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb']) | ||
| 2003 | |||
| 2004 | def test_multiple_select_ids(self): | ||
| 2005 | self.assertSelects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab']) | ||
| 2006 | |||
| 2007 | def test_multiple_select_nested(self): | ||
| 2008 | self.assertSelects('body > div > x, y > z', ['xid', 'zidb']) | ||
| 2009 | |||
| 2010 | |||
| 2011 | |||
