summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bs4/tests
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bs4/tests')
-rw-r--r--bitbake/lib/bs4/tests/__init__.py1
-rw-r--r--bitbake/lib/bs4/tests/test_builder_registry.py147
-rw-r--r--bitbake/lib/bs4/tests/test_docs.py32
-rw-r--r--bitbake/lib/bs4/tests/test_html5lib.py98
-rw-r--r--bitbake/lib/bs4/tests/test_htmlparser.py31
-rw-r--r--bitbake/lib/bs4/tests/test_lxml.py70
-rw-r--r--bitbake/lib/bs4/tests/test_soup.py479
-rw-r--r--bitbake/lib/bs4/tests/test_tree.py2004
8 files changed, 0 insertions, 2862 deletions
diff --git a/bitbake/lib/bs4/tests/__init__.py b/bitbake/lib/bs4/tests/__init__.py
deleted file mode 100644
index 142c8cc3f1..0000000000
--- a/bitbake/lib/bs4/tests/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
1"The beautifulsoup tests."
diff --git a/bitbake/lib/bs4/tests/test_builder_registry.py b/bitbake/lib/bs4/tests/test_builder_registry.py
deleted file mode 100644
index 90cad82933..0000000000
--- a/bitbake/lib/bs4/tests/test_builder_registry.py
+++ /dev/null
@@ -1,147 +0,0 @@
1"""Tests of the builder registry."""
2
3import unittest
4import warnings
5
6from bs4 import BeautifulSoup
7from bs4.builder import (
8 builder_registry as registry,
9 HTMLParserTreeBuilder,
10 TreeBuilderRegistry,
11)
12
13try:
14 from bs4.builder import HTML5TreeBuilder
15 HTML5LIB_PRESENT = True
16except ImportError:
17 HTML5LIB_PRESENT = False
18
19try:
20 from bs4.builder import (
21 LXMLTreeBuilderForXML,
22 LXMLTreeBuilder,
23 )
24 LXML_PRESENT = True
25except ImportError:
26 LXML_PRESENT = False
27
28
29class BuiltInRegistryTest(unittest.TestCase):
30 """Test the built-in registry with the default builders registered."""
31
32 def test_combination(self):
33 if LXML_PRESENT:
34 self.assertEqual(registry.lookup('fast', 'html'),
35 LXMLTreeBuilder)
36
37 if LXML_PRESENT:
38 self.assertEqual(registry.lookup('permissive', 'xml'),
39 LXMLTreeBuilderForXML)
40 self.assertEqual(registry.lookup('strict', 'html'),
41 HTMLParserTreeBuilder)
42 if HTML5LIB_PRESENT:
43 self.assertEqual(registry.lookup('html5lib', 'html'),
44 HTML5TreeBuilder)
45
46 def test_lookup_by_markup_type(self):
47 if LXML_PRESENT:
48 self.assertEqual(registry.lookup('html'), LXMLTreeBuilder)
49 self.assertEqual(registry.lookup('xml'), LXMLTreeBuilderForXML)
50 else:
51 self.assertEqual(registry.lookup('xml'), None)
52 if HTML5LIB_PRESENT:
53 self.assertEqual(registry.lookup('html'), HTML5TreeBuilder)
54 else:
55 self.assertEqual(registry.lookup('html'), HTMLParserTreeBuilder)
56
57 def test_named_library(self):
58 if LXML_PRESENT:
59 self.assertEqual(registry.lookup('lxml', 'xml'),
60 LXMLTreeBuilderForXML)
61 self.assertEqual(registry.lookup('lxml', 'html'),
62 LXMLTreeBuilder)
63 if HTML5LIB_PRESENT:
64 self.assertEqual(registry.lookup('html5lib'),
65 HTML5TreeBuilder)
66
67 self.assertEqual(registry.lookup('html.parser'),
68 HTMLParserTreeBuilder)
69
70 def test_beautifulsoup_constructor_does_lookup(self):
71
72 with warnings.catch_warnings(record=True) as w:
73 # This will create a warning about not explicitly
74 # specifying a parser, but we'll ignore it.
75
76 # You can pass in a string.
77 BeautifulSoup("", features="html")
78 # Or a list of strings.
79 BeautifulSoup("", features=["html", "fast"])
80
81 # You'll get an exception if BS can't find an appropriate
82 # builder.
83 self.assertRaises(ValueError, BeautifulSoup,
84 "", features="no-such-feature")
85
86class RegistryTest(unittest.TestCase):
87 """Test the TreeBuilderRegistry class in general."""
88
89 def setUp(self):
90 self.registry = TreeBuilderRegistry()
91
92 def builder_for_features(self, *feature_list):
93 cls = type('Builder_' + '_'.join(feature_list),
94 (object,), {'features' : feature_list})
95
96 self.registry.register(cls)
97 return cls
98
99 def test_register_with_no_features(self):
100 builder = self.builder_for_features()
101
102 # Since the builder advertises no features, you can't find it
103 # by looking up features.
104 self.assertEqual(self.registry.lookup('foo'), None)
105
106 # But you can find it by doing a lookup with no features, if
107 # this happens to be the only registered builder.
108 self.assertEqual(self.registry.lookup(), builder)
109
110 def test_register_with_features_makes_lookup_succeed(self):
111 builder = self.builder_for_features('foo', 'bar')
112 self.assertEqual(self.registry.lookup('foo'), builder)
113 self.assertEqual(self.registry.lookup('bar'), builder)
114
115 def test_lookup_fails_when_no_builder_implements_feature(self):
116 builder = self.builder_for_features('foo', 'bar')
117 self.assertEqual(self.registry.lookup('baz'), None)
118
119 def test_lookup_gets_most_recent_registration_when_no_feature_specified(self):
120 builder1 = self.builder_for_features('foo')
121 builder2 = self.builder_for_features('bar')
122 self.assertEqual(self.registry.lookup(), builder2)
123
124 def test_lookup_fails_when_no_tree_builders_registered(self):
125 self.assertEqual(self.registry.lookup(), None)
126
127 def test_lookup_gets_most_recent_builder_supporting_all_features(self):
128 has_one = self.builder_for_features('foo')
129 has_the_other = self.builder_for_features('bar')
130 has_both_early = self.builder_for_features('foo', 'bar', 'baz')
131 has_both_late = self.builder_for_features('foo', 'bar', 'quux')
132 lacks_one = self.builder_for_features('bar')
133 has_the_other = self.builder_for_features('foo')
134
135 # There are two builders featuring 'foo' and 'bar', but
136 # the one that also features 'quux' was registered later.
137 self.assertEqual(self.registry.lookup('foo', 'bar'),
138 has_both_late)
139
140 # There is only one builder featuring 'foo', 'bar', and 'baz'.
141 self.assertEqual(self.registry.lookup('foo', 'bar', 'baz'),
142 has_both_early)
143
144 def test_lookup_fails_when_cannot_reconcile_requested_features(self):
145 builder1 = self.builder_for_features('foo', 'bar')
146 builder2 = self.builder_for_features('foo', 'baz')
147 self.assertEqual(self.registry.lookup('bar', 'baz'), None)
diff --git a/bitbake/lib/bs4/tests/test_docs.py b/bitbake/lib/bs4/tests/test_docs.py
deleted file mode 100644
index d1d76a33bf..0000000000
--- a/bitbake/lib/bs4/tests/test_docs.py
+++ /dev/null
@@ -1,32 +0,0 @@
1"Test harness for doctests."
2
3# pylint: disable-msg=E0611,W0142
4
5__metaclass__ = type
6__all__ = [
7 'additional_tests',
8 ]
9
10import doctest
11#from pkg_resources import (
12# resource_filename, resource_exists, resource_listdir, cleanup_resources)
13
14DOCTEST_FLAGS = (
15 doctest.ELLIPSIS |
16 doctest.NORMALIZE_WHITESPACE |
17 doctest.REPORT_NDIFF)
18
19# def additional_tests():
20# "Run the doc tests (README.txt and docs/*, if any exist)"
21# doctest_files = [
22# os.path.abspath(resource_filename('bs4', 'README.txt'))]
23# if resource_exists('bs4', 'docs'):
24# for name in resource_listdir('bs4', 'docs'):
25# if name.endswith('.txt'):
26# doctest_files.append(
27# os.path.abspath(
28# resource_filename('bs4', 'docs/%s' % name)))
29# kwargs = dict(module_relative=False, optionflags=DOCTEST_FLAGS)
30# atexit.register(cleanup_resources)
31# return unittest.TestSuite((
32# doctest.DocFileSuite(*doctest_files, **kwargs)))
diff --git a/bitbake/lib/bs4/tests/test_html5lib.py b/bitbake/lib/bs4/tests/test_html5lib.py
deleted file mode 100644
index a7494ca5ba..0000000000
--- a/bitbake/lib/bs4/tests/test_html5lib.py
+++ /dev/null
@@ -1,98 +0,0 @@
1"""Tests to ensure that the html5lib tree builder generates good trees."""
2
3import warnings
4
5try:
6 from bs4.builder import HTML5TreeBuilder
7 HTML5LIB_PRESENT = True
8except ImportError as e:
9 HTML5LIB_PRESENT = False
10from bs4.element import SoupStrainer
11from bs4.testing import (
12 HTML5TreeBuilderSmokeTest,
13 SoupTest,
14 skipIf,
15)
16
17@skipIf(
18 not HTML5LIB_PRESENT,
19 "html5lib seems not to be present, not testing its tree builder.")
20class HTML5LibBuilderSmokeTest(SoupTest, HTML5TreeBuilderSmokeTest):
21 """See ``HTML5TreeBuilderSmokeTest``."""
22
23 @property
24 def default_builder(self):
25 return HTML5TreeBuilder()
26
27 def test_soupstrainer(self):
28 # The html5lib tree builder does not support SoupStrainers.
29 strainer = SoupStrainer("b")
30 markup = "<p>A <b>bold</b> statement.</p>"
31 with warnings.catch_warnings(record=True) as w:
32 soup = self.soup(markup, parse_only=strainer)
33 self.assertEqual(
34 soup.decode(), self.document_for(markup))
35
36 self.assertTrue(
37 "the html5lib tree builder doesn't support parse_only" in
38 str(w[0].message))
39
40 def test_correctly_nested_tables(self):
41 """html5lib inserts <tbody> tags where other parsers don't."""
42 markup = ('<table id="1">'
43 '<tr>'
44 "<td>Here's another table:"
45 '<table id="2">'
46 '<tr><td>foo</td></tr>'
47 '</table></td>')
48
49 self.assertSoupEquals(
50 markup,
51 '<table id="1"><tbody><tr><td>Here\'s another table:'
52 '<table id="2"><tbody><tr><td>foo</td></tr></tbody></table>'
53 '</td></tr></tbody></table>')
54
55 self.assertSoupEquals(
56 "<table><thead><tr><td>Foo</td></tr></thead>"
57 "<tbody><tr><td>Bar</td></tr></tbody>"
58 "<tfoot><tr><td>Baz</td></tr></tfoot></table>")
59
60 def test_xml_declaration_followed_by_doctype(self):
61 markup = '''<?xml version="1.0" encoding="utf-8"?>
62<!DOCTYPE html>
63<html>
64 <head>
65 </head>
66 <body>
67 <p>foo</p>
68 </body>
69</html>'''
70 soup = self.soup(markup)
71 # Verify that we can reach the <p> tag; this means the tree is connected.
72 self.assertEqual(b"<p>foo</p>", soup.p.encode())
73
74 def test_reparented_markup(self):
75 markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>'
76 soup = self.soup(markup)
77 self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p></body>", soup.body.decode())
78 self.assertEqual(2, len(soup.find_all('p')))
79
80
81 def test_reparented_markup_ends_with_whitespace(self):
82 markup = '<p><em>foo</p>\n<p>bar<a></a></em></p>\n'
83 soup = self.soup(markup)
84 self.assertEqual("<body><p><em>foo</em></p><em>\n</em><p><em>bar<a></a></em></p>\n</body>", soup.body.decode())
85 self.assertEqual(2, len(soup.find_all('p')))
86
87 def test_processing_instruction(self):
88 """Processing instructions become comments."""
89 markup = b"""<?PITarget PIContent?>"""
90 soup = self.soup(markup)
91 assert str(soup).startswith("<!--?PITarget PIContent?-->")
92
93 def test_cloned_multivalue_node(self):
94 markup = b"""<a class="my_class"><p></a>"""
95 soup = self.soup(markup)
96 a1, a2 = soup.find_all('a')
97 self.assertEqual(a1, a2)
98 assert a1 is not a2
diff --git a/bitbake/lib/bs4/tests/test_htmlparser.py b/bitbake/lib/bs4/tests/test_htmlparser.py
deleted file mode 100644
index 30a25e6709..0000000000
--- a/bitbake/lib/bs4/tests/test_htmlparser.py
+++ /dev/null
@@ -1,31 +0,0 @@
1"""Tests to ensure that the html.parser tree builder generates good
2trees."""
3
4import pickle
5from bs4.testing import SoupTest, HTMLTreeBuilderSmokeTest
6from bs4.builder import HTMLParserTreeBuilder
7
8class HTMLParserTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
9
10 @property
11 def default_builder(self):
12 return HTMLParserTreeBuilder()
13
14 def test_namespaced_system_doctype(self):
15 # html.parser can't handle namespaced doctypes, so skip this one.
16 pass
17
18 def test_namespaced_public_doctype(self):
19 # html.parser can't handle namespaced doctypes, so skip this one.
20 pass
21
22 def test_builder_is_pickled(self):
23 """Unlike most tree builders, HTMLParserTreeBuilder and will
24 be restored after pickling.
25 """
26 tree = self.soup("<a><b>foo</a>")
27 dumped = pickle.dumps(tree, 2)
28 loaded = pickle.loads(dumped)
29 self.assertTrue(isinstance(loaded.builder, type(tree.builder)))
30
31
diff --git a/bitbake/lib/bs4/tests/test_lxml.py b/bitbake/lib/bs4/tests/test_lxml.py
deleted file mode 100644
index 6b6cdd07cb..0000000000
--- a/bitbake/lib/bs4/tests/test_lxml.py
+++ /dev/null
@@ -1,70 +0,0 @@
1"""Tests to ensure that the lxml tree builder generates good trees."""
2
3import warnings
4
5try:
6 import lxml.etree
7 LXML_PRESENT = True
8 LXML_VERSION = lxml.etree.LXML_VERSION
9except ImportError as e:
10 LXML_PRESENT = False
11 LXML_VERSION = (0,)
12
13if LXML_PRESENT:
14 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
15
16from bs4 import BeautifulStoneSoup
17from bs4.testing import skipIf
18from bs4.testing import (
19 HTMLTreeBuilderSmokeTest,
20 XMLTreeBuilderSmokeTest,
21 SoupTest,
22 skipIf,
23)
24
25@skipIf(
26 not LXML_PRESENT,
27 "lxml seems not to be present, not testing its tree builder.")
28class LXMLTreeBuilderSmokeTest(SoupTest, HTMLTreeBuilderSmokeTest):
29 """See ``HTMLTreeBuilderSmokeTest``."""
30
31 @property
32 def default_builder(self):
33 return LXMLTreeBuilder()
34
35 def test_out_of_range_entity(self):
36 self.assertSoupEquals(
37 "<p>foo&#10000000000000;bar</p>", "<p>foobar</p>")
38 self.assertSoupEquals(
39 "<p>foo&#x10000000000000;bar</p>", "<p>foobar</p>")
40 self.assertSoupEquals(
41 "<p>foo&#1000000000;bar</p>", "<p>foobar</p>")
42
43 # In lxml < 2.3.5, an empty doctype causes a segfault. Skip this
44 # test if an old version of lxml is installed.
45
46 @skipIf(
47 not LXML_PRESENT or LXML_VERSION < (2,3,5,0),
48 "Skipping doctype test for old version of lxml to avoid segfault.")
49 def test_empty_doctype(self):
50 soup = self.soup("<!DOCTYPE>")
51 doctype = soup.contents[0]
52 self.assertEqual("", doctype.strip())
53
54 def test_beautifulstonesoup_is_xml_parser(self):
55 # Make sure that the deprecated BSS class uses an xml builder
56 # if one is installed.
57 with warnings.catch_warnings(record=True) as w:
58 soup = BeautifulStoneSoup("<b />")
59 self.assertEqual("<b/>", str(soup.b))
60 self.assertTrue("BeautifulStoneSoup class is deprecated" in str(w[0].message))
61
62@skipIf(
63 not LXML_PRESENT,
64 "lxml seems not to be present, not testing its XML tree builder.")
65class LXMLXMLTreeBuilderSmokeTest(SoupTest, XMLTreeBuilderSmokeTest):
66 """See ``HTMLTreeBuilderSmokeTest``."""
67
68 @property
69 def default_builder(self):
70 return LXMLTreeBuilderForXML()
diff --git a/bitbake/lib/bs4/tests/test_soup.py b/bitbake/lib/bs4/tests/test_soup.py
deleted file mode 100644
index 6ad3cb3765..0000000000
--- a/bitbake/lib/bs4/tests/test_soup.py
+++ /dev/null
@@ -1,479 +0,0 @@
1# -*- coding: utf-8 -*-
2"""Tests of Beautiful Soup as a whole."""
3
4import logging
5import unittest
6import sys
7import tempfile
8
9from bs4 import BeautifulSoup
10from bs4.element import (
11 CharsetMetaAttributeValue,
12 ContentMetaAttributeValue,
13 SoupStrainer,
14 NamespacedAttribute,
15 )
16import bs4.dammit
17from bs4.dammit import (
18 EntitySubstitution,
19 UnicodeDammit,
20 EncodingDetector,
21)
22from bs4.testing import (
23 SoupTest,
24 skipIf,
25)
26import warnings
27
28try:
29 from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
30 LXML_PRESENT = True
31except ImportError as e:
32 LXML_PRESENT = False
33
34PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
35PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
36
37class TestConstructor(SoupTest):
38
39 def test_short_unicode_input(self):
40 data = "<h1>éé</h1>"
41 soup = self.soup(data)
42 self.assertEqual("éé", soup.h1.string)
43
44 def test_embedded_null(self):
45 data = "<h1>foo\0bar</h1>"
46 soup = self.soup(data)
47 self.assertEqual("foo\0bar", soup.h1.string)
48
49 def test_exclude_encodings(self):
50 utf8_data = "Räksmörgås".encode("utf-8")
51 soup = self.soup(utf8_data, exclude_encodings=["utf-8"])
52 self.assertEqual("windows-1252", soup.original_encoding)
53
54
55class TestWarnings(SoupTest):
56
57 def _no_parser_specified(self, s, is_there=True):
58 v = s.startswith(BeautifulSoup.NO_PARSER_SPECIFIED_WARNING[:80])
59 self.assertTrue(v)
60
61 def test_warning_if_no_parser_specified(self):
62 with warnings.catch_warnings(record=True) as w:
63 soup = self.soup("<a><b></b></a>")
64 msg = str(w[0].message)
65 self._assert_no_parser_specified(msg)
66
67 def test_warning_if_parser_specified_too_vague(self):
68 with warnings.catch_warnings(record=True) as w:
69 soup = self.soup("<a><b></b></a>", "html")
70 msg = str(w[0].message)
71 self._assert_no_parser_specified(msg)
72
73 def test_no_warning_if_explicit_parser_specified(self):
74 with warnings.catch_warnings(record=True) as w:
75 soup = self.soup("<a><b></b></a>", "html.parser")
76 self.assertEqual([], w)
77
78 def test_parseOnlyThese_renamed_to_parse_only(self):
79 with warnings.catch_warnings(record=True) as w:
80 soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
81 msg = str(w[0].message)
82 self.assertTrue("parseOnlyThese" in msg)
83 self.assertTrue("parse_only" in msg)
84 self.assertEqual(b"<b></b>", soup.encode())
85
86 def test_fromEncoding_renamed_to_from_encoding(self):
87 with warnings.catch_warnings(record=True) as w:
88 utf8 = b"\xc3\xa9"
89 soup = self.soup(utf8, fromEncoding="utf8")
90 msg = str(w[0].message)
91 self.assertTrue("fromEncoding" in msg)
92 self.assertTrue("from_encoding" in msg)
93 self.assertEqual("utf8", soup.original_encoding)
94
95 def test_unrecognized_keyword_argument(self):
96 self.assertRaises(
97 TypeError, self.soup, "<a>", no_such_argument=True)
98
99class TestWarnings(SoupTest):
100
101 def test_disk_file_warning(self):
102 filehandle = tempfile.NamedTemporaryFile()
103 filename = filehandle.name
104 try:
105 with warnings.catch_warnings(record=True) as w:
106 soup = self.soup(filename)
107 msg = str(w[0].message)
108 self.assertTrue("looks like a filename" in msg)
109 finally:
110 filehandle.close()
111
112 # The file no longer exists, so Beautiful Soup will no longer issue the warning.
113 with warnings.catch_warnings(record=True) as w:
114 soup = self.soup(filename)
115 self.assertEqual(0, len(w))
116
117 def test_url_warning(self):
118 with warnings.catch_warnings(record=True) as w:
119 soup = self.soup("http://www.crummy.com/")
120 msg = str(w[0].message)
121 self.assertTrue("looks like a URL" in msg)
122
123 with warnings.catch_warnings(record=True) as w:
124 soup = self.soup("http://www.crummy.com/ is great")
125 self.assertEqual(0, len(w))
126
127class TestSelectiveParsing(SoupTest):
128
129 def test_parse_with_soupstrainer(self):
130 markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
131 strainer = SoupStrainer("b")
132 soup = self.soup(markup, parse_only=strainer)
133 self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
134
135
136class TestEntitySubstitution(unittest.TestCase):
137 """Standalone tests of the EntitySubstitution class."""
138 def setUp(self):
139 self.sub = EntitySubstitution
140
141 def test_simple_html_substitution(self):
142 # Unicode characters corresponding to named HTML entites
143 # are substituted, and no others.
144 s = "foo\u2200\N{SNOWMAN}\u00f5bar"
145 self.assertEqual(self.sub.substitute_html(s),
146 "foo&forall;\N{SNOWMAN}&otilde;bar")
147
148 def test_smart_quote_substitution(self):
149 # MS smart quotes are a common source of frustration, so we
150 # give them a special test.
151 quotes = b"\x91\x92foo\x93\x94"
152 dammit = UnicodeDammit(quotes)
153 self.assertEqual(self.sub.substitute_html(dammit.markup),
154 "&lsquo;&rsquo;foo&ldquo;&rdquo;")
155
156 def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
157 s = 'Welcome to "my bar"'
158 self.assertEqual(self.sub.substitute_xml(s, False), s)
159
160 def test_xml_attribute_quoting_normally_uses_double_quotes(self):
161 self.assertEqual(self.sub.substitute_xml("Welcome", True),
162 '"Welcome"')
163 self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
164 '"Bob\'s Bar"')
165
166 def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
167 s = 'Welcome to "my bar"'
168 self.assertEqual(self.sub.substitute_xml(s, True),
169 "'Welcome to \"my bar\"'")
170
171 def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
172 s = 'Welcome to "Bob\'s Bar"'
173 self.assertEqual(
174 self.sub.substitute_xml(s, True),
175 '"Welcome to &quot;Bob\'s Bar&quot;"')
176
177 def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
178 quoted = 'Welcome to "Bob\'s Bar"'
179 self.assertEqual(self.sub.substitute_xml(quoted), quoted)
180
181 def test_xml_quoting_handles_angle_brackets(self):
182 self.assertEqual(
183 self.sub.substitute_xml("foo<bar>"),
184 "foo&lt;bar&gt;")
185
186 def test_xml_quoting_handles_ampersands(self):
187 self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
188
189 def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
190 self.assertEqual(
191 self.sub.substitute_xml("&Aacute;T&T"),
192 "&amp;Aacute;T&amp;T")
193
194 def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
195 self.assertEqual(
196 self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
197 "&Aacute;T&amp;T")
198
199 def test_quotes_not_html_substituted(self):
200 """There's no need to do this except inside attribute values."""
201 text = 'Bob\'s "bar"'
202 self.assertEqual(self.sub.substitute_html(text), text)
203
204
205class TestEncodingConversion(SoupTest):
206 # Test Beautiful Soup's ability to decode and encode from various
207 # encodings.
208
209 def setUp(self):
210 super(TestEncodingConversion, self).setUp()
211 self.unicode_data = '<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
212 self.utf8_data = self.unicode_data.encode("utf-8")
213 # Just so you know what it looks like.
214 self.assertEqual(
215 self.utf8_data,
216 b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
217
218 def test_ascii_in_unicode_out(self):
219 # ASCII input is converted to Unicode. The original_encoding
220 # attribute is set to 'utf-8', a superset of ASCII.
221 chardet = bs4.dammit.chardet_dammit
222 logging.disable(logging.WARNING)
223 try:
224 def noop(str):
225 return None
226 # Disable chardet, which will realize that the ASCII is ASCII.
227 bs4.dammit.chardet_dammit = noop
228 ascii = b"<foo>a</foo>"
229 soup_from_ascii = self.soup(ascii)
230 unicode_output = soup_from_ascii.decode()
231 self.assertTrue(isinstance(unicode_output, str))
232 self.assertEqual(unicode_output, self.document_for(ascii.decode()))
233 self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
234 finally:
235 logging.disable(logging.NOTSET)
236 bs4.dammit.chardet_dammit = chardet
237
238 def test_unicode_in_unicode_out(self):
239 # Unicode input is left alone. The original_encoding attribute
240 # is not set.
241 soup_from_unicode = self.soup(self.unicode_data)
242 self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
243 self.assertEqual(soup_from_unicode.foo.string, 'Sacr\xe9 bleu!')
244 self.assertEqual(soup_from_unicode.original_encoding, None)
245
246 def test_utf8_in_unicode_out(self):
247 # UTF-8 input is converted to Unicode. The original_encoding
248 # attribute is set.
249 soup_from_utf8 = self.soup(self.utf8_data)
250 self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
251 self.assertEqual(soup_from_utf8.foo.string, 'Sacr\xe9 bleu!')
252
253 def test_utf8_out(self):
254 # The internal data structures can be encoded as UTF-8.
255 soup_from_unicode = self.soup(self.unicode_data)
256 self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
257
258 @skipIf(
259 PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
260 "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
261 def test_attribute_name_containing_unicode_characters(self):
262 markup = '<div><a \N{SNOWMAN}="snowman"></a></div>'
263 self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
264
265class TestUnicodeDammit(unittest.TestCase):
266 """Standalone tests of UnicodeDammit."""
267
268 def test_unicode_input(self):
269 markup = "I'm already Unicode! \N{SNOWMAN}"
270 dammit = UnicodeDammit(markup)
271 self.assertEqual(dammit.unicode_markup, markup)
272
273 def test_smart_quotes_to_unicode(self):
274 markup = b"<foo>\x91\x92\x93\x94</foo>"
275 dammit = UnicodeDammit(markup)
276 self.assertEqual(
277 dammit.unicode_markup, "<foo>\u2018\u2019\u201c\u201d</foo>")
278
279 def test_smart_quotes_to_xml_entities(self):
280 markup = b"<foo>\x91\x92\x93\x94</foo>"
281 dammit = UnicodeDammit(markup, smart_quotes_to="xml")
282 self.assertEqual(
283 dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
284
285 def test_smart_quotes_to_html_entities(self):
286 markup = b"<foo>\x91\x92\x93\x94</foo>"
287 dammit = UnicodeDammit(markup, smart_quotes_to="html")
288 self.assertEqual(
289 dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
290
291 def test_smart_quotes_to_ascii(self):
292 markup = b"<foo>\x91\x92\x93\x94</foo>"
293 dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
294 self.assertEqual(
295 dammit.unicode_markup, """<foo>''""</foo>""")
296
297 def test_detect_utf8(self):
298 utf8 = b"Sacr\xc3\xa9 bleu! \xe2\x98\x83"
299 dammit = UnicodeDammit(utf8)
300 self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
301 self.assertEqual(dammit.unicode_markup, 'Sacr\xe9 bleu! \N{SNOWMAN}')
302
303
304 def test_convert_hebrew(self):
305 hebrew = b"\xed\xe5\xec\xf9"
306 dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
307 self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
308 self.assertEqual(dammit.unicode_markup, '\u05dd\u05d5\u05dc\u05e9')
309
310 def test_dont_see_smart_quotes_where_there_are_none(self):
311 utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
312 dammit = UnicodeDammit(utf_8)
313 self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
314 self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
315
316 def test_ignore_inappropriate_codecs(self):
317 utf8_data = "Räksmörgås".encode("utf-8")
318 dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
319 self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
320
321 def test_ignore_invalid_codecs(self):
322 utf8_data = "Räksmörgås".encode("utf-8")
323 for bad_encoding in ['.utf8', '...', 'utF---16.!']:
324 dammit = UnicodeDammit(utf8_data, [bad_encoding])
325 self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
326
327 def test_exclude_encodings(self):
328 # This is UTF-8.
329 utf8_data = "Räksmörgås".encode("utf-8")
330
331 # But if we exclude UTF-8 from consideration, the guess is
332 # Windows-1252.
333 dammit = UnicodeDammit(utf8_data, exclude_encodings=["utf-8"])
334 self.assertEqual(dammit.original_encoding.lower(), 'windows-1252')
335
336 # And if we exclude that, there is no valid guess at all.
337 dammit = UnicodeDammit(
338 utf8_data, exclude_encodings=["utf-8", "windows-1252"])
339 self.assertEqual(dammit.original_encoding, None)
340
341 def test_encoding_detector_replaces_junk_in_encoding_name_with_replacement_character(self):
342 detected = EncodingDetector(
343 b'<?xml version="1.0" encoding="UTF-\xdb" ?>')
344 encodings = list(detected.encodings)
345 assert 'utf-\N{REPLACEMENT CHARACTER}' in encodings
346
347 def test_detect_html5_style_meta_tag(self):
348
349 for data in (
350 b'<html><meta charset="euc-jp" /></html>',
351 b"<html><meta charset='euc-jp' /></html>",
352 b"<html><meta charset=euc-jp /></html>",
353 b"<html><meta charset=euc-jp/></html>"):
354 dammit = UnicodeDammit(data, is_html=True)
355 self.assertEqual(
356 "euc-jp", dammit.original_encoding)
357
358 def test_last_ditch_entity_replacement(self):
359 # This is a UTF-8 document that contains bytestrings
360 # completely incompatible with UTF-8 (ie. encoded with some other
361 # encoding).
362 #
363 # Since there is no consistent encoding for the document,
364 # Unicode, Dammit will eventually encode the document as UTF-8
365 # and encode the incompatible characters as REPLACEMENT
366 # CHARACTER.
367 #
368 # If chardet is installed, it will detect that the document
369 # can be converted into ISO-8859-1 without errors. This happens
370 # to be the wrong encoding, but it is a consistent encoding, so the
371 # code we're testing here won't run.
372 #
373 # So we temporarily disable chardet if it's present.
374 doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
375<html><b>\330\250\330\252\330\261</b>
376<i>\310\322\321\220\312\321\355\344</i></html>"""
377 chardet = bs4.dammit.chardet_dammit
378 logging.disable(logging.WARNING)
379 try:
380 def noop(str):
381 return None
382 bs4.dammit.chardet_dammit = noop
383 dammit = UnicodeDammit(doc)
384 self.assertEqual(True, dammit.contains_replacement_characters)
385 self.assertTrue("\ufffd" in dammit.unicode_markup)
386
387 soup = BeautifulSoup(doc, "html.parser")
388 self.assertTrue(soup.contains_replacement_characters)
389 finally:
390 logging.disable(logging.NOTSET)
391 bs4.dammit.chardet_dammit = chardet
392
393 def test_byte_order_mark_removed(self):
394 # A document written in UTF-16LE will have its byte order marker stripped.
395 data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
396 dammit = UnicodeDammit(data)
397 self.assertEqual("<a>áé</a>", dammit.unicode_markup)
398 self.assertEqual("utf-16le", dammit.original_encoding)
399
400 def test_detwingle(self):
401 # Here's a UTF8 document.
402 utf8 = ("\N{SNOWMAN}" * 3).encode("utf8")
403
404 # Here's a Windows-1252 document.
405 windows_1252 = (
406 "\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
407 "\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
408
409 # Through some unholy alchemy, they've been stuck together.
410 doc = utf8 + windows_1252 + utf8
411
412 # The document can't be turned into UTF-8:
413 self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
414
415 # Unicode, Dammit thinks the whole document is Windows-1252,
416 # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"
417
418 # But if we run it through fix_embedded_windows_1252, it's fixed:
419
420 fixed = UnicodeDammit.detwingle(doc)
421 self.assertEqual(
422 "☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
423
424 def test_detwingle_ignores_multibyte_characters(self):
425 # Each of these characters has a UTF-8 representation ending
426 # in \x93. \x93 is a smart quote if interpreted as
427 # Windows-1252. But our code knows to skip over multibyte
428 # UTF-8 characters, so they'll survive the process unscathed.
429 for tricky_unicode_char in (
430 "\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
431 "\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
432 "\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
433 ):
434 input = tricky_unicode_char.encode("utf8")
435 self.assertTrue(input.endswith(b'\x93'))
436 output = UnicodeDammit.detwingle(input)
437 self.assertEqual(output, input)
438
439class TestNamedspacedAttribute(SoupTest):
440
441 def test_name_may_be_none(self):
442 a = NamespacedAttribute("xmlns", None)
443 self.assertEqual(a, "xmlns")
444
445 def test_attribute_is_equivalent_to_colon_separated_string(self):
446 a = NamespacedAttribute("a", "b")
447 self.assertEqual("a:b", a)
448
449 def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
450 a = NamespacedAttribute("a", "b", "c")
451 b = NamespacedAttribute("a", "b", "c")
452 self.assertEqual(a, b)
453
454 # The actual namespace is not considered.
455 c = NamespacedAttribute("a", "b", None)
456 self.assertEqual(a, c)
457
458 # But name and prefix are important.
459 d = NamespacedAttribute("a", "z", "c")
460 self.assertNotEqual(a, d)
461
462 e = NamespacedAttribute("z", "b", "c")
463 self.assertNotEqual(a, e)
464
465
466class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
467
468 def test_content_meta_attribute_value(self):
469 value = CharsetMetaAttributeValue("euc-jp")
470 self.assertEqual("euc-jp", value)
471 self.assertEqual("euc-jp", value.original_value)
472 self.assertEqual("utf8", value.encode("utf8"))
473
474
475 def test_content_meta_attribute_value(self):
476 value = ContentMetaAttributeValue("text/html; charset=euc-jp")
477 self.assertEqual("text/html; charset=euc-jp", value)
478 self.assertEqual("text/html; charset=euc-jp", value.original_value)
479 self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
diff --git a/bitbake/lib/bs4/tests/test_tree.py b/bitbake/lib/bs4/tests/test_tree.py
deleted file mode 100644
index 8e5c66426e..0000000000
--- a/bitbake/lib/bs4/tests/test_tree.py
+++ /dev/null
@@ -1,2004 +0,0 @@
1# -*- coding: utf-8 -*-
2"""Tests for Beautiful Soup's tree traversal methods.
3
4The tree traversal methods are the main advantage of using Beautiful
5Soup over just using a parser.
6
7Different parsers will build different Beautiful Soup trees given the
8same markup, but all Beautiful Soup trees can be traversed with the
9methods tested here.
10"""
11
12import copy
13import pickle
14import re
15import warnings
16from bs4 import BeautifulSoup
17from bs4.builder import builder_registry
18from bs4.element import (
19 PY3K,
20 CData,
21 Comment,
22 Declaration,
23 Doctype,
24 NavigableString,
25 SoupStrainer,
26 Tag,
27)
28from bs4.testing import SoupTest
29
30XML_BUILDER_PRESENT = (builder_registry.lookup("xml") is not None)
31LXML_PRESENT = (builder_registry.lookup("lxml") is not None)
32
class TreeTest(SoupTest):
    """Shared assertion helpers for the tree-traversal tests."""

    def assertSelects(self, tags, should_match):
        """Assert that each tag's string content matches, in order.

        Used by tests that build a document of tags each containing a
        single string, then select some of them by some mechanism.
        """
        found = [tag.string for tag in tags]
        self.assertEqual(found, should_match)

    def assertSelectsIDs(self, tags, should_match):
        """Assert that each tag's 'id' attribute matches, in order.

        Same pattern as assertSelects, but keyed on the 'id' attribute
        instead of the string contents.
        """
        found = [tag['id'] for tag in tags]
        self.assertEqual(found, should_match)
52
53
class TestFind(TreeTest):
    """Basic tests of the find() method.

    find() simply delegates to find_all() with limit=1, so it gets
    only light coverage of its own here.
    """

    def test_find_tag(self):
        soup = self.soup("<a>1</a><b>2</b><a>3</a><b>4</b>")
        self.assertEqual("2", soup.find("b").string)

    def test_unicode_text_find(self):
        soup = self.soup('<h1>Räksmörgås</h1>')
        self.assertEqual('Räksmörgås', soup.find(string='Räksmörgås'))

    def test_unicode_attribute_find(self):
        soup = self.soup('<h1 id="Räksmörgås">here it is</h1>')
        # Render the soup first; presumably a regression guard against
        # encoding breaking the attribute lookup — TODO confirm.
        str(soup)
        self.assertEqual("here it is", soup.find(id='Räksmörgås').text)

    def test_find_everything(self):
        """Test an optimization that finds all tags."""
        soup = self.soup("<a>foo</a><b>bar</b>")
        self.assertEqual(2, len(soup.find_all()))

    def test_find_everything_with_name(self):
        """Test an optimization that finds all tags with a given name."""
        soup = self.soup("<a>foo</a><b>bar</b><a>baz</a>")
        self.assertEqual(2, len(soup.find_all('a')))
84
class TestFindAll(TreeTest):
    """Basic tests of the find_all() method."""

    def test_find_all_text_nodes(self):
        """The tree can be searched for text nodes."""
        soup = self.soup("<html>Foo<b>bar</b>\xbb</html>")
        # Exact string match, via either the 'string' or 'text' keyword.
        self.assertEqual(["bar"], soup.find_all(string="bar"))
        self.assertEqual(["bar"], soup.find_all(text="bar"))
        # Any one of several strings.
        self.assertEqual(["Foo", "bar"], soup.find_all(text=["Foo", "bar"]))
        # A regular expression.
        self.assertEqual(["Foo", "bar", '\xbb'],
                         soup.find_all(text=re.compile('.*')))
        # Any text node at all.
        self.assertEqual(["Foo", "bar", '\xbb'], soup.find_all(text=True))

    def test_find_all_limit(self):
        """find_all() can cap the number of results it returns."""
        soup = self.soup("<a>1</a><a>2</a><a>3</a><a>4</a><a>5</a>")
        self.assertSelects(soup.find_all('a', limit=3), ["1", "2", "3"])
        self.assertSelects(soup.find_all('a', limit=1), ["1"])
        self.assertSelects(soup.find_all('a', limit=10),
                           ["1", "2", "3", "4", "5"])
        # A limit of 0 is interpreted as "no limit".
        self.assertSelects(soup.find_all('a', limit=0),
                           ["1", "2", "3", "4", "5"])

    def test_calling_a_tag_is_calling_findall(self):
        soup = self.soup("<a>1</a><b>2<a id='foo'>3</a></b>")
        self.assertSelects(soup('a', limit=1), ["1"])
        self.assertSelects(soup.b(id="foo"), ["3"])

    def test_find_all_with_self_referential_data_structure_does_not_cause_infinite_recursion(self):
        soup = self.soup("<a></a>")
        # Build a list that contains itself.
        cycle = []
        cycle.append(cycle)

        # Special code in _normalize_search_value keeps this from
        # recursing forever.
        self.assertEqual([], soup.find_all(cycle))

    def test_find_all_resultset(self):
        """All find_all calls return a ResultSet."""
        soup = self.soup("<a></a>")
        # A ResultSet carries a 'source' attribute; a plain list does not.
        by_name = soup.find_all("a")
        self.assertTrue(hasattr(by_name, "source"))

        by_anything = soup.find_all(True)
        self.assertTrue(hasattr(by_anything, "source"))

        by_text = soup.find_all(text="foo")
        self.assertTrue(hasattr(by_text, "source"))
142
143
class TestFindAllBasicNamespaces(TreeTest):
    """Namespaced names are matched as plain 'prefix:name' strings."""

    def test_find_by_namespaced_name(self):
        soup = self.soup('<mathml:msqrt>4</mathml:msqrt><a svg:fill="red">')
        # Both tag names and attribute names may carry a namespace prefix.
        self.assertEqual("4", soup.find("mathml:msqrt").string)
        self.assertEqual("a", soup.find(attrs={"svg:fill": "red"}).name)
150
151
class TestFindAllByName(TreeTest):
    """Test ways of finding tags by tag name."""

    def setUp(self):
        # BUG FIX: this previously called super(TreeTest, self).setUp(),
        # which skips TreeTest itself in the MRO. Name the current class
        # so the whole chain runs.
        super(TestFindAllByName, self).setUp()
        self.tree = self.soup("""<a>First tag.</a>
                                 <b>Second tag.</b>
                                 <c>Third <a>Nested tag.</a> tag.</c>""")

    def test_find_all_by_tag_name(self):
        # Find all the <a> tags, including the nested one.
        self.assertSelects(
            self.tree.find_all('a'), ['First tag.', 'Nested tag.'])

    def test_find_all_by_name_and_text(self):
        # Tag name combined with an exact text match.
        self.assertSelects(
            self.tree.find_all('a', text='First tag.'), ['First tag.'])

        # Tag name combined with "any text".
        self.assertSelects(
            self.tree.find_all('a', text=True), ['First tag.', 'Nested tag.'])

        # Tag name combined with a text regular expression.
        self.assertSelects(
            self.tree.find_all('a', text=re.compile("tag")),
            ['First tag.', 'Nested tag.'])

    def test_find_all_on_non_root_element(self):
        # You can call find_all on any node, not just the root.
        self.assertSelects(self.tree.c.find_all('a'), ['Nested tag.'])

    def test_calling_element_invokes_find_all(self):
        self.assertSelects(self.tree('a'), ['First tag.', 'Nested tag.'])

    def test_find_all_by_tag_strainer(self):
        self.assertSelects(
            self.tree.find_all(SoupStrainer('a')),
            ['First tag.', 'Nested tag.'])

    def test_find_all_by_tag_names(self):
        # A list of names matches any of them.
        self.assertSelects(
            self.tree.find_all(['a', 'b']),
            ['First tag.', 'Second tag.', 'Nested tag.'])

    def test_find_all_by_tag_dict(self):
        # A dict of names behaves like a list of its keys.
        self.assertSelects(
            self.tree.find_all({'a': True, 'b': True}),
            ['First tag.', 'Second tag.', 'Nested tag.'])

    def test_find_all_by_tag_re(self):
        # A regular expression is matched against the tag name.
        self.assertSelects(
            self.tree.find_all(re.compile('^[ab]$')),
            ['First tag.', 'Second tag.', 'Nested tag.'])

    def test_find_all_with_tags_matching_method(self):
        # You can define an oracle method that determines whether
        # a tag matches the search.
        def id_matches_name(tag):
            return tag.name == tag.get('id')

        tree = self.soup("""<a id="a">Match 1.</a>
                            <a id="1">Does not match.</a>
                            <b id="b">Match 2.</a>""")

        self.assertSelects(
            tree.find_all(id_matches_name), ["Match 1.", "Match 2."])
217
218
class TestFindAllByAttribute(TreeTest):
    """Test ways of finding tags by attribute value."""

    def test_find_all_by_attribute_name(self):
        # You can pass in keyword arguments to find_all to search by
        # attribute.
        tree = self.soup("""
                         <a id="first">Matching a.</a>
                         <a id="second">
                          Non-matching <b id="first">Matching b.</b>a.
                         </a>""")
        self.assertSelects(tree.find_all(id='first'),
                           ["Matching a.", "Matching b."])

    def test_find_all_by_utf8_attribute_value(self):
        # The search value may be bytes, str, or a list mixing both.
        peace = "םולש".encode("utf8")
        data = '<a title="םולש"></a>'.encode("utf8")
        soup = self.soup(data)
        self.assertEqual([soup.a], soup.find_all(title=peace))
        self.assertEqual([soup.a], soup.find_all(title=peace.decode("utf8")))
        self.assertEqual([soup.a], soup.find_all(title=[peace, "something else"]))

    def test_find_all_by_attribute_dict(self):
        # You can pass in a dictionary as the argument 'attrs'. This
        # lets you search for attributes like 'name' (a fixed argument
        # to find_all) and 'class' (a reserved word in Python.)
        tree = self.soup("""
                         <a name="name1" class="class1">Name match.</a>
                         <a name="name2" class="class2">Class match.</a>
                         <a name="name3" class="class3">Non-match.</a>
                         <name1>A tag called 'name1'.</name1>
                         """)

        # This doesn't do what you want: 'name' selects by tag name.
        self.assertSelects(tree.find_all(name='name1'),
                           ["A tag called 'name1'."])
        # This does what you want.
        self.assertSelects(tree.find_all(attrs={'name': 'name1'}),
                           ["Name match."])

        self.assertSelects(tree.find_all(attrs={'class': 'class2'}),
                           ["Class match."])

    def test_find_all_by_class(self):
        tree = self.soup("""
                         <a class="1">Class 1.</a>
                         <a class="2">Class 2.</a>
                         <b class="1">Class 1.</b>
                         <c class="3 4">Class 3 and 4.</c>
                         """)

        # Passing in the class_ keyword argument will search against
        # the 'class' attribute.
        self.assertSelects(tree.find_all('a', class_='1'), ['Class 1.'])
        self.assertSelects(tree.find_all('c', class_='3'), ['Class 3 and 4.'])
        self.assertSelects(tree.find_all('c', class_='4'), ['Class 3 and 4.'])

        # Passing in a string to 'attrs' will also search the CSS class.
        self.assertSelects(tree.find_all('a', '1'), ['Class 1.'])
        self.assertSelects(tree.find_all(attrs='1'), ['Class 1.', 'Class 1.'])
        self.assertSelects(tree.find_all('c', '3'), ['Class 3 and 4.'])
        self.assertSelects(tree.find_all('c', '4'), ['Class 3 and 4.'])

    def test_find_by_class_when_multiple_classes_present(self):
        tree = self.soup("<gar class='foo bar'>Found it</gar>")

        f = tree.find_all("gar", class_=re.compile("o"))
        self.assertSelects(f, ["Found it"])

        f = tree.find_all("gar", class_=re.compile("a"))
        self.assertSelects(f, ["Found it"])

        # Since the class is not the string "foo bar", but the two
        # strings "foo" and "bar", this will not find anything.
        f = tree.find_all("gar", class_=re.compile("o b"))
        self.assertSelects(f, [])

    def test_find_all_with_non_dictionary_for_attrs_finds_by_class(self):
        soup = self.soup("<a class='bar'>Found it</a>")

        # A regular expression for 'attrs' matches the CSS class.
        self.assertSelects(soup.find_all("a", re.compile("ba")), ["Found it"])

        # So does a callable, applied to the class value.
        def big_attribute_value(value):
            return len(value) > 3

        self.assertSelects(soup.find_all("a", big_attribute_value), [])

        def small_attribute_value(value):
            return len(value) <= 3

        self.assertSelects(
            soup.find_all("a", small_attribute_value), ["Found it"])

    def test_find_all_with_string_for_attrs_finds_multiple_classes(self):
        soup = self.soup('<a class="foo bar"></a><a class="foo"></a>')
        a, a2 = soup.find_all("a")
        self.assertEqual([a, a2], soup.find_all("a", "foo"))
        self.assertEqual([a], soup.find_all("a", "bar"))

        # If you specify the class as a string that contains a
        # space, only that specific value will be found.
        self.assertEqual([a], soup.find_all("a", class_="foo bar"))
        self.assertEqual([a], soup.find_all("a", "foo bar"))
        self.assertEqual([], soup.find_all("a", "bar foo"))

    def test_find_all_by_attribute_soupstrainer(self):
        tree = self.soup("""
                         <a id="first">Match.</a>
                         <a id="second">Non-match.</a>""")

        strainer = SoupStrainer(attrs={'id': 'first'})
        self.assertSelects(tree.find_all(strainer), ['Match.'])

    def test_find_all_with_missing_atribute(self):
        # You can pass in None as the value of an attribute to find_all.
        # This will match tags that do not have that attribute set.
        tree = self.soup("""<a id="1">ID present.</a>
                            <a>No ID present.</a>
                            <a id="">ID is empty.</a>""")
        self.assertSelects(tree.find_all('a', id=None), ["No ID present."])

    def test_find_all_with_defined_attribute(self):
        # BUG FIX (comment only): this comment previously said "pass in
        # None", copy-pasted from the test above. You pass in True as
        # the value of an attribute to match tags that have that
        # attribute set to any value, including the empty string.
        tree = self.soup("""<a id="1">ID present.</a>
                            <a>No ID present.</a>
                            <a id="">ID is empty.</a>""")
        self.assertSelects(
            tree.find_all(id=True), ["ID present.", "ID is empty."])

    def test_find_all_with_numeric_attribute(self):
        # If you search for a number, it's treated as a string.
        tree = self.soup("""<a id=1>Unquoted attribute.</a>
                            <a id="1">Quoted attribute.</a>""")

        expected = ["Unquoted attribute.", "Quoted attribute."]
        self.assertSelects(tree.find_all(id=1), expected)
        self.assertSelects(tree.find_all(id="1"), expected)

    def test_find_all_with_list_attribute_values(self):
        # You can pass a list of attribute values instead of just one,
        # and you'll get tags that match any of the values.
        tree = self.soup("""<a id="1">1</a>
                            <a id="2">2</a>
                            <a id="3">3</a>
                            <a>No ID.</a>""")
        self.assertSelects(tree.find_all(id=["1", "3", "4"]),
                           ["1", "3"])

    def test_find_all_with_regular_expression_attribute_value(self):
        # You can pass a regular expression as an attribute value, and
        # you'll get tags whose values for that attribute match the
        # regular expression.
        tree = self.soup("""<a id="a">One a.</a>
                            <a id="aa">Two as.</a>
                            <a id="ab">Mixed as and bs.</a>
                            <a id="b">One b.</a>
                            <a>No ID.</a>""")

        self.assertSelects(tree.find_all(id=re.compile("^a+$")),
                           ["One a.", "Two as."])

    def test_find_by_name_and_containing_string(self):
        soup = self.soup("<b>foo</b><b>bar</b><a>foo</a>")
        a = soup.a

        self.assertEqual([a], soup.find_all("a", text="foo"))
        # BUG FIX: this assertion was accidentally written twice;
        # the duplicate has been removed.
        self.assertEqual([], soup.find_all("a", text="bar"))

    def test_find_by_name_and_containing_string_when_string_is_buried(self):
        soup = self.soup("<a>foo</a><a><b><c>foo</c></b></a>")
        self.assertEqual(soup.find_all("a"), soup.find_all("a", text="foo"))

    def test_find_by_attribute_and_containing_string(self):
        soup = self.soup('<b id="1">foo</b><a id="2">foo</a>')
        a = soup.a

        self.assertEqual([a], soup.find_all(id=2, text="foo"))
        self.assertEqual([], soup.find_all(id=1, text="bar"))
398
399
400
401
class TestIndex(TreeTest):
    """Test Tag.index, which locates a child within its parent."""

    def test_index(self):
        tree = self.soup("""<div>
                            <a>Identical</a>
                            <b>Not identical</b>
                            <a>Identical</a>

                            <c><d>Identical with child</d></c>
                            <b>Also not identical</b>
                            <c><d>Identical with child</d></c>
                            </div>""")
        div = tree.div
        # index() must distinguish equal-looking siblings by identity,
        # so every child maps back to its own position.
        for position, child in enumerate(div.contents):
            self.assertEqual(position, div.index(child))
        # An object that is not a child at all raises ValueError.
        self.assertRaises(ValueError, tree.index, 1)
418
419
class TestParentOperations(TreeTest):
    """Test navigation and searching through an element's parents."""

    def setUp(self):
        super(TestParentOperations, self).setUp()
        self.tree = self.soup('''<ul id="empty"></ul>
                                 <ul id="top">
                                  <ul id="middle">
                                   <ul id="bottom">
                                    <b>Start here</b>
                                   </ul>
                                  </ul>''')
        self.start = self.tree.b

    def test_parent(self):
        # Walking .parent climbs one enclosing tag at a time.
        node = self.start.parent
        self.assertEqual('bottom', node['id'])
        node = node.parent
        self.assertEqual('middle', node['id'])
        node = node.parent
        self.assertEqual('top', node['id'])

    def test_parent_of_top_tag_is_soup_object(self):
        top_tag = self.tree.contents[0]
        self.assertEqual(self.tree, top_tag.parent)

    def test_soup_object_has_no_parent(self):
        self.assertEqual(None, self.tree.parent)

    def test_find_parents(self):
        self.assertSelectsIDs(
            self.start.find_parents('ul'), ['bottom', 'middle', 'top'])
        self.assertSelectsIDs(
            self.start.find_parents('ul', id="middle"), ['middle'])

    def test_find_parent(self):
        self.assertEqual('bottom', self.start.find_parent('ul')['id'])
        self.assertEqual('top', self.start.find_parent('ul', id='top')['id'])

    def test_parent_of_text_element(self):
        text = self.tree.find(text="Start here")
        self.assertEqual('b', text.parent.name)

    def test_text_element_find_parent(self):
        text = self.tree.find(text="Start here")
        self.assertEqual('bottom', text.find_parent('ul')['id'])

    def test_parent_generator(self):
        parents = [parent['id'] for parent in self.start.parents
                   if parent is not None and 'id' in parent.attrs]
        self.assertEqual(['bottom', 'middle', 'top'], parents)
469
470
class ProximityTest(TreeTest):
    """Base class providing a small document for next/previous tests."""

    def setUp(self):
        # BUG FIX: this previously called super(TreeTest, self).setUp(),
        # which skips TreeTest itself in the MRO. Name the current class
        # so the whole chain runs.
        super(ProximityTest, self).setUp()
        self.tree = self.soup(
            '<html id="start"><head></head><body><b id="1">One</b><b id="2">Two</b><b id="3">Three</b></body></html>')
477
478
class TestNextOperations(ProximityTest):
    """Navigation forward through the document via next_element."""

    def setUp(self):
        super(TestNextOperations, self).setUp()
        self.start = self.tree.b

    def test_next(self):
        # A tag's next_element is its first child (here, a text node).
        self.assertEqual(self.start.next_element, "One")
        self.assertEqual(self.start.next_element.next_element['id'], "2")

    def test_next_of_last_item_is_none(self):
        last = self.tree.find(text="Three")
        self.assertEqual(last.next_element, None)

    def test_next_of_root_is_none(self):
        # The document root is outside the next/previous chain.
        self.assertEqual(self.tree.next_element, None)

    def test_find_all_next(self):
        self.assertSelects(self.start.find_all_next('b'), ["Two", "Three"])
        # BUG FIX: a stray, unasserted find_all_next(id=3) call that
        # discarded its result has been removed.
        self.assertSelects(self.start.find_all_next(id=3), ["Three"])

    def test_find_next(self):
        self.assertEqual(self.start.find_next('b')['id'], '2')
        self.assertEqual(self.start.find_next(text="Three"), "Three")

    def test_find_next_for_text_element(self):
        text = self.tree.find(text="One")
        self.assertEqual(text.find_next("b").string, "Two")
        self.assertSelects(text.find_all_next("b"), ["Two", "Three"])

    def test_next_generator(self):
        start = self.tree.find(text="Two")
        successors = [node for node in start.next_elements]
        # There are two successors: the final <b> tag and its text contents.
        tag, contents = successors
        self.assertEqual(tag['id'], '3')
        self.assertEqual(contents, "Three")
518
class TestPreviousOperations(ProximityTest):
    """Navigation backward through the document via previous_element."""

    def setUp(self):
        super(TestPreviousOperations, self).setUp()
        self.end = self.tree.find(text="Three")

    def test_previous(self):
        prev = self.end.previous_element
        self.assertEqual("3", prev['id'])
        self.assertEqual("Two", prev.previous_element)

    def test_previous_of_first_item_is_none(self):
        first = self.tree.find('html')
        self.assertEqual(None, first.previous_element)

    def test_previous_of_root_is_none(self):
        # The document root is outside the next/previous chain.
        # XXX This is broken!
        #self.assertEqual(self.tree.previous_element, None)
        pass

    def test_find_all_previous(self):
        # The <b> tag containing the "Three" node is that node's own
        # predecessor, which is why "Three" shows up here.
        self.assertSelects(
            self.end.find_all_previous('b'), ["Three", "Two", "One"])
        self.assertSelects(self.end.find_all_previous(id=1), ["One"])

    def test_find_previous(self):
        self.assertEqual('3', self.end.find_previous('b')['id'])
        self.assertEqual("One", self.end.find_previous(text="One"))

    def test_find_previous_for_text_element(self):
        text = self.tree.find(text="Three")
        self.assertEqual("Three", text.find_previous("b").string)
        self.assertSelects(
            text.find_all_previous("b"), ["Three", "Two", "One"])

    def test_previous_generator(self):
        start = self.tree.find(text="One")
        predecessors = list(start.previous_elements)

        # Four predecessors: the <b> tag containing "One", then the
        # <body>, <head>, and <html> tags.
        b, body, head, html = predecessors
        self.assertEqual('1', b['id'])
        self.assertEqual("body", body.name)
        self.assertEqual("head", head.name)
        self.assertEqual("html", html.name)
568
569
class SiblingTest(TreeTest):
    """Base class providing a document of nested <span> siblings."""

    def setUp(self):
        super(SiblingTest, self).setUp()
        markup = '''<html>
                    <span id="1">
                    <span id="1.1"></span>
                    </span>
                    <span id="2">
                    <span id="2.1"></span>
                    </span>
                    <span id="3">
                    <span id="3.1"></span>
                    </span>
                    <span id="4"></span>
                    </html>'''
        # All that whitespace looks good but makes the tests more
        # difficult. Get rid of it.
        # BUG FIX: use a raw string for the regex; "\s" in a plain
        # string literal is an invalid escape sequence (a
        # DeprecationWarning, later a SyntaxWarning, in Python 3).
        markup = re.compile(r"\n\s*").sub("", markup)
        self.tree = self.soup(markup)
590
591
class TestNextSibling(SiblingTest):
    """Forward navigation between siblings."""

    def setUp(self):
        super(TestNextSibling, self).setUp()
        self.start = self.tree.find(id="1")

    def test_next_sibling_of_root_is_none(self):
        self.assertEqual(None, self.tree.next_sibling)

    def test_next_sibling(self):
        self.assertEqual('2', self.start.next_sibling['id'])
        self.assertEqual('3', self.start.next_sibling.next_sibling['id'])

        # Note the difference between next_sibling and next_element:
        # next_element descends into the tag's own children first.
        self.assertEqual('1.1', self.start.next_element['id'])

    def test_next_sibling_may_not_exist(self):
        self.assertEqual(None, self.tree.html.next_sibling)

        nested_span = self.tree.find(id="1.1")
        self.assertEqual(None, nested_span.next_sibling)

        last_span = self.tree.find(id="4")
        self.assertEqual(None, last_span.next_sibling)

    def test_find_next_sibling(self):
        self.assertEqual('2', self.start.find_next_sibling('span')['id'])

    def test_next_siblings(self):
        self.assertSelectsIDs(self.start.find_next_siblings("span"),
                              ['2', '3', '4'])

        self.assertSelectsIDs(self.start.find_next_siblings(id='3'), ['3'])

    def test_next_sibling_for_text_element(self):
        soup = self.soup("Foo<b>bar</b>baz")
        start = soup.find(text="Foo")
        self.assertEqual('b', start.next_sibling.name)
        self.assertEqual('baz', start.next_sibling.next_sibling)

        self.assertSelects(start.find_next_siblings('b'), ['bar'])
        self.assertEqual("baz", start.find_next_sibling(text="baz"))
        self.assertEqual(None, start.find_next_sibling(text="nonesuch"))
635
636
class TestPreviousSibling(SiblingTest):
    """Backward navigation between siblings."""

    def setUp(self):
        super(TestPreviousSibling, self).setUp()
        self.end = self.tree.find(id="4")

    def test_previous_sibling_of_root_is_none(self):
        self.assertEqual(None, self.tree.previous_sibling)

    def test_previous_sibling(self):
        self.assertEqual('3', self.end.previous_sibling['id'])
        self.assertEqual('2', self.end.previous_sibling.previous_sibling['id'])

        # Note the difference between previous_sibling and
        # previous_element: previous_element descends into the
        # preceding tag's children.
        self.assertEqual('3.1', self.end.previous_element['id'])

    def test_previous_sibling_may_not_exist(self):
        self.assertEqual(None, self.tree.html.previous_sibling)

        nested_span = self.tree.find(id="1.1")
        self.assertEqual(None, nested_span.previous_sibling)

        first_span = self.tree.find(id="1")
        self.assertEqual(None, first_span.previous_sibling)

    def test_find_previous_sibling(self):
        self.assertEqual('3', self.end.find_previous_sibling('span')['id'])

    def test_previous_siblings(self):
        self.assertSelectsIDs(self.end.find_previous_siblings("span"),
                              ['3', '2', '1'])

        self.assertSelectsIDs(self.end.find_previous_siblings(id='1'), ['1'])

    def test_previous_sibling_for_text_element(self):
        soup = self.soup("Foo<b>bar</b>baz")
        start = soup.find(text="baz")
        self.assertEqual('b', start.previous_sibling.name)
        self.assertEqual('Foo', start.previous_sibling.previous_sibling)

        self.assertSelects(start.find_previous_siblings('b'), ['bar'])
        self.assertEqual("Foo", start.find_previous_sibling(text="Foo"))
        self.assertEqual(None, start.find_previous_sibling(text="nonesuch"))
680
681
class TestTagCreation(SoupTest):
    """Test the ability to create new tags."""

    def test_new_tag(self):
        soup = self.soup("")
        new_tag = soup.new_tag("foo", bar="baz")
        self.assertTrue(isinstance(new_tag, Tag))
        self.assertEqual("foo", new_tag.name)
        self.assertEqual({"bar": "baz"}, new_tag.attrs)
        # A freshly created tag is not attached to any tree.
        self.assertEqual(None, new_tag.parent)

    def test_tag_inherits_self_closing_rules_from_builder(self):
        if XML_BUILDER_PRESENT:
            xml_soup = BeautifulSoup("", "lxml-xml")
            xml_br = xml_soup.new_tag("br")
            xml_p = xml_soup.new_tag("p")

            # Both the <br> and <p> tag are empty-element, just because
            # they have no contents.
            self.assertEqual(b"<br/>", xml_br.encode())
            self.assertEqual(b"<p/>", xml_p.encode())

        html_soup = BeautifulSoup("", "html.parser")
        html_br = html_soup.new_tag("br")
        html_p = html_soup.new_tag("p")

        # The HTML builder applies HTML's rules about which tags are
        # empty-element tags, and the new tags reflect those rules.
        self.assertEqual(b"<br/>", html_br.encode())
        self.assertEqual(b"<p></p>", html_p.encode())

    def test_new_string_creates_navigablestring(self):
        soup = self.soup("")
        string = soup.new_string("foo")
        self.assertEqual("foo", string)
        self.assertTrue(isinstance(string, NavigableString))

    def test_new_string_can_create_navigablestring_subclass(self):
        soup = self.soup("")
        string = soup.new_string("foo", Comment)
        self.assertEqual("foo", string)
        self.assertTrue(isinstance(string, Comment))
723
724class TestTreeModification(SoupTest):
725
726 def test_attribute_modification(self):
727 soup = self.soup('<a id="1"></a>')
728 soup.a['id'] = 2
729 self.assertEqual(soup.decode(), self.document_for('<a id="2"></a>'))
730 del(soup.a['id'])
731 self.assertEqual(soup.decode(), self.document_for('<a></a>'))
732 soup.a['id2'] = 'foo'
733 self.assertEqual(soup.decode(), self.document_for('<a id2="foo"></a>'))
734
735 def test_new_tag_creation(self):
736 builder = builder_registry.lookup('html')()
737 soup = self.soup("<body></body>", builder=builder)
738 a = Tag(soup, builder, 'a')
739 ol = Tag(soup, builder, 'ol')
740 a['href'] = 'http://foo.com/'
741 soup.body.insert(0, a)
742 soup.body.insert(1, ol)
743 self.assertEqual(
744 soup.body.encode(),
745 b'<body><a href="http://foo.com/"></a><ol></ol></body>')
746
747 def test_append_to_contents_moves_tag(self):
748 doc = """<p id="1">Don't leave me <b>here</b>.</p>
749 <p id="2">Don\'t leave!</p>"""
750 soup = self.soup(doc)
751 second_para = soup.find(id='2')
752 bold = soup.b
753
754 # Move the <b> tag to the end of the second paragraph.
755 soup.find(id='2').append(soup.b)
756
757 # The <b> tag is now a child of the second paragraph.
758 self.assertEqual(bold.parent, second_para)
759
760 self.assertEqual(
761 soup.decode(), self.document_for(
762 '<p id="1">Don\'t leave me .</p>\n'
763 '<p id="2">Don\'t leave!<b>here</b></p>'))
764
765 def test_replace_with_returns_thing_that_was_replaced(self):
766 text = "<a></a><b><c></c></b>"
767 soup = self.soup(text)
768 a = soup.a
769 new_a = a.replace_with(soup.c)
770 self.assertEqual(a, new_a)
771
772 def test_unwrap_returns_thing_that_was_replaced(self):
773 text = "<a><b></b><c></c></a>"
774 soup = self.soup(text)
775 a = soup.a
776 new_a = a.unwrap()
777 self.assertEqual(a, new_a)
778
779 def test_replace_with_and_unwrap_give_useful_exception_when_tag_has_no_parent(self):
780 soup = self.soup("<a><b>Foo</b></a><c>Bar</c>")
781 a = soup.a
782 a.extract()
783 self.assertEqual(None, a.parent)
784 self.assertRaises(ValueError, a.unwrap)
785 self.assertRaises(ValueError, a.replace_with, soup.c)
786
787 def test_replace_tag_with_itself(self):
788 text = "<a><b></b><c>Foo<d></d></c></a><a><e></e></a>"
789 soup = self.soup(text)
790 c = soup.c
791 soup.c.replace_with(c)
792 self.assertEqual(soup.decode(), self.document_for(text))
793
794 def test_replace_tag_with_its_parent_raises_exception(self):
795 text = "<a><b></b></a>"
796 soup = self.soup(text)
797 self.assertRaises(ValueError, soup.b.replace_with, soup.a)
798
799 def test_insert_tag_into_itself_raises_exception(self):
800 text = "<a><b></b></a>"
801 soup = self.soup(text)
802 self.assertRaises(ValueError, soup.a.insert, 0, soup.a)
803
804 def test_replace_with_maintains_next_element_throughout(self):
805 soup = self.soup('<p><a>one</a><b>three</b></p>')
806 a = soup.a
807 b = a.contents[0]
808 # Make it so the <a> tag has two text children.
809 a.insert(1, "two")
810
811 # Now replace each one with the empty string.
812 left, right = a.contents
813 left.replaceWith('')
814 right.replaceWith('')
815
816 # The <b> tag is still connected to the tree.
817 self.assertEqual("three", soup.b.string)
818
819 def test_replace_final_node(self):
820 soup = self.soup("<b>Argh!</b>")
821 soup.find(text="Argh!").replace_with("Hooray!")
822 new_text = soup.find(text="Hooray!")
823 b = soup.b
824 self.assertEqual(new_text.previous_element, b)
825 self.assertEqual(new_text.parent, b)
826 self.assertEqual(new_text.previous_element.next_element, new_text)
827 self.assertEqual(new_text.next_element, None)
828
829 def test_consecutive_text_nodes(self):
830 # A builder should never create two consecutive text nodes,
831 # but if you insert one next to another, Beautiful Soup will
832 # handle it correctly.
833 soup = self.soup("<a><b>Argh!</b><c></c></a>")
834 soup.b.insert(1, "Hooray!")
835
836 self.assertEqual(
837 soup.decode(), self.document_for(
838 "<a><b>Argh!Hooray!</b><c></c></a>"))
839
840 new_text = soup.find(text="Hooray!")
841 self.assertEqual(new_text.previous_element, "Argh!")
842 self.assertEqual(new_text.previous_element.next_element, new_text)
843
844 self.assertEqual(new_text.previous_sibling, "Argh!")
845 self.assertEqual(new_text.previous_sibling.next_sibling, new_text)
846
847 self.assertEqual(new_text.next_sibling, None)
848 self.assertEqual(new_text.next_element, soup.c)
849
850 def test_insert_string(self):
851 soup = self.soup("<a></a>")
852 soup.a.insert(0, "bar")
853 soup.a.insert(0, "foo")
854 # The string were added to the tag.
855 self.assertEqual(["foo", "bar"], soup.a.contents)
856 # And they were converted to NavigableStrings.
857 self.assertEqual(soup.a.contents[0].next_element, "bar")
858
859 def test_insert_tag(self):
860 builder = self.default_builder
861 soup = self.soup(
862 "<a><b>Find</b><c>lady!</c><d></d></a>", builder=builder)
863 magic_tag = Tag(soup, builder, 'magictag')
864 magic_tag.insert(0, "the")
865 soup.a.insert(1, magic_tag)
866
867 self.assertEqual(
868 soup.decode(), self.document_for(
869 "<a><b>Find</b><magictag>the</magictag><c>lady!</c><d></d></a>"))
870
871 # Make sure all the relationships are hooked up correctly.
872 b_tag = soup.b
873 self.assertEqual(b_tag.next_sibling, magic_tag)
874 self.assertEqual(magic_tag.previous_sibling, b_tag)
875
876 find = b_tag.find(text="Find")
877 self.assertEqual(find.next_element, magic_tag)
878 self.assertEqual(magic_tag.previous_element, find)
879
880 c_tag = soup.c
881 self.assertEqual(magic_tag.next_sibling, c_tag)
882 self.assertEqual(c_tag.previous_sibling, magic_tag)
883
884 the = magic_tag.find(text="the")
885 self.assertEqual(the.parent, magic_tag)
886 self.assertEqual(the.next_element, c_tag)
887 self.assertEqual(c_tag.previous_element, the)
888
889 def test_append_child_thats_already_at_the_end(self):
890 data = "<a><b></b></a>"
891 soup = self.soup(data)
892 soup.a.append(soup.b)
893 self.assertEqual(data, soup.decode())
894
895 def test_move_tag_to_beginning_of_parent(self):
896 data = "<a><b></b><c></c><d></d></a>"
897 soup = self.soup(data)
898 soup.a.insert(0, soup.d)
899 self.assertEqual("<a><d></d><b></b><c></c></a>", soup.decode())
900
901 def test_insert_works_on_empty_element_tag(self):
902 # This is a little strange, since most HTML parsers don't allow
903 # markup like this to come through. But in general, we don't
904 # know what the parser would or wouldn't have allowed, so
905 # I'm letting this succeed for now.
906 soup = self.soup("<br/>")
907 soup.br.insert(1, "Contents")
908 self.assertEqual(str(soup.br), "<br>Contents</br>")
909
910 def test_insert_before(self):
911 soup = self.soup("<a>foo</a><b>bar</b>")
912 soup.b.insert_before("BAZ")
913 soup.a.insert_before("QUUX")
914 self.assertEqual(
915 soup.decode(), self.document_for("QUUX<a>foo</a>BAZ<b>bar</b>"))
916
917 soup.a.insert_before(soup.b)
918 self.assertEqual(
919 soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
920
921 def test_insert_after(self):
922 soup = self.soup("<a>foo</a><b>bar</b>")
923 soup.b.insert_after("BAZ")
924 soup.a.insert_after("QUUX")
925 self.assertEqual(
926 soup.decode(), self.document_for("<a>foo</a>QUUX<b>bar</b>BAZ"))
927 soup.b.insert_after(soup.a)
928 self.assertEqual(
929 soup.decode(), self.document_for("QUUX<b>bar</b><a>foo</a>BAZ"))
930
931 def test_insert_after_raises_exception_if_after_has_no_meaning(self):
932 soup = self.soup("")
933 tag = soup.new_tag("a")
934 string = soup.new_string("")
935 self.assertRaises(ValueError, string.insert_after, tag)
936 self.assertRaises(NotImplementedError, soup.insert_after, tag)
937 self.assertRaises(ValueError, tag.insert_after, tag)
938
939 def test_insert_before_raises_notimplementederror_if_before_has_no_meaning(self):
940 soup = self.soup("")
941 tag = soup.new_tag("a")
942 string = soup.new_string("")
943 self.assertRaises(ValueError, string.insert_before, tag)
944 self.assertRaises(NotImplementedError, soup.insert_before, tag)
945 self.assertRaises(ValueError, tag.insert_before, tag)
946
    def test_replace_with(self):
        """replace_with() swaps one element for another, detaching the
        replaced element and rewiring the replacement's links."""
        soup = self.soup(
            "<p>There's <b>no</b> business like <b>show</b> business</p>")
        no, show = soup.find_all('b')
        show.replace_with(no)
        self.assertEqual(
            soup.decode(),
            self.document_for(
                "<p>There's business like <b>no</b> business</p>"))

        # The replaced tag is orphaned; the mover has a new home.
        self.assertEqual(show.parent, None)
        self.assertEqual(no.parent, soup.p)
        self.assertEqual(no.next_element, "no")
        self.assertEqual(no.next_sibling, " business")
961
962 def test_replace_first_child(self):
963 data = "<a><b></b><c></c></a>"
964 soup = self.soup(data)
965 soup.b.replace_with(soup.c)
966 self.assertEqual("<a><c></c></a>", soup.decode())
967
968 def test_replace_last_child(self):
969 data = "<a><b></b><c></c></a>"
970 soup = self.soup(data)
971 soup.c.replace_with(soup.b)
972 self.assertEqual("<a><b></b></a>", soup.decode())
973
    def test_nested_tag_replace_with(self):
        """Replacing a tag with a tag pulled from elsewhere in the tree
        mends both the source gap and the destination links."""
        soup = self.soup(
            """<a>We<b>reserve<c>the</c><d>right</d></b></a><e>to<f>refuse</f><g>service</g></e>""")

        # Replace the entire <b> tag and its contents ("reserve the
        # right") with the <f> tag ("refuse").
        remove_tag = soup.b
        move_tag = soup.f
        remove_tag.replace_with(move_tag)

        self.assertEqual(
            soup.decode(), self.document_for(
                "<a>We<f>refuse</f></a><e>to<g>service</g></e>"))

        # The <b> tag is now an orphan.
        self.assertEqual(remove_tag.parent, None)
        self.assertEqual(remove_tag.find(text="right").next_element, None)
        self.assertEqual(remove_tag.previous_element, None)
        self.assertEqual(remove_tag.next_sibling, None)
        self.assertEqual(remove_tag.previous_sibling, None)

        # The <f> tag is now connected to the <a> tag.
        self.assertEqual(move_tag.parent, soup.a)
        self.assertEqual(move_tag.previous_element, "We")
        self.assertEqual(move_tag.next_element.next_element, soup.e)
        self.assertEqual(move_tag.next_sibling, None)

        # The gap where the <f> tag used to be has been mended, and
        # the word "to" is now connected to the <g> tag.
        to_text = soup.find(text="to")
        g_tag = soup.g
        self.assertEqual(to_text.next_element, g_tag)
        self.assertEqual(to_text.next_sibling, g_tag)
        self.assertEqual(g_tag.previous_element, to_text)
        self.assertEqual(g_tag.previous_sibling, to_text)
1009
1010 def test_unwrap(self):
1011 tree = self.soup("""
1012 <p>Unneeded <em>formatting</em> is unneeded</p>
1013 """)
1014 tree.em.unwrap()
1015 self.assertEqual(tree.em, None)
1016 self.assertEqual(tree.p.text, "Unneeded formatting is unneeded")
1017
1018 def test_wrap(self):
1019 soup = self.soup("I wish I was bold.")
1020 value = soup.string.wrap(soup.new_tag("b"))
1021 self.assertEqual(value.decode(), "<b>I wish I was bold.</b>")
1022 self.assertEqual(
1023 soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
1024
1025 def test_wrap_extracts_tag_from_elsewhere(self):
1026 soup = self.soup("<b></b>I wish I was bold.")
1027 soup.b.next_sibling.wrap(soup.b)
1028 self.assertEqual(
1029 soup.decode(), self.document_for("<b>I wish I was bold.</b>"))
1030
1031 def test_wrap_puts_new_contents_at_the_end(self):
1032 soup = self.soup("<b>I like being bold.</b>I wish I was bold.")
1033 soup.b.next_sibling.wrap(soup.b)
1034 self.assertEqual(2, len(soup.b.contents))
1035 self.assertEqual(
1036 soup.decode(), self.document_for(
1037 "<b>I like being bold.I wish I was bold.</b>"))
1038
    def test_extract(self):
        """extract() removes an element from the tree, orphans it, and
        mends the gap it leaves behind."""
        soup = self.soup(
            '<html><body>Some content. <div id="nav">Nav crap</div> More content.</body></html>')

        self.assertEqual(len(soup.body.contents), 3)
        extracted = soup.find(id="nav").extract()

        self.assertEqual(
            soup.decode(), "<html><body>Some content. More content.</body></html>")
        self.assertEqual(extracted.decode(), '<div id="nav">Nav crap</div>')

        # The extracted tag is now an orphan.
        self.assertEqual(len(soup.body.contents), 2)
        self.assertEqual(extracted.parent, None)
        self.assertEqual(extracted.previous_element, None)
        self.assertEqual(extracted.next_element.next_element, None)

        # The gap where the extracted tag used to be has been mended.
        content_1 = soup.find(text="Some content. ")
        content_2 = soup.find(text=" More content.")
        self.assertEqual(content_1.next_element, content_2)
        self.assertEqual(content_1.next_sibling, content_2)
        self.assertEqual(content_2.previous_element, content_1)
        self.assertEqual(content_2.previous_sibling, content_1)
1063
    def test_extract_distinguishes_between_identical_strings(self):
        """extract() removes the specific NavigableString object, not
        just any string with the same value."""
        soup = self.soup("<a>foo</a><b>bar</b>")
        foo_1 = soup.a.string
        bar_1 = soup.b.string
        foo_2 = soup.new_string("foo")
        bar_2 = soup.new_string("bar")
        soup.a.append(foo_2)
        soup.b.append(bar_2)

        # Now there are two identical strings in the <a> tag, and two
        # in the <b> tag. Let's remove the first "foo" and the second
        # "bar".
        foo_1.extract()
        bar_2.extract()
        # NavigableString equality is by value, so these pass because the
        # surviving strings have the expected text.
        self.assertEqual(foo_2, soup.a.string)
        self.assertEqual(bar_2, soup.b.string)
1080
1081 def test_extract_multiples_of_same_tag(self):
1082 soup = self.soup("""
1083<html>
1084<head>
1085<script>foo</script>
1086</head>
1087<body>
1088 <script>bar</script>
1089 <a></a>
1090</body>
1091<script>baz</script>
1092</html>""")
1093 [soup.script.extract() for i in soup.find_all("script")]
1094 self.assertEqual("<body>\n\n<a></a>\n</body>", str(soup.body))
1095
1096
1097 def test_extract_works_when_element_is_surrounded_by_identical_strings(self):
1098 soup = self.soup(
1099 '<html>\n'
1100 '<body>hi</body>\n'
1101 '</html>')
1102 soup.find('body').extract()
1103 self.assertEqual(None, soup.find('body'))
1104
1105
    def test_clear(self):
        """Tag.clear() empties a tag, optionally decomposing the children."""
        soup = self.soup("<p><a>String <em>Italicized</em></a> and another</p>")
        # clear using extract(): children are detached but stay usable.
        a = soup.a
        soup.p.clear()
        self.assertEqual(len(soup.p.contents), 0)
        self.assertTrue(hasattr(a, "contents"))

        # clear using decompose(): children are destroyed recursively.
        em = a.em
        a.clear(decompose=True)
        self.assertEqual(0, len(em.contents))
1119
1120 def test_string_set(self):
1121 """Tag.string = 'string'"""
1122 soup = self.soup("<a></a> <b><c></c></b>")
1123 soup.a.string = "foo"
1124 self.assertEqual(soup.a.contents, ["foo"])
1125 soup.b.string = "bar"
1126 self.assertEqual(soup.b.contents, ["bar"])
1127
1128 def test_string_set_does_not_affect_original_string(self):
1129 soup = self.soup("<a><b>foo</b><c>bar</c>")
1130 soup.b.string = soup.c.string
1131 self.assertEqual(soup.a.encode(), b"<a><b>bar</b><c>bar</c></a>")
1132
1133 def test_set_string_preserves_class_of_string(self):
1134 soup = self.soup("<a></a>")
1135 cdata = CData("foo")
1136 soup.a.string = cdata
1137 self.assertTrue(isinstance(soup.a.string, CData))
1138
class TestElementObjects(SoupTest):
    """Test various features of element objects."""

    def test_len(self):
        """The length of an element is its number of children."""
        soup = self.soup("<top>1<b>2</b>3</top>")

        # The BeautifulSoup object itself contains one element: the
        # <top> tag.
        self.assertEqual(len(soup.contents), 1)
        self.assertEqual(len(soup), 1)

        # The <top> tag contains three elements: the text node "1", the
        # <b> tag, and the text node "3".
        self.assertEqual(len(soup.top), 3)
        self.assertEqual(len(soup.top.contents), 3)

    def test_member_access_invokes_find(self):
        """Accessing a Python member .foo invokes find('foo')"""
        soup = self.soup('<b><i></i></b>')
        self.assertEqual(soup.b, soup.find('b'))
        self.assertEqual(soup.b.i, soup.find('b').find('i'))
        self.assertEqual(soup.a, None)

    def test_deprecated_member_access(self):
        """The old .fooTag access style works but emits a warning."""
        soup = self.soup('<b><i></i></b>')
        with warnings.catch_warnings(record=True) as w:
            tag = soup.bTag
        self.assertEqual(soup.b, tag)
        self.assertEqual(
            '.bTag is deprecated, use .find("b") instead.',
            str(w[0].message))

    def test_has_attr(self):
        """has_attr() checks for the presence of an attribute.

        Please note: has_attr() is different from
        __in__. has_attr() checks the tag's attributes and __in__
        checks the tag's children.
        """
        soup = self.soup("<foo attr='bar'>")
        self.assertTrue(soup.foo.has_attr('attr'))
        self.assertFalse(soup.foo.has_attr('attr2'))


    def test_attributes_come_out_in_alphabetical_order(self):
        markup = '<b a="1" z="5" m="3" f="2" y="4"></b>'
        self.assertSoupEquals(markup, '<b a="1" f="2" m="3" y="4" z="5"></b>')

    def test_string(self):
        # A tag that contains only a text node makes that node
        # available as .string.
        soup = self.soup("<b>foo</b>")
        self.assertEqual(soup.b.string, 'foo')

    def test_empty_tag_has_no_string(self):
        # A tag with no children has no .string.
        soup = self.soup("<b></b>")
        self.assertEqual(soup.b.string, None)

    def test_tag_with_multiple_children_has_no_string(self):
        # A tag with multiple children has no .string.
        soup = self.soup("<a>foo<b></b><b></b></b>")
        self.assertEqual(soup.b.string, None)

        soup = self.soup("<a>foo<b></b>bar</b>")
        self.assertEqual(soup.b.string, None)

        # Even if all the children are strings, due to trickery,
        # it won't work--but this would be a good optimization.
        soup = self.soup("<a>foo</b>")
        soup.a.insert(1, "bar")
        self.assertEqual(soup.a.string, None)

    def test_tag_with_recursive_string_has_string(self):
        # A tag with a single child which has a .string inherits that
        # .string.
        soup = self.soup("<a><b>foo</b></a>")
        self.assertEqual(soup.a.string, "foo")
        self.assertEqual(soup.string, "foo")

    def test_lack_of_string(self):
        """Only a tag containing a single text node has a .string."""
        soup = self.soup("<b>f<i>e</i>o</b>")
        self.assertFalse(soup.b.string)

        soup = self.soup("<b></b>")
        self.assertFalse(soup.b.string)

    def test_all_text(self):
        """Tag.text and Tag.get_text(sep=u"") -> all child text, concatenated"""
        soup = self.soup("<a>a<b>r</b> <r> t </r></a>")
        self.assertEqual(soup.a.text, "ar t ")
        self.assertEqual(soup.a.get_text(strip=True), "art")
        self.assertEqual(soup.a.get_text(","), "a,r, , t ")
        self.assertEqual(soup.a.get_text(",", strip=True), "a,r,t")

    def test_get_text_ignores_comments(self):
        """get_text() skips Comment nodes unless asked for via `types`."""
        soup = self.soup("foo<!--IGNORE-->bar")
        self.assertEqual(soup.get_text(), "foobar")

        self.assertEqual(
            soup.get_text(types=(NavigableString, Comment)), "fooIGNOREbar")
        self.assertEqual(
            soup.get_text(types=None), "fooIGNOREbar")

    def test_all_strings_ignores_comments(self):
        soup = self.soup("foo<!--IGNORE-->bar")
        self.assertEqual(['foo', 'bar'], list(soup.strings))
1248
class TestCDAtaListAttributes(SoupTest):

    """Testing cdata-list attributes like 'class', which Beautiful Soup
    parses into lists of whitespace-separated tokens.
    """
    def test_single_value_becomes_list(self):
        soup = self.soup("<a class='foo'>")
        self.assertEqual(["foo"], soup.a['class'])

    def test_multiple_values_becomes_list(self):
        soup = self.soup("<a class='foo bar'>")
        self.assertEqual(["foo", "bar"], soup.a['class'])

    def test_multiple_values_separated_by_weird_whitespace(self):
        # Tabs and newlines count as separators, not just spaces.
        soup = self.soup("<a class='foo\tbar\nbaz'>")
        self.assertEqual(["foo", "bar", "baz"], soup.a['class'])

    def test_attributes_joined_into_string_on_output(self):
        # On output the list is re-joined with single spaces.
        soup = self.soup("<a class='foo\tbar'>")
        self.assertEqual(b'<a class="foo bar"></a>', soup.a.encode())

    def test_accept_charset(self):
        soup = self.soup('<form accept-charset="ISO-8859-1 UTF-8">')
        self.assertEqual(['ISO-8859-1', 'UTF-8'], soup.form['accept-charset'])

    def test_cdata_attribute_applying_only_to_one_tag(self):
        data = '<a accept-charset="ISO-8859-1 UTF-8"></a>'
        soup = self.soup(data)
        # We saw in another test that accept-charset is a cdata-list
        # attribute for the <form> tag. But it's not a cdata-list
        # attribute for any other tag.
        self.assertEqual('ISO-8859-1 UTF-8', soup.a['accept-charset'])

    def test_string_has_immutable_name_property(self):
        # A NavigableString's .name is always None and can't be set.
        string = self.soup("s").string
        self.assertEqual(None, string.name)
        def t():
            string.name = 'foo'
        self.assertRaises(AttributeError, t)
1287
class TestPersistence(SoupTest):
    "Testing features like pickle and deepcopy."

    def setUp(self):
        super(TestPersistence, self).setUp()
        # A realistic page used by the pickle/deepcopy round-trip tests.
        self.page = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/transitional.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<title>Beautiful Soup: We called him Tortoise because he taught us.</title>
<link rev="made" href="mailto:leonardr@segfault.org">
<meta name="Description" content="Beautiful Soup: an HTML parser optimized for screen-scraping.">
<meta name="generator" content="Markov Approximation 1.4 (module: leonardr)">
<meta name="author" content="Leonard Richardson">
</head>
<body>
<a href="foo">foo</a>
<a href="foo"><b>bar</b></a>
</body>
</html>"""
        self.tree = self.soup(self.page)

    def test_pickle_and_unpickle_identity(self):
        # Pickling a tree, then unpickling it, yields a tree identical
        # to the original.
        dumped = pickle.dumps(self.tree, 2)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.__class__, BeautifulSoup)
        self.assertEqual(loaded.decode(), self.tree.decode())

    def test_deepcopy_identity(self):
        # Making a deepcopy of a tree yields an identical tree.
        copied = copy.deepcopy(self.tree)
        self.assertEqual(copied.decode(), self.tree.decode())

    def test_unicode_pickle(self):
        # A tree containing Unicode characters can be pickled.
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
        loaded = pickle.loads(dumped)
        self.assertEqual(loaded.decode(), soup.decode())

    def test_copy_navigablestring_is_not_attached_to_tree(self):
        # copy.copy() of a NavigableString yields an equal but
        # detached string with no tree relationships.
        html = "<b>Foo<a></a></b><b>Bar</b>"
        soup = self.soup(html)
        s1 = soup.find(string="Foo")
        s2 = copy.copy(s1)
        self.assertEqual(s1, s2)
        self.assertEqual(None, s2.parent)
        self.assertEqual(None, s2.next_element)
        self.assertNotEqual(None, s1.next_sibling)
        self.assertEqual(None, s2.next_sibling)
        self.assertEqual(None, s2.previous_element)

    def test_copy_navigablestring_subclass_has_same_type(self):
        html = "<b><!--Foo--></b>"
        soup = self.soup(html)
        s1 = soup.string
        s2 = copy.copy(s1)
        self.assertEqual(s1, s2)
        self.assertTrue(isinstance(s2, Comment))

    def test_copy_entire_soup(self):
        html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
        soup = self.soup(html)
        soup_copy = copy.copy(soup)
        self.assertEqual(soup, soup_copy)

    def test_copy_tag_copies_contents(self):
        html = "<div><b>Foo<a></a></b><b>Bar</b></div>end"
        soup = self.soup(html)
        div = soup.div
        div_copy = copy.copy(div)

        # The two tags look the same, and evaluate to equal.
        self.assertEqual(str(div), str(div_copy))
        self.assertEqual(div, div_copy)

        # But they're not the same object.
        self.assertFalse(div is div_copy)

        # And they don't have the same relation to the parse tree. The
        # copy is not associated with a parse tree at all.
        self.assertEqual(None, div_copy.parent)
        self.assertEqual(None, div_copy.previous_element)
        self.assertEqual(None, div_copy.find(string='Bar').next_element)
        self.assertNotEqual(None, div.find(string='Bar').next_element)
1377
class TestSubstitutions(SoupTest):
    """Tests of the output formatters and encoding substitution."""

    def test_default_formatter_is_minimal(self):
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter="minimal")
        # The < is converted back into &lt; but the e-with-acute is left alone.
        self.assertEqual(
            decoded,
            self.document_for(
                "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))

    def test_formatter_html(self):
        # The "html" formatter converts non-ASCII characters to
        # named HTML entities.
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter="html")
        self.assertEqual(
            decoded,
            self.document_for("<b>&lt;&lt;Sacr&eacute; bleu!&gt;&gt;</b>"))

    def test_formatter_minimal(self):
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter="minimal")
        # The < is converted back into &lt; but the e-with-acute is left alone.
        self.assertEqual(
            decoded,
            self.document_for(
                "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"))

    def test_formatter_null(self):
        markup = "<b>&lt;&lt;Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!&gt;&gt;</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter=None)
        # Neither the angle brackets nor the e-with-acute are converted.
        # This is not valid HTML, but it's what the user wanted.
        self.assertEqual(decoded,
                         self.document_for("<b><<Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!>></b>"))

    def test_formatter_custom(self):
        markup = "<b>&lt;foo&gt;</b><b>bar</b>"
        soup = self.soup(markup)
        decoded = soup.decode(formatter = lambda x: x.upper())
        # Instead of normal entity conversion code, the custom
        # callable is called on every string.
        self.assertEqual(
            decoded,
            self.document_for("<b><FOO></b><b>BAR</b>"))

    def test_formatter_is_run_on_attribute_values(self):
        markup = '<a href="http://a.com?a=b&c=é">e</a>'
        soup = self.soup(markup)
        a = soup.a

        expect_minimal = '<a href="http://a.com?a=b&amp;c=é">e</a>'

        self.assertEqual(expect_minimal, a.decode())
        self.assertEqual(expect_minimal, a.decode(formatter="minimal"))

        expect_html = '<a href="http://a.com?a=b&amp;c=&eacute;">e</a>'
        self.assertEqual(expect_html, a.decode(formatter="html"))

        self.assertEqual(markup, a.decode(formatter=None))
        expect_upper = '<a href="HTTP://A.COM?A=B&C=É">E</a>'
        self.assertEqual(expect_upper, a.decode(formatter=lambda x: x.upper()))

    def test_formatter_skips_script_tag_for_html_documents(self):
        # <script> contents are emitted verbatim, never entity-escaped.
        doc = """
  <script type="text/javascript">
   console.log("< < hey > > ");
  </script>
"""
        encoded = BeautifulSoup(doc, 'html.parser').encode()
        self.assertTrue(b"< < hey > >" in encoded)

    def test_formatter_skips_style_tag_for_html_documents(self):
        # <style> contents are emitted verbatim, never entity-escaped.
        doc = """
  <style type="text/css">
   console.log("< < hey > > ");
  </style>
"""
        encoded = BeautifulSoup(doc, 'html.parser').encode()
        self.assertTrue(b"< < hey > >" in encoded)

    def test_prettify_leaves_preformatted_text_alone(self):
        soup = self.soup("<div> foo <pre> \tbar\n \n </pre> baz ")
        # Everything outside the <pre> tag is reformatted, but everything
        # inside is left alone.
        self.assertEqual(
            '<div>\n foo\n <pre> \tbar\n \n </pre>\n baz\n</div>',
            soup.div.prettify())

    def test_prettify_accepts_formatter(self):
        soup = BeautifulSoup("<html><body>foo</body></html>", 'html.parser')
        pretty = soup.prettify(formatter = lambda x: x.upper())
        self.assertTrue("FOO" in pretty)

    def test_prettify_outputs_unicode_by_default(self):
        soup = self.soup("<a></a>")
        self.assertEqual(str, type(soup.prettify()))

    def test_prettify_can_encode_data(self):
        soup = self.soup("<a></a>")
        self.assertEqual(bytes, type(soup.prettify("utf-8")))

    def test_html_entity_substitution_off_by_default(self):
        markup = "<b>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</b>"
        soup = self.soup(markup)
        encoded = soup.b.encode("utf-8")
        self.assertEqual(encoded, markup.encode('utf-8'))

    def test_encoding_substitution(self):
        # Here's the <meta> tag saying that a document is
        # encoded in Shift-JIS.
        meta_tag = ('<meta content="text/html; charset=x-sjis" '
                    'http-equiv="Content-type"/>')
        soup = self.soup(meta_tag)

        # Parse the document, and the charset appears unchanged.
        self.assertEqual(soup.meta['content'], 'text/html; charset=x-sjis')

        # Encode the document into some encoding, and the encoding is
        # substituted into the meta tag.
        utf_8 = soup.encode("utf-8")
        self.assertTrue(b"charset=utf-8" in utf_8)

        euc_jp = soup.encode("euc_jp")
        self.assertTrue(b"charset=euc_jp" in euc_jp)

        shift_jis = soup.encode("shift-jis")
        self.assertTrue(b"charset=shift-jis" in shift_jis)

        utf_16_u = soup.encode("utf-16").decode("utf-16")
        self.assertTrue("charset=utf-16" in utf_16_u)

    def test_encoding_substitution_doesnt_happen_if_tag_is_strained(self):
        markup = ('<head><meta content="text/html; charset=x-sjis" '
                  'http-equiv="Content-type"/></head><pre>foo</pre>')

        # Beautiful Soup used to try to rewrite the meta tag even if the
        # meta tag got filtered out by the strainer. This test makes
        # sure that doesn't happen.
        strainer = SoupStrainer('pre')
        soup = self.soup(markup, parse_only=strainer)
        self.assertEqual(soup.contents[0].name, 'pre')
1523
class TestEncoding(SoupTest):
    """Test the ability to encode objects into strings."""

    def test_unicode_string_can_be_encoded(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertEqual(soup.b.string.encode("utf-8"),
                         "\N{SNOWMAN}".encode("utf-8"))

    def test_tag_containing_unicode_string_can_be_encoded(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertEqual(
            soup.b.encode("utf-8"), html.encode("utf-8"))

    def test_encoding_substitutes_unrecognized_characters_by_default(self):
        # A character missing from the target charset becomes a
        # numeric character reference.
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertEqual(soup.b.encode("ascii"), b"<b>&#9731;</b>")

    def test_encoding_can_be_made_strict(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertRaises(
            UnicodeEncodeError, soup.encode, "ascii", errors="strict")

    def test_decode_contents(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertEqual("\N{SNOWMAN}", soup.b.decode_contents())

    def test_encode_contents(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertEqual(
            "\N{SNOWMAN}".encode("utf8"), soup.b.encode_contents(
                encoding="utf8"))

    def test_deprecated_renderContents(self):
        # renderContents() is the deprecated spelling of encode_contents().
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        self.assertEqual(
            "\N{SNOWMAN}".encode("utf8"), soup.b.renderContents())

    def test_repr(self):
        html = "<b>\N{SNOWMAN}</b>"
        soup = self.soup(html)
        # PY3K is a module-level flag; on Python 2 repr() is bytes.
        if PY3K:
            self.assertEqual(html, repr(soup))
        else:
            self.assertEqual(b'<b>\\u2603</b>', repr(soup))
1575
class TestNavigableStringSubclasses(SoupTest):
    """Tests for CData, Doctype and Declaration output behavior."""

    def test_cdata(self):
        # None of the current builders turn CDATA sections into CData
        # objects, but you can create them manually.
        soup = self.soup("")
        cdata = CData("foo")
        soup.insert(1, cdata)
        self.assertEqual(str(soup), "<![CDATA[foo]]>")
        self.assertEqual(soup.find(text="foo"), "foo")
        self.assertEqual(soup.contents[0], "foo")

    def test_cdata_is_never_formatted(self):
        """Text inside a CData object is passed into the formatter.

        But the return value is ignored.
        """

        self.count = 0
        def increment(*args):
            self.count += 1
            return "BITTER FAILURE"

        soup = self.soup("")
        cdata = CData("<><><>")
        soup.insert(1, cdata)
        # The formatter ran once, but its return value did not appear
        # in the output.
        self.assertEqual(
            b"<![CDATA[<><><>]]>", soup.encode(formatter=increment))
        self.assertEqual(1, self.count)

    def test_doctype_ends_in_newline(self):
        # Unlike other NavigableString subclasses, a DOCTYPE always ends
        # in a newline.
        doctype = Doctype("foo")
        soup = self.soup("")
        soup.insert(1, doctype)
        self.assertEqual(soup.encode(), b"<!DOCTYPE foo>\n")

    def test_declaration(self):
        d = Declaration("foo")
        self.assertEqual("<?foo?>", d.output_ready())
1617
class TestSoupSelector(TreeTest):

    # Shared fixture document for the CSS-selector tests below. Every
    # interesting element carries an id so tests can assert on the set
    # of matched ids.
    HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<title>The title</title>
<link rel="stylesheet" href="blah.css" type="text/css" id="l1">
</head>
<body>
<custom-dashed-tag class="dashed" id="dash1">Hello there.</custom-dashed-tag>
<div id="main" class="fancy">
<div id="inner">
<h1 id="header1">An H1</h1>
<p>Some text</p>
<p class="onep" id="p1">Some more text</p>
<h2 id="header2">An H2</h2>
<p class="class1 class2 class3" id="pmulti">Another</p>
<a href="http://bob.example.org/" rel="friend met" id="bob">Bob</a>
<h2 id="header3">Another H2</h2>
<a id="me" href="http://simonwillison.net/" rel="me">me</a>
<span class="s1">
<a href="#" id="s1a1">span1a1</a>
<a href="#" id="s1a2">span1a2 <span id="s1a2s1">test</span></a>
<span class="span2">
<a href="#" id="s2a1">span2a1</a>
</span>
<span class="span3"></span>
<custom-dashed-tag class="dashed" id="dash2"/>
<div data-tag="dashedvalue" id="data1"/>
</span>
</div>
<x id="xid">
<z id="zida"/>
<z id="zidab"/>
<z id="zidac"/>
</x>
<y id="yid">
<z id="zidb"/>
</y>
<p lang="en" id="lang-en">English</p>
<p lang="en-gb" id="lang-en-gb">English UK</p>
<p lang="en-us" id="lang-en-us">English US</p>
<p lang="fr" id="lang-fr">French</p>
</div>

<div id="footer">
</div>
"""
1668
    def setUp(self):
        self.soup = BeautifulSoup(self.HTML, 'html.parser')

    def assertSelects(self, selector, expected_ids):
        """Assert that `selector` matches exactly the elements whose ids
        are in `expected_ids` (order-insensitive)."""
        el_ids = [el['id'] for el in self.soup.select(selector)]
        el_ids.sort()
        expected_ids.sort()
        self.assertEqual(expected_ids, el_ids,
                         "Selector %s, expected [%s], got [%s]" % (
                             selector, ', '.join(expected_ids), ', '.join(el_ids)
                         )
                         )

    # Historical alias used by assertSelectMultiple below.
    assertSelect = assertSelects
1683
1684 def assertSelectMultiple(self, *tests):
1685 for selector, expected_ids in tests:
1686 self.assertSelect(selector, expected_ids)
1687
1688 def test_one_tag_one(self):
1689 els = self.soup.select('title')
1690 self.assertEqual(len(els), 1)
1691 self.assertEqual(els[0].name, 'title')
1692 self.assertEqual(els[0].contents, ['The title'])
1693
1694 def test_one_tag_many(self):
1695 els = self.soup.select('div')
1696 self.assertEqual(len(els), 4)
1697 for div in els:
1698 self.assertEqual(div.name, 'div')
1699
1700 el = self.soup.select_one('div')
1701 self.assertEqual('main', el['id'])
1702
1703 def test_select_one_returns_none_if_no_match(self):
1704 match = self.soup.select_one('nonexistenttag')
1705 self.assertEqual(None, match)
1706
1707
1708 def test_tag_in_tag_one(self):
1709 els = self.soup.select('div div')
1710 self.assertSelects('div div', ['inner', 'data1'])
1711
1712 def test_tag_in_tag_many(self):
1713 for selector in ('html div', 'html body div', 'body div'):
1714 self.assertSelects(selector, ['data1', 'main', 'inner', 'footer'])
1715
1716 def test_tag_no_match(self):
1717 self.assertEqual(len(self.soup.select('del')), 0)
1718
    def test_invalid_tag(self):
        # A syntactically invalid selector raises ValueError.
        self.assertRaises(ValueError, self.soup.select, 'tag%t')

    def test_select_dashed_tag_ids(self):
        # Custom tag names containing dashes are selectable.
        self.assertSelects('custom-dashed-tag', ['dash1', 'dash2'])
1724
1725 def test_select_dashed_by_id(self):
1726 dashed = self.soup.select('custom-dashed-tag[id=\"dash2\"]')
1727 self.assertEqual(dashed[0].name, 'custom-dashed-tag')
1728 self.assertEqual(dashed[0]['id'], 'dash2')
1729
    def test_dashed_tag_text(self):
        # Child combinator works with dashed tag names too.
        self.assertEqual(self.soup.select('body > custom-dashed-tag')[0].text, 'Hello there.')

    def test_select_dashed_matches_find_all(self):
        # select() and find_all() agree on dashed tag names.
        self.assertEqual(self.soup.select('custom-dashed-tag'), self.soup.find_all('custom-dashed-tag'))

    def test_header_tags(self):
        self.assertSelectMultiple(
            ('h1', ['header1']),
            ('h2', ['header2', 'header3']),
        )
1741
1742 def test_class_one(self):
1743 for selector in ('.onep', 'p.onep', 'html p.onep'):
1744 els = self.soup.select(selector)
1745 self.assertEqual(len(els), 1)
1746 self.assertEqual(els[0].name, 'p')
1747 self.assertEqual(els[0]['class'], ['onep'])
1748
1749 def test_class_mismatched_tag(self):
1750 els = self.soup.select('div.onep')
1751 self.assertEqual(len(els), 0)
1752
    def test_one_id(self):
        # Every spelling of the id selector finds #inner.
        for selector in ('div#inner', '#inner', 'div div#inner'):
            self.assertSelects(selector, ['inner'])
1756
1757 def test_bad_id(self):
1758 els = self.soup.select('#doesnotexist')
1759 self.assertEqual(len(els), 0)
1760
1761 def test_items_in_id(self):
1762 els = self.soup.select('div#inner p')
1763 self.assertEqual(len(els), 3)
1764 for el in els:
1765 self.assertEqual(el.name, 'p')
1766 self.assertEqual(els[1]['class'], ['onep'])
1767 self.assertFalse(els[0].has_attr('class'))
1768
1769 def test_a_bunch_of_emptys(self):
1770 for selector in ('div#main del', 'div#main div.oops', 'div div#main'):
1771 self.assertEqual(len(self.soup.select(selector)), 0)
1772
    def test_multi_class_support(self):
        # Each individual class of a multi-class element is selectable.
        for selector in ('.class1', 'p.class1', '.class2', 'p.class2',
                         '.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
            self.assertSelects(selector, ['pmulti'])

    def test_multi_class_selection(self):
        # Chained class selectors require all the listed classes.
        for selector in ('.class1.class3', '.class3.class2',
                         '.class1.class2.class3'):
            self.assertSelects(selector, ['pmulti'])

    def test_child_selector(self):
        # '>' matches direct children only.
        self.assertSelects('.s1 > a', ['s1a1', 's1a2'])
        self.assertSelects('.s1 > a span', ['s1a2s1'])

    def test_child_selector_id(self):
        self.assertSelects('.s1 > a#s1a2 span', ['s1a2s1'])
1789
    def test_attribute_equals(self):
        # [attr="value"] requires an exact attribute-value match.
        self.assertSelectMultiple(
            ('p[class="onep"]', ['p1']),
            ('p[id="p1"]', ['p1']),
            ('[class="onep"]', ['p1']),
            ('[id="p1"]', ['p1']),
            ('link[rel="stylesheet"]', ['l1']),
            ('link[type="text/css"]', ['l1']),
            ('link[href="blah.css"]', ['l1']),
            ('link[href="no-blah.css"]', []),
            ('[rel="stylesheet"]', ['l1']),
            ('[type="text/css"]', ['l1']),
            ('[href="blah.css"]', ['l1']),
            ('[href="no-blah.css"]', []),
            ('p[href="no-blah.css"]', []),
            ('[href="no-blah.css"]', []),
        )
1807
1808 def test_attribute_tilde(self):
1809 self.assertSelectMultiple(
1810 ('p[class~="class1"]', ['pmulti']),
1811 ('p[class~="class2"]', ['pmulti']),
1812 ('p[class~="class3"]', ['pmulti']),
1813 ('[class~="class1"]', ['pmulti']),
1814 ('[class~="class2"]', ['pmulti']),
1815 ('[class~="class3"]', ['pmulti']),
1816 ('a[rel~="friend"]', ['bob']),
1817 ('a[rel~="met"]', ['bob']),
1818 ('[rel~="friend"]', ['bob']),
1819 ('[rel~="met"]', ['bob']),
1820 )
1821
1822 def test_attribute_startswith(self):
1823 self.assertSelectMultiple(
1824 ('[rel^="style"]', ['l1']),
1825 ('link[rel^="style"]', ['l1']),
1826 ('notlink[rel^="notstyle"]', []),
1827 ('[rel^="notstyle"]', []),
1828 ('link[rel^="notstyle"]', []),
1829 ('link[href^="bla"]', ['l1']),
1830 ('a[href^="http://"]', ['bob', 'me']),
1831 ('[href^="http://"]', ['bob', 'me']),
1832 ('[id^="p"]', ['pmulti', 'p1']),
1833 ('[id^="m"]', ['me', 'main']),
1834 ('div[id^="m"]', ['main']),
1835 ('a[id^="m"]', ['me']),
1836 ('div[data-tag^="dashed"]', ['data1'])
1837 )
1838
1839 def test_attribute_endswith(self):
1840 self.assertSelectMultiple(
1841 ('[href$=".css"]', ['l1']),
1842 ('link[href$=".css"]', ['l1']),
1843 ('link[id$="1"]', ['l1']),
1844 ('[id$="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's2a1', 's1a2s1', 'dash1']),
1845 ('div[id$="1"]', ['data1']),
1846 ('[id$="noending"]', []),
1847 )
1848
1849 def test_attribute_contains(self):
1850 self.assertSelectMultiple(
1851 # From test_attribute_startswith
1852 ('[rel*="style"]', ['l1']),
1853 ('link[rel*="style"]', ['l1']),
1854 ('notlink[rel*="notstyle"]', []),
1855 ('[rel*="notstyle"]', []),
1856 ('link[rel*="notstyle"]', []),
1857 ('link[href*="bla"]', ['l1']),
1858 ('[href*="http://"]', ['bob', 'me']),
1859 ('[id*="p"]', ['pmulti', 'p1']),
1860 ('div[id*="m"]', ['main']),
1861 ('a[id*="m"]', ['me']),
1862 # From test_attribute_endswith
1863 ('[href*=".css"]', ['l1']),
1864 ('link[href*=".css"]', ['l1']),
1865 ('link[id*="1"]', ['l1']),
1866 ('[id*="1"]', ['data1', 'l1', 'p1', 'header1', 's1a1', 's1a2', 's2a1', 's1a2s1', 'dash1']),
1867 ('div[id*="1"]', ['data1']),
1868 ('[id*="noending"]', []),
1869 # New for this test
1870 ('[href*="."]', ['bob', 'me', 'l1']),
1871 ('a[href*="."]', ['bob', 'me']),
1872 ('link[href*="."]', ['l1']),
1873 ('div[id*="n"]', ['main', 'inner']),
1874 ('div[id*="nn"]', ['inner']),
1875 ('div[data-tag*="edval"]', ['data1'])
1876 )
1877
1878 def test_attribute_exact_or_hypen(self):
1879 self.assertSelectMultiple(
1880 ('p[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
1881 ('[lang|="en"]', ['lang-en', 'lang-en-gb', 'lang-en-us']),
1882 ('p[lang|="fr"]', ['lang-fr']),
1883 ('p[lang|="gb"]', []),
1884 )
1885
1886 def test_attribute_exists(self):
1887 self.assertSelectMultiple(
1888 ('[rel]', ['l1', 'bob', 'me']),
1889 ('link[rel]', ['l1']),
1890 ('a[rel]', ['bob', 'me']),
1891 ('[lang]', ['lang-en', 'lang-en-gb', 'lang-en-us', 'lang-fr']),
1892 ('p[class]', ['p1', 'pmulti']),
1893 ('[blah]', []),
1894 ('p[blah]', []),
1895 ('div[data-tag]', ['data1'])
1896 )
1897
1898 def test_unsupported_pseudoclass(self):
1899 self.assertRaises(
1900 NotImplementedError, self.soup.select, "a:no-such-pseudoclass")
1901
1902 self.assertRaises(
1903 NotImplementedError, self.soup.select, "a:nth-of-type(a)")
1904
1905
1906 def test_nth_of_type(self):
1907 # Try to select first paragraph
1908 els = self.soup.select('div#inner p:nth-of-type(1)')
1909 self.assertEqual(len(els), 1)
1910 self.assertEqual(els[0].string, 'Some text')
1911
1912 # Try to select third paragraph
1913 els = self.soup.select('div#inner p:nth-of-type(3)')
1914 self.assertEqual(len(els), 1)
1915 self.assertEqual(els[0].string, 'Another')
1916
1917 # Try to select (non-existent!) fourth paragraph
1918 els = self.soup.select('div#inner p:nth-of-type(4)')
1919 self.assertEqual(len(els), 0)
1920
1921 # Pass in an invalid value.
1922 self.assertRaises(
1923 ValueError, self.soup.select, 'div p:nth-of-type(0)')
1924
1925 def test_nth_of_type_direct_descendant(self):
1926 els = self.soup.select('div#inner > p:nth-of-type(1)')
1927 self.assertEqual(len(els), 1)
1928 self.assertEqual(els[0].string, 'Some text')
1929
1930 def test_id_child_selector_nth_of_type(self):
1931 self.assertSelects('#inner > p:nth-of-type(2)', ['p1'])
1932
1933 def test_select_on_element(self):
1934 # Other tests operate on the tree; this operates on an element
1935 # within the tree.
1936 inner = self.soup.find("div", id="main")
1937 selected = inner.select("div")
1938 # The <div id="inner"> tag was selected. The <div id="footer">
1939 # tag was not.
1940 self.assertSelectsIDs(selected, ['inner', 'data1'])
1941
1942 def test_overspecified_child_id(self):
1943 self.assertSelects(".fancy #inner", ['inner'])
1944 self.assertSelects(".normal #inner", [])
1945
1946 def test_adjacent_sibling_selector(self):
1947 self.assertSelects('#p1 + h2', ['header2'])
1948 self.assertSelects('#p1 + h2 + p', ['pmulti'])
1949 self.assertSelects('#p1 + #header2 + .class1', ['pmulti'])
1950 self.assertEqual([], self.soup.select('#p1 + p'))
1951
1952 def test_general_sibling_selector(self):
1953 self.assertSelects('#p1 ~ h2', ['header2', 'header3'])
1954 self.assertSelects('#p1 ~ #header2', ['header2'])
1955 self.assertSelects('#p1 ~ h2 + a', ['me'])
1956 self.assertSelects('#p1 ~ h2 + [rel="me"]', ['me'])
1957 self.assertEqual([], self.soup.select('#inner ~ h2'))
1958
1959 def test_dangling_combinator(self):
1960 self.assertRaises(ValueError, self.soup.select, 'h1 >')
1961
1962 def test_sibling_combinator_wont_select_same_tag_twice(self):
1963 self.assertSelects('p[lang] ~ p', ['lang-en-gb', 'lang-en-us', 'lang-fr'])
1964
1965 # Test the selector grouping operator (the comma)
1966 def test_multiple_select(self):
1967 self.assertSelects('x, y', ['xid', 'yid'])
1968
1969 def test_multiple_select_with_no_space(self):
1970 self.assertSelects('x,y', ['xid', 'yid'])
1971
1972 def test_multiple_select_with_more_space(self):
1973 self.assertSelects('x, y', ['xid', 'yid'])
1974
1975 def test_multiple_select_duplicated(self):
1976 self.assertSelects('x, x', ['xid'])
1977
1978 def test_multiple_select_sibling(self):
1979 self.assertSelects('x, y ~ p[lang=fr]', ['xid', 'lang-fr'])
1980
1981 def test_multiple_select_tag_and_direct_descendant(self):
1982 self.assertSelects('x, y > z', ['xid', 'zidb'])
1983
1984 def test_multiple_select_direct_descendant_and_tags(self):
1985 self.assertSelects('div > x, y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
1986
1987 def test_multiple_select_indirect_descendant(self):
1988 self.assertSelects('div x,y, z', ['xid', 'yid', 'zida', 'zidb', 'zidab', 'zidac'])
1989
1990 def test_invalid_multiple_select(self):
1991 self.assertRaises(ValueError, self.soup.select, ',x, y')
1992 self.assertRaises(ValueError, self.soup.select, 'x,,y')
1993
1994 def test_multiple_select_attrs(self):
1995 self.assertSelects('p[lang=en], p[lang=en-gb]', ['lang-en', 'lang-en-gb'])
1996
1997 def test_multiple_select_ids(self):
1998 self.assertSelects('x, y > z[id=zida], z[id=zidab], z[id=zidb]', ['xid', 'zidb', 'zidab'])
1999
2000 def test_multiple_select_nested(self):
2001 self.assertSelects('body > div > x, y > z', ['xid', 'zidb'])
2002
2003
2004