summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bs4/builder/_html5lib.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bs4/builder/_html5lib.py')
-rw-r--r--bitbake/lib/bs4/builder/_html5lib.py71
1 files changed, 59 insertions, 12 deletions
diff --git a/bitbake/lib/bs4/builder/_html5lib.py b/bitbake/lib/bs4/builder/_html5lib.py
index 7de36ae75e..f0e5924ebb 100644
--- a/bitbake/lib/bs4/builder/_html5lib.py
+++ b/bitbake/lib/bs4/builder/_html5lib.py
@@ -2,6 +2,7 @@ __all__ = [
2 'HTML5TreeBuilder', 2 'HTML5TreeBuilder',
3 ] 3 ]
4 4
5from pdb import set_trace
5import warnings 6import warnings
6from bs4.builder import ( 7from bs4.builder import (
7 PERMISSIVE, 8 PERMISSIVE,
@@ -9,7 +10,10 @@ from bs4.builder import (
9 HTML_5, 10 HTML_5,
10 HTMLTreeBuilder, 11 HTMLTreeBuilder,
11 ) 12 )
12from bs4.element import NamespacedAttribute 13from bs4.element import (
14 NamespacedAttribute,
15 whitespace_re,
16)
13import html5lib 17import html5lib
14from html5lib.constants import namespaces 18from html5lib.constants import namespaces
15from bs4.element import ( 19from bs4.element import (
@@ -22,11 +26,20 @@ from bs4.element import (
22class HTML5TreeBuilder(HTMLTreeBuilder): 26class HTML5TreeBuilder(HTMLTreeBuilder):
23 """Use html5lib to build a tree.""" 27 """Use html5lib to build a tree."""
24 28
25 features = ['html5lib', PERMISSIVE, HTML_5, HTML] 29 NAME = "html5lib"
30
31 features = [NAME, PERMISSIVE, HTML_5, HTML]
26 32
27 def prepare_markup(self, markup, user_specified_encoding): 33 def prepare_markup(self, markup, user_specified_encoding,
34 document_declared_encoding=None, exclude_encodings=None):
28 # Store the user-specified encoding for use later on. 35 # Store the user-specified encoding for use later on.
29 self.user_specified_encoding = user_specified_encoding 36 self.user_specified_encoding = user_specified_encoding
37
38 # document_declared_encoding and exclude_encodings aren't used
39 # ATM because the html5lib TreeBuilder doesn't use
40 # UnicodeDammit.
41 if exclude_encodings:
42 warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.")
30 yield (markup, None, None, False) 43 yield (markup, None, None, False)
31 44
32 # These methods are defined by Beautiful Soup. 45 # These methods are defined by Beautiful Soup.
@@ -37,7 +50,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
37 doc = parser.parse(markup, encoding=self.user_specified_encoding) 50 doc = parser.parse(markup, encoding=self.user_specified_encoding)
38 51
39 # Set the character encoding detected by the tokenizer. 52 # Set the character encoding detected by the tokenizer.
40 if isinstance(markup, unicode): 53 if isinstance(markup, str):
41 # We need to special-case this because html5lib sets 54 # We need to special-case this because html5lib sets
42 # charEncoding to UTF-8 if it gets Unicode input. 55 # charEncoding to UTF-8 if it gets Unicode input.
43 doc.original_encoding = None 56 doc.original_encoding = None
@@ -51,7 +64,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder):
51 64
52 def test_fragment_to_document(self, fragment): 65 def test_fragment_to_document(self, fragment):
53 """See `TreeBuilder`.""" 66 """See `TreeBuilder`."""
54 return u'<html><head></head><body>%s</body></html>' % fragment 67 return '<html><head></head><body>%s</body></html>' % fragment
55 68
56 69
57class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): 70class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder):
@@ -101,7 +114,16 @@ class AttrList(object):
101 def __iter__(self): 114 def __iter__(self):
102 return list(self.attrs.items()).__iter__() 115 return list(self.attrs.items()).__iter__()
103 def __setitem__(self, name, value): 116 def __setitem__(self, name, value):
104 "set attr", name, value 117 # If this attribute is a multi-valued attribute for this element,
118 # turn its value into a list.
119 list_attr = HTML5TreeBuilder.cdata_list_attributes
120 if (name in list_attr['*']
121 or (self.element.name in list_attr
122 and name in list_attr[self.element.name])):
123 # A node that is being cloned may have already undergone
124 # this procedure.
125 if not isinstance(value, list):
126 value = whitespace_re.split(value)
105 self.element[name] = value 127 self.element[name] = value
106 def items(self): 128 def items(self):
107 return list(self.attrs.items()) 129 return list(self.attrs.items())
@@ -124,7 +146,7 @@ class Element(html5lib.treebuilders._base.Node):
124 146
125 def appendChild(self, node): 147 def appendChild(self, node):
126 string_child = child = None 148 string_child = child = None
127 if isinstance(node, basestring): 149 if isinstance(node, str):
128 # Some other piece of code decided to pass in a string 150 # Some other piece of code decided to pass in a string
129 # instead of creating a TextElement object to contain the 151 # instead of creating a TextElement object to contain the
130 # string. 152 # string.
@@ -139,7 +161,7 @@ class Element(html5lib.treebuilders._base.Node):
139 else: 161 else:
140 child = node.element 162 child = node.element
141 163
142 if not isinstance(child, basestring) and child.parent is not None: 164 if not isinstance(child, str) and child.parent is not None:
143 node.element.extract() 165 node.element.extract()
144 166
145 if (string_child and self.element.contents 167 if (string_child and self.element.contents
@@ -152,7 +174,7 @@ class Element(html5lib.treebuilders._base.Node):
152 old_element.replace_with(new_element) 174 old_element.replace_with(new_element)
153 self.soup._most_recent_element = new_element 175 self.soup._most_recent_element = new_element
154 else: 176 else:
155 if isinstance(node, basestring): 177 if isinstance(node, str):
156 # Create a brand new NavigableString from this string. 178 # Create a brand new NavigableString from this string.
157 child = self.soup.new_string(node) 179 child = self.soup.new_string(node)
158 180
@@ -161,6 +183,12 @@ class Element(html5lib.treebuilders._base.Node):
161 # immediately after the parent, if it has no children.) 183 # immediately after the parent, if it has no children.)
162 if self.element.contents: 184 if self.element.contents:
163 most_recent_element = self.element._last_descendant(False) 185 most_recent_element = self.element._last_descendant(False)
186 elif self.element.next_element is not None:
187 # Something from further ahead in the parse tree is
188 # being inserted into this earlier element. This is
189 # very annoying because it means an expensive search
190 # for the last element in the tree.
191 most_recent_element = self.soup._last_descendant()
164 else: 192 else:
165 most_recent_element = self.element 193 most_recent_element = self.element
166 194
@@ -172,6 +200,7 @@ class Element(html5lib.treebuilders._base.Node):
172 return AttrList(self.element) 200 return AttrList(self.element)
173 201
174 def setAttributes(self, attributes): 202 def setAttributes(self, attributes):
203
175 if attributes is not None and len(attributes) > 0: 204 if attributes is not None and len(attributes) > 0:
176 205
177 converted_attributes = [] 206 converted_attributes = []
@@ -183,7 +212,7 @@ class Element(html5lib.treebuilders._base.Node):
183 212
184 self.soup.builder._replace_cdata_list_attribute_values( 213 self.soup.builder._replace_cdata_list_attribute_values(
185 self.name, attributes) 214 self.name, attributes)
186 for name, value in attributes.items(): 215 for name, value in list(attributes.items()):
187 self.element[name] = value 216 self.element[name] = value
188 217
189 # The attributes may contain variables that need substitution. 218 # The attributes may contain variables that need substitution.
@@ -218,6 +247,9 @@ class Element(html5lib.treebuilders._base.Node):
218 247
219 def reparentChildren(self, new_parent): 248 def reparentChildren(self, new_parent):
220 """Move all of this tag's children into another tag.""" 249 """Move all of this tag's children into another tag."""
250 # print "MOVE", self.element.contents
251 # print "FROM", self.element
252 # print "TO", new_parent.element
221 element = self.element 253 element = self.element
222 new_parent_element = new_parent.element 254 new_parent_element = new_parent.element
223 # Determine what this tag's next_element will be once all the children 255 # Determine what this tag's next_element will be once all the children
@@ -236,17 +268,28 @@ class Element(html5lib.treebuilders._base.Node):
236 new_parents_last_descendant_next_element = new_parent_element.next_element 268 new_parents_last_descendant_next_element = new_parent_element.next_element
237 269
238 to_append = element.contents 270 to_append = element.contents
239 append_after = new_parent.element.contents 271 append_after = new_parent_element.contents
240 if len(to_append) > 0: 272 if len(to_append) > 0:
241 # Set the first child's previous_element and previous_sibling 273 # Set the first child's previous_element and previous_sibling
242 # to elements within the new parent 274 # to elements within the new parent
243 first_child = to_append[0] 275 first_child = to_append[0]
244 first_child.previous_element = new_parents_last_descendant 276 if new_parents_last_descendant:
277 first_child.previous_element = new_parents_last_descendant
278 else:
279 first_child.previous_element = new_parent_element
245 first_child.previous_sibling = new_parents_last_child 280 first_child.previous_sibling = new_parents_last_child
281 if new_parents_last_descendant:
282 new_parents_last_descendant.next_element = first_child
283 else:
284 new_parent_element.next_element = first_child
285 if new_parents_last_child:
286 new_parents_last_child.next_sibling = first_child
246 287
247 # Fix the last child's next_element and next_sibling 288 # Fix the last child's next_element and next_sibling
248 last_child = to_append[-1] 289 last_child = to_append[-1]
249 last_child.next_element = new_parents_last_descendant_next_element 290 last_child.next_element = new_parents_last_descendant_next_element
291 if new_parents_last_descendant_next_element:
292 new_parents_last_descendant_next_element.previous_element = last_child
250 last_child.next_sibling = None 293 last_child.next_sibling = None
251 294
252 for child in to_append: 295 for child in to_append:
@@ -257,6 +300,10 @@ class Element(html5lib.treebuilders._base.Node):
257 element.contents = [] 300 element.contents = []
258 element.next_element = final_next_element 301 element.next_element = final_next_element
259 302
303 # print "DONE WITH MOVE"
304 # print "FROM", self.element
305 # print "TO", new_parent_element
306
260 def cloneNode(self): 307 def cloneNode(self):
261 tag = self.soup.new_tag(self.element.name, self.namespace) 308 tag = self.soup.new_tag(self.element.name, self.namespace)
262 node = Element(tag, self.soup, self.namespace) 309 node = Element(tag, self.soup, self.namespace)