diff options
Diffstat (limited to 'bitbake/lib/bs4/builder/_html5lib.py')
-rw-r--r-- | bitbake/lib/bs4/builder/_html5lib.py | 71 |
1 files changed, 59 insertions, 12 deletions
diff --git a/bitbake/lib/bs4/builder/_html5lib.py b/bitbake/lib/bs4/builder/_html5lib.py index 7de36ae75e..f0e5924ebb 100644 --- a/bitbake/lib/bs4/builder/_html5lib.py +++ b/bitbake/lib/bs4/builder/_html5lib.py | |||
@@ -2,6 +2,7 @@ __all__ = [ | |||
2 | 'HTML5TreeBuilder', | 2 | 'HTML5TreeBuilder', |
3 | ] | 3 | ] |
4 | 4 | ||
5 | from pdb import set_trace | ||
5 | import warnings | 6 | import warnings |
6 | from bs4.builder import ( | 7 | from bs4.builder import ( |
7 | PERMISSIVE, | 8 | PERMISSIVE, |
@@ -9,7 +10,10 @@ from bs4.builder import ( | |||
9 | HTML_5, | 10 | HTML_5, |
10 | HTMLTreeBuilder, | 11 | HTMLTreeBuilder, |
11 | ) | 12 | ) |
12 | from bs4.element import NamespacedAttribute | 13 | from bs4.element import ( |
14 | NamespacedAttribute, | ||
15 | whitespace_re, | ||
16 | ) | ||
13 | import html5lib | 17 | import html5lib |
14 | from html5lib.constants import namespaces | 18 | from html5lib.constants import namespaces |
15 | from bs4.element import ( | 19 | from bs4.element import ( |
@@ -22,11 +26,20 @@ from bs4.element import ( | |||
22 | class HTML5TreeBuilder(HTMLTreeBuilder): | 26 | class HTML5TreeBuilder(HTMLTreeBuilder): |
23 | """Use html5lib to build a tree.""" | 27 | """Use html5lib to build a tree.""" |
24 | 28 | ||
25 | features = ['html5lib', PERMISSIVE, HTML_5, HTML] | 29 | NAME = "html5lib" |
30 | |||
31 | features = [NAME, PERMISSIVE, HTML_5, HTML] | ||
26 | 32 | ||
27 | def prepare_markup(self, markup, user_specified_encoding): | 33 | def prepare_markup(self, markup, user_specified_encoding, |
34 | document_declared_encoding=None, exclude_encodings=None): | ||
28 | # Store the user-specified encoding for use later on. | 35 | # Store the user-specified encoding for use later on. |
29 | self.user_specified_encoding = user_specified_encoding | 36 | self.user_specified_encoding = user_specified_encoding |
37 | |||
38 | # document_declared_encoding and exclude_encodings aren't used | ||
39 | # ATM because the html5lib TreeBuilder doesn't use | ||
40 | # UnicodeDammit. | ||
41 | if exclude_encodings: | ||
42 | warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") | ||
30 | yield (markup, None, None, False) | 43 | yield (markup, None, None, False) |
31 | 44 | ||
32 | # These methods are defined by Beautiful Soup. | 45 | # These methods are defined by Beautiful Soup. |
@@ -37,7 +50,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): | |||
37 | doc = parser.parse(markup, encoding=self.user_specified_encoding) | 50 | doc = parser.parse(markup, encoding=self.user_specified_encoding) |
38 | 51 | ||
39 | # Set the character encoding detected by the tokenizer. | 52 | # Set the character encoding detected by the tokenizer. |
40 | if isinstance(markup, unicode): | 53 | if isinstance(markup, str): |
41 | # We need to special-case this because html5lib sets | 54 | # We need to special-case this because html5lib sets |
42 | # charEncoding to UTF-8 if it gets Unicode input. | 55 | # charEncoding to UTF-8 if it gets Unicode input. |
43 | doc.original_encoding = None | 56 | doc.original_encoding = None |
@@ -51,7 +64,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): | |||
51 | 64 | ||
52 | def test_fragment_to_document(self, fragment): | 65 | def test_fragment_to_document(self, fragment): |
53 | """See `TreeBuilder`.""" | 66 | """See `TreeBuilder`.""" |
54 | return u'<html><head></head><body>%s</body></html>' % fragment | 67 | return '<html><head></head><body>%s</body></html>' % fragment |
55 | 68 | ||
56 | 69 | ||
57 | class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): | 70 | class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): |
@@ -101,7 +114,16 @@ class AttrList(object): | |||
101 | def __iter__(self): | 114 | def __iter__(self): |
102 | return list(self.attrs.items()).__iter__() | 115 | return list(self.attrs.items()).__iter__() |
103 | def __setitem__(self, name, value): | 116 | def __setitem__(self, name, value): |
104 | "set attr", name, value | 117 | # If this attribute is a multi-valued attribute for this element, |
118 | # turn its value into a list. | ||
119 | list_attr = HTML5TreeBuilder.cdata_list_attributes | ||
120 | if (name in list_attr['*'] | ||
121 | or (self.element.name in list_attr | ||
122 | and name in list_attr[self.element.name])): | ||
123 | # A node that is being cloned may have already undergone | ||
124 | # this procedure. | ||
125 | if not isinstance(value, list): | ||
126 | value = whitespace_re.split(value) | ||
105 | self.element[name] = value | 127 | self.element[name] = value |
106 | def items(self): | 128 | def items(self): |
107 | return list(self.attrs.items()) | 129 | return list(self.attrs.items()) |
@@ -124,7 +146,7 @@ class Element(html5lib.treebuilders._base.Node): | |||
124 | 146 | ||
125 | def appendChild(self, node): | 147 | def appendChild(self, node): |
126 | string_child = child = None | 148 | string_child = child = None |
127 | if isinstance(node, basestring): | 149 | if isinstance(node, str): |
128 | # Some other piece of code decided to pass in a string | 150 | # Some other piece of code decided to pass in a string |
129 | # instead of creating a TextElement object to contain the | 151 | # instead of creating a TextElement object to contain the |
130 | # string. | 152 | # string. |
@@ -139,7 +161,7 @@ class Element(html5lib.treebuilders._base.Node): | |||
139 | else: | 161 | else: |
140 | child = node.element | 162 | child = node.element |
141 | 163 | ||
142 | if not isinstance(child, basestring) and child.parent is not None: | 164 | if not isinstance(child, str) and child.parent is not None: |
143 | node.element.extract() | 165 | node.element.extract() |
144 | 166 | ||
145 | if (string_child and self.element.contents | 167 | if (string_child and self.element.contents |
@@ -152,7 +174,7 @@ class Element(html5lib.treebuilders._base.Node): | |||
152 | old_element.replace_with(new_element) | 174 | old_element.replace_with(new_element) |
153 | self.soup._most_recent_element = new_element | 175 | self.soup._most_recent_element = new_element |
154 | else: | 176 | else: |
155 | if isinstance(node, basestring): | 177 | if isinstance(node, str): |
156 | # Create a brand new NavigableString from this string. | 178 | # Create a brand new NavigableString from this string. |
157 | child = self.soup.new_string(node) | 179 | child = self.soup.new_string(node) |
158 | 180 | ||
@@ -161,6 +183,12 @@ class Element(html5lib.treebuilders._base.Node): | |||
161 | # immediately after the parent, if it has no children.) | 183 | # immediately after the parent, if it has no children.) |
162 | if self.element.contents: | 184 | if self.element.contents: |
163 | most_recent_element = self.element._last_descendant(False) | 185 | most_recent_element = self.element._last_descendant(False) |
186 | elif self.element.next_element is not None: | ||
187 | # Something from further ahead in the parse tree is | ||
188 | # being inserted into this earlier element. This is | ||
189 | # very annoying because it means an expensive search | ||
190 | # for the last element in the tree. | ||
191 | most_recent_element = self.soup._last_descendant() | ||
164 | else: | 192 | else: |
165 | most_recent_element = self.element | 193 | most_recent_element = self.element |
166 | 194 | ||
@@ -172,6 +200,7 @@ class Element(html5lib.treebuilders._base.Node): | |||
172 | return AttrList(self.element) | 200 | return AttrList(self.element) |
173 | 201 | ||
174 | def setAttributes(self, attributes): | 202 | def setAttributes(self, attributes): |
203 | |||
175 | if attributes is not None and len(attributes) > 0: | 204 | if attributes is not None and len(attributes) > 0: |
176 | 205 | ||
177 | converted_attributes = [] | 206 | converted_attributes = [] |
@@ -183,7 +212,7 @@ class Element(html5lib.treebuilders._base.Node): | |||
183 | 212 | ||
184 | self.soup.builder._replace_cdata_list_attribute_values( | 213 | self.soup.builder._replace_cdata_list_attribute_values( |
185 | self.name, attributes) | 214 | self.name, attributes) |
186 | for name, value in attributes.items(): | 215 | for name, value in list(attributes.items()): |
187 | self.element[name] = value | 216 | self.element[name] = value |
188 | 217 | ||
189 | # The attributes may contain variables that need substitution. | 218 | # The attributes may contain variables that need substitution. |
@@ -218,6 +247,9 @@ class Element(html5lib.treebuilders._base.Node): | |||
218 | 247 | ||
219 | def reparentChildren(self, new_parent): | 248 | def reparentChildren(self, new_parent): |
220 | """Move all of this tag's children into another tag.""" | 249 | """Move all of this tag's children into another tag.""" |
250 | # print "MOVE", self.element.contents | ||
251 | # print "FROM", self.element | ||
252 | # print "TO", new_parent.element | ||
221 | element = self.element | 253 | element = self.element |
222 | new_parent_element = new_parent.element | 254 | new_parent_element = new_parent.element |
223 | # Determine what this tag's next_element will be once all the children | 255 | # Determine what this tag's next_element will be once all the children |
@@ -236,17 +268,28 @@ class Element(html5lib.treebuilders._base.Node): | |||
236 | new_parents_last_descendant_next_element = new_parent_element.next_element | 268 | new_parents_last_descendant_next_element = new_parent_element.next_element |
237 | 269 | ||
238 | to_append = element.contents | 270 | to_append = element.contents |
239 | append_after = new_parent.element.contents | 271 | append_after = new_parent_element.contents |
240 | if len(to_append) > 0: | 272 | if len(to_append) > 0: |
241 | # Set the first child's previous_element and previous_sibling | 273 | # Set the first child's previous_element and previous_sibling |
242 | # to elements within the new parent | 274 | # to elements within the new parent |
243 | first_child = to_append[0] | 275 | first_child = to_append[0] |
244 | first_child.previous_element = new_parents_last_descendant | 276 | if new_parents_last_descendant: |
277 | first_child.previous_element = new_parents_last_descendant | ||
278 | else: | ||
279 | first_child.previous_element = new_parent_element | ||
245 | first_child.previous_sibling = new_parents_last_child | 280 | first_child.previous_sibling = new_parents_last_child |
281 | if new_parents_last_descendant: | ||
282 | new_parents_last_descendant.next_element = first_child | ||
283 | else: | ||
284 | new_parent_element.next_element = first_child | ||
285 | if new_parents_last_child: | ||
286 | new_parents_last_child.next_sibling = first_child | ||
246 | 287 | ||
247 | # Fix the last child's next_element and next_sibling | 288 | # Fix the last child's next_element and next_sibling |
248 | last_child = to_append[-1] | 289 | last_child = to_append[-1] |
249 | last_child.next_element = new_parents_last_descendant_next_element | 290 | last_child.next_element = new_parents_last_descendant_next_element |
291 | if new_parents_last_descendant_next_element: | ||
292 | new_parents_last_descendant_next_element.previous_element = last_child | ||
250 | last_child.next_sibling = None | 293 | last_child.next_sibling = None |
251 | 294 | ||
252 | for child in to_append: | 295 | for child in to_append: |
@@ -257,6 +300,10 @@ class Element(html5lib.treebuilders._base.Node): | |||
257 | element.contents = [] | 300 | element.contents = [] |
258 | element.next_element = final_next_element | 301 | element.next_element = final_next_element |
259 | 302 | ||
303 | # print "DONE WITH MOVE" | ||
304 | # print "FROM", self.element | ||
305 | # print "TO", new_parent_element | ||
306 | |||
260 | def cloneNode(self): | 307 | def cloneNode(self): |
261 | tag = self.soup.new_tag(self.element.name, self.namespace) | 308 | tag = self.soup.new_tag(self.element.name, self.namespace) |
262 | node = Element(tag, self.soup, self.namespace) | 309 | node = Element(tag, self.soup, self.namespace) |