From 8d49bef632a0486e0172e543a6c2622398ed7a8c Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 6 May 2016 09:06:51 +0100 Subject: bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version) Upgrade to 4.4.1 which has been run through 2to3 as per the maintainers recommendation for v3 use. (Bitbake rev: f06e0f8052ba44eeb9ce701192cdf19252b2646d) Signed-off-by: Richard Purdie --- bitbake/lib/bs4/builder/_html5lib.py | 71 ++++++++++++++++++++++++++++++------ 1 file changed, 59 insertions(+), 12 deletions(-) (limited to 'bitbake/lib/bs4/builder/_html5lib.py') diff --git a/bitbake/lib/bs4/builder/_html5lib.py b/bitbake/lib/bs4/builder/_html5lib.py index 7de36ae75e..f0e5924ebb 100644 --- a/bitbake/lib/bs4/builder/_html5lib.py +++ b/bitbake/lib/bs4/builder/_html5lib.py @@ -2,6 +2,7 @@ __all__ = [ 'HTML5TreeBuilder', ] +from pdb import set_trace import warnings from bs4.builder import ( PERMISSIVE, @@ -9,7 +10,10 @@ from bs4.builder import ( HTML_5, HTMLTreeBuilder, ) -from bs4.element import NamespacedAttribute +from bs4.element import ( + NamespacedAttribute, + whitespace_re, +) import html5lib from html5lib.constants import namespaces from bs4.element import ( @@ -22,11 +26,20 @@ from bs4.element import ( class HTML5TreeBuilder(HTMLTreeBuilder): """Use html5lib to build a tree.""" - features = ['html5lib', PERMISSIVE, HTML_5, HTML] + NAME = "html5lib" + + features = [NAME, PERMISSIVE, HTML_5, HTML] - def prepare_markup(self, markup, user_specified_encoding): + def prepare_markup(self, markup, user_specified_encoding, + document_declared_encoding=None, exclude_encodings=None): # Store the user-specified encoding for use later on. self.user_specified_encoding = user_specified_encoding + + # document_declared_encoding and exclude_encodings aren't used + # ATM because the html5lib TreeBuilder doesn't use + # UnicodeDammit. + if exclude_encodings: + warnings.warn("You provided a value for exclude_encoding, but the html5lib tree builder doesn't support exclude_encoding.") yield (markup, None, None, False) # These methods are defined by Beautiful Soup. @@ -37,7 +50,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): doc = parser.parse(markup, encoding=self.user_specified_encoding) # Set the character encoding detected by the tokenizer. - if isinstance(markup, unicode): + if isinstance(markup, str): # We need to special-case this because html5lib sets # charEncoding to UTF-8 if it gets Unicode input. doc.original_encoding = None @@ -51,7 +64,7 @@ class HTML5TreeBuilder(HTMLTreeBuilder): def test_fragment_to_document(self, fragment): """See `TreeBuilder`.""" - return u'%s' % fragment + return '%s' % fragment class TreeBuilderForHtml5lib(html5lib.treebuilders._base.TreeBuilder): @@ -101,7 +114,16 @@ class AttrList(object): def __iter__(self): return list(self.attrs.items()).__iter__() def __setitem__(self, name, value): - "set attr", name, value + # If this attribute is a multi-valued attribute for this element, + # turn its value into a list. + list_attr = HTML5TreeBuilder.cdata_list_attributes + if (name in list_attr['*'] + or (self.element.name in list_attr + and name in list_attr[self.element.name])): + # A node that is being cloned may have already undergone + # this procedure. + if not isinstance(value, list): + value = whitespace_re.split(value) self.element[name] = value def items(self): return list(self.attrs.items()) @@ -124,7 +146,7 @@ class Element(html5lib.treebuilders._base.Node): def appendChild(self, node): string_child = child = None - if isinstance(node, basestring): + if isinstance(node, str): # Some other piece of code decided to pass in a string # instead of creating a TextElement object to contain the # string. @@ -139,7 +161,7 @@ class Element(html5lib.treebuilders._base.Node): else: child = node.element - if not isinstance(child, basestring) and child.parent is not None: + if not isinstance(child, str) and child.parent is not None: node.element.extract() if (string_child and self.element.contents @@ -152,7 +174,7 @@ class Element(html5lib.treebuilders._base.Node): old_element.replace_with(new_element) self.soup._most_recent_element = new_element else: - if isinstance(node, basestring): + if isinstance(node, str): # Create a brand new NavigableString from this string. child = self.soup.new_string(node) @@ -161,6 +183,12 @@ class Element(html5lib.treebuilders._base.Node): # immediately after the parent, if it has no children.) if self.element.contents: most_recent_element = self.element._last_descendant(False) + elif self.element.next_element is not None: + # Something from further ahead in the parse tree is + # being inserted into this earlier element. This is + # very annoying because it means an expensive search + # for the last element in the tree. + most_recent_element = self.soup._last_descendant() else: most_recent_element = self.element @@ -172,6 +200,7 @@ class Element(html5lib.treebuilders._base.Node): return AttrList(self.element) def setAttributes(self, attributes): + if attributes is not None and len(attributes) > 0: converted_attributes = [] @@ -183,7 +212,7 @@ class Element(html5lib.treebuilders._base.Node): self.soup.builder._replace_cdata_list_attribute_values( self.name, attributes) - for name, value in attributes.items(): + for name, value in list(attributes.items()): self.element[name] = value # The attributes may contain variables that need substitution. @@ -218,6 +247,9 @@ class Element(html5lib.treebuilders._base.Node): def reparentChildren(self, new_parent): """Move all of this tag's children into another tag.""" + # print "MOVE", self.element.contents + # print "FROM", self.element + # print "TO", new_parent.element element = self.element new_parent_element = new_parent.element # Determine what this tag's next_element will be once all the children @@ -236,17 +268,28 @@ class Element(html5lib.treebuilders._base.Node): new_parents_last_descendant_next_element = new_parent_element.next_element to_append = element.contents - append_after = new_parent.element.contents + append_after = new_parent_element.contents if len(to_append) > 0: # Set the first child's previous_element and previous_sibling # to elements within the new parent first_child = to_append[0] - first_child.previous_element = new_parents_last_descendant + if new_parents_last_descendant: + first_child.previous_element = new_parents_last_descendant + else: + first_child.previous_element = new_parent_element first_child.previous_sibling = new_parents_last_child + if new_parents_last_descendant: + new_parents_last_descendant.next_element = first_child + else: + new_parent_element.next_element = first_child + if new_parents_last_child: + new_parents_last_child.next_sibling = first_child # Fix the last child's next_element and next_sibling last_child = to_append[-1] last_child.next_element = new_parents_last_descendant_next_element + if new_parents_last_descendant_next_element: + new_parents_last_descendant_next_element.previous_element = last_child last_child.next_sibling = None for child in to_append: @@ -257,6 +300,10 @@ class Element(html5lib.treebuilders._base.Node): element.contents = [] element.next_element = final_next_element + # print "DONE WITH MOVE" + # print "FROM", self.element + # print "TO", new_parent_element + def cloneNode(self): tag = self.soup.new_tag(self.element.name, self.namespace) node = Element(tag, self.soup, self.namespace) -- cgit v1.2.3-54-g00ecf