1 files changed, 31 insertions, 16 deletions
diff --git a/bitbake/lib/bs4/builder/_lxml.py b/bitbake/lib/bs4/builder/_lxml.py
index fa5d49875e..9c6c14ee65 100644
--- a/bitbake/lib/bs4/builder/_lxml.py
+++ b/bitbake/lib/bs4/builder/_lxml.py
@@ -4,10 +4,15 @@ __all__ = [
    ]
 from io import BytesIO
-from StringIO import StringIO
+from io import StringIO
 import collections
 from lxml import etree
-from bs4.element import Comment, Doctype, NamespacedAttribute
+from bs4.element import (
+    Comment,
+    Doctype,
+    NamespacedAttribute,
+    ProcessingInstruction,
+)
 from bs4.builder import (
    FAST,
    HTML,
@@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder):
    is_xml = True
+    NAME = "lxml-xml"
+    ALTERNATE_NAMES = ["xml"]
    # Well, it's permissive by XML parser standards.
-    features = [LXML, XML, FAST, PERMISSIVE]
+    features = [NAME, LXML, XML, FAST, PERMISSIVE]
    CHUNK_SIZE = 512
@@ -70,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            return (None, tag)
    def prepare_markup(self, markup, user_specified_encoding=None,
+                       exclude_encodings=None,
                       document_declared_encoding=None):
        """
        :yield: A series of 4-tuples.
@@ -78,12 +87,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        Each 4-tuple represents a strategy for parsing the document.
        """
-        if isinstance(markup, unicode):
+        if isinstance(markup, str):
            # We were given Unicode. Maybe lxml can parse Unicode on
            # this system?
            yield markup, None, document_declared_encoding, False
-        if isinstance(markup, unicode):
+        if isinstance(markup, str):
            # No, apparently not. Convert the Unicode to UTF-8 and
            # tell lxml to parse it as UTF-8.
            yield (markup.encode("utf8"), "utf8",
@@ -95,14 +104,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        # the document as each one in turn.
        is_html = not self.is_xml
        try_encodings = [user_specified_encoding, document_declared_encoding]
-        detector = EncodingDetector(markup, try_encodings, is_html)
+        detector = EncodingDetector(
+            markup, try_encodings, is_html, exclude_encodings)
        for encoding in detector.encodings:
            yield (detector.markup, encoding, document_declared_encoding, False)
    def feed(self, markup):
        if isinstance(markup, bytes):
            markup = BytesIO(markup)
-        elif isinstance(markup, unicode):
+        elif isinstance(markup, str):
            markup = StringIO(markup)
        # Call feed() at least once, even if the markup is empty,
@@ -117,7 +127,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
                if len(data) != 0:
                    self.parser.feed(data)
            self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))
    def close(self):
@@ -135,12 +145,12 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            self.nsmaps.append(None)
        elif len(nsmap) > 0:
            # A new namespace mapping has come into play.
-            inverted_nsmap = dict((value, key) for key, value in nsmap.items())
+            inverted_nsmap = dict((value, key) for key, value in list(nsmap.items()))
            self.nsmaps.append(inverted_nsmap)
            # Also treat the namespace mapping as a set of attributes on the
            # tag, so we can recreate it later.
            attrs = attrs.copy()
-            for prefix, namespace in nsmap.items():
+            for prefix, namespace in list(nsmap.items()):
                attribute = NamespacedAttribute(
                    "xmlns", prefix, "http://www.w3.org/2000/xmlns/")
                attrs[attribute] = namespace
@@ -149,7 +159,7 @@ class LXMLTreeBuilderForXML(TreeBuilder):
        # from lxml with namespaces attached to their names, and
        # turn then into NamespacedAttribute objects.
        new_attrs = {}
-        for attr, value in attrs.items():
+        for attr, value in list(attrs.items()):
            namespace, attr = self._getNsTag(attr)
            if namespace is None:
                new_attrs[attr] = value
@@ -189,7 +199,9 @@ class LXMLTreeBuilderForXML(TreeBuilder):
            self.nsmaps.pop()
    def pi(self, target, data):
-        pass
+        self.soup.endData()
+        self.soup.handle_data(target + ' ' + data)
+        self.soup.endData(ProcessingInstruction)
    def data(self, content):
        self.soup.handle_data(content)
@@ -207,12 +219,15 @@ class LXMLTreeBuilderForXML(TreeBuilder):
    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
-        return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
+        return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment
 class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
-    features = [LXML, HTML, FAST, PERMISSIVE]
+    NAME = LXML
+    ALTERNATE_NAMES = ["lxml-html"]
+    features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE]
    is_xml = False
    def default_parser(self, encoding):
@@ -224,10 +239,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML):
            self.parser = self.parser_for(encoding)
            self.parser.feed(markup)
            self.parser.close()
-        except (UnicodeDecodeError, LookupError, etree.ParserError), e:
+        except (UnicodeDecodeError, LookupError, etree.ParserError) as e:
            raise ParserRejectedMarkup(str(e))
    def test_fragment_to_document(self, fragment):
        """See `TreeBuilder`."""
-        return u'<html><body>%s</body></html>' % fragment
+        return '<html><body>%s</body></html>' % fragment