diff options
| author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-05-06 09:06:51 +0100 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-06-02 08:24:02 +0100 |
| commit | 822eabf32dd69346071bd25fc3639db252d2f346 (patch) | |
| tree | edac6d1d0d5114a4e3c72fea5589c069453b72d2 /bitbake/lib/bs4/builder/_lxml.py | |
| parent | 4f8959324df3b89487973bd4e8de21debb0a12ef (diff) | |
| download | poky-822eabf32dd69346071bd25fc3639db252d2f346.tar.gz | |
bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version)
Upgrade to 4.4.1, which has been run through 2to3 as per the maintainer's
recommendation for Python 3 use.
(Bitbake rev: 2f4b98af93c971a8c466ffaf3c09cca0edb6e3ad)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bs4/builder/_lxml.py')
| -rw-r--r-- | bitbake/lib/bs4/builder/_lxml.py | 47 |
1 files changed, 31 insertions, 16 deletions
diff --git a/bitbake/lib/bs4/builder/_lxml.py b/bitbake/lib/bs4/builder/_lxml.py index fa5d49875e..9c6c14ee65 100644 --- a/bitbake/lib/bs4/builder/_lxml.py +++ b/bitbake/lib/bs4/builder/_lxml.py | |||
| @@ -4,10 +4,15 @@ __all__ = [ | |||
| 4 | ] | 4 | ] |
| 5 | 5 | ||
| 6 | from io import BytesIO | 6 | from io import BytesIO |
| 7 | from StringIO import StringIO | 7 | from io import StringIO |
| 8 | import collections | 8 | import collections |
| 9 | from lxml import etree | 9 | from lxml import etree |
| 10 | from bs4.element import Comment, Doctype, NamespacedAttribute | 10 | from bs4.element import ( |
| 11 | Comment, | ||
| 12 | Doctype, | ||
| 13 | NamespacedAttribute, | ||
| 14 | ProcessingInstruction, | ||
| 15 | ) | ||
| 11 | from bs4.builder import ( | 16 | from bs4.builder import ( |
| 12 | FAST, | 17 | FAST, |
| 13 | HTML, | 18 | HTML, |
| @@ -25,8 +30,11 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 25 | 30 | ||
| 26 | is_xml = True | 31 | is_xml = True |
| 27 | 32 | ||
| 33 | NAME = "lxml-xml" | ||
| 34 | ALTERNATE_NAMES = ["xml"] | ||
| 35 | |||
| 28 | # Well, it's permissive by XML parser standards. | 36 | # Well, it's permissive by XML parser standards. |
| 29 | features = [LXML, XML, FAST, PERMISSIVE] | 37 | features = [NAME, LXML, XML, FAST, PERMISSIVE] |
| 30 | 38 | ||
| 31 | CHUNK_SIZE = 512 | 39 | CHUNK_SIZE = 512 |
| 32 | 40 | ||
| @@ -70,6 +78,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 70 | return (None, tag) | 78 | return (None, tag) |
| 71 | 79 | ||
| 72 | def prepare_markup(self, markup, user_specified_encoding=None, | 80 | def prepare_markup(self, markup, user_specified_encoding=None, |
| 81 | exclude_encodings=None, | ||
| 73 | document_declared_encoding=None): | 82 | document_declared_encoding=None): |
| 74 | """ | 83 | """ |
| 75 | :yield: A series of 4-tuples. | 84 | :yield: A series of 4-tuples. |
| @@ -78,12 +87,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 78 | 87 | ||
| 79 | Each 4-tuple represents a strategy for parsing the document. | 88 | Each 4-tuple represents a strategy for parsing the document. |
| 80 | """ | 89 | """ |
| 81 | if isinstance(markup, unicode): | 90 | if isinstance(markup, str): |
| 82 | # We were given Unicode. Maybe lxml can parse Unicode on | 91 | # We were given Unicode. Maybe lxml can parse Unicode on |
| 83 | # this system? | 92 | # this system? |
| 84 | yield markup, None, document_declared_encoding, False | 93 | yield markup, None, document_declared_encoding, False |
| 85 | 94 | ||
| 86 | if isinstance(markup, unicode): | 95 | if isinstance(markup, str): |
| 87 | # No, apparently not. Convert the Unicode to UTF-8 and | 96 | # No, apparently not. Convert the Unicode to UTF-8 and |
| 88 | # tell lxml to parse it as UTF-8. | 97 | # tell lxml to parse it as UTF-8. |
| 89 | yield (markup.encode("utf8"), "utf8", | 98 | yield (markup.encode("utf8"), "utf8", |
| @@ -95,14 +104,15 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 95 | # the document as each one in turn. | 104 | # the document as each one in turn. |
| 96 | is_html = not self.is_xml | 105 | is_html = not self.is_xml |
| 97 | try_encodings = [user_specified_encoding, document_declared_encoding] | 106 | try_encodings = [user_specified_encoding, document_declared_encoding] |
| 98 | detector = EncodingDetector(markup, try_encodings, is_html) | 107 | detector = EncodingDetector( |
| 108 | markup, try_encodings, is_html, exclude_encodings) | ||
| 99 | for encoding in detector.encodings: | 109 | for encoding in detector.encodings: |
| 100 | yield (detector.markup, encoding, document_declared_encoding, False) | 110 | yield (detector.markup, encoding, document_declared_encoding, False) |
| 101 | 111 | ||
| 102 | def feed(self, markup): | 112 | def feed(self, markup): |
| 103 | if isinstance(markup, bytes): | 113 | if isinstance(markup, bytes): |
| 104 | markup = BytesIO(markup) | 114 | markup = BytesIO(markup) |
| 105 | elif isinstance(markup, unicode): | 115 | elif isinstance(markup, str): |
| 106 | markup = StringIO(markup) | 116 | markup = StringIO(markup) |
| 107 | 117 | ||
| 108 | # Call feed() at least once, even if the markup is empty, | 118 | # Call feed() at least once, even if the markup is empty, |
| @@ -117,7 +127,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 117 | if len(data) != 0: | 127 | if len(data) != 0: |
| 118 | self.parser.feed(data) | 128 | self.parser.feed(data) |
| 119 | self.parser.close() | 129 | self.parser.close() |
| 120 | except (UnicodeDecodeError, LookupError, etree.ParserError), e: | 130 | except (UnicodeDecodeError, LookupError, etree.ParserError) as e: |
| 121 | raise ParserRejectedMarkup(str(e)) | 131 | raise ParserRejectedMarkup(str(e)) |
| 122 | 132 | ||
| 123 | def close(self): | 133 | def close(self): |
| @@ -135,12 +145,12 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 135 | self.nsmaps.append(None) | 145 | self.nsmaps.append(None) |
| 136 | elif len(nsmap) > 0: | 146 | elif len(nsmap) > 0: |
| 137 | # A new namespace mapping has come into play. | 147 | # A new namespace mapping has come into play. |
| 138 | inverted_nsmap = dict((value, key) for key, value in nsmap.items()) | 148 | inverted_nsmap = dict((value, key) for key, value in list(nsmap.items())) |
| 139 | self.nsmaps.append(inverted_nsmap) | 149 | self.nsmaps.append(inverted_nsmap) |
| 140 | # Also treat the namespace mapping as a set of attributes on the | 150 | # Also treat the namespace mapping as a set of attributes on the |
| 141 | # tag, so we can recreate it later. | 151 | # tag, so we can recreate it later. |
| 142 | attrs = attrs.copy() | 152 | attrs = attrs.copy() |
| 143 | for prefix, namespace in nsmap.items(): | 153 | for prefix, namespace in list(nsmap.items()): |
| 144 | attribute = NamespacedAttribute( | 154 | attribute = NamespacedAttribute( |
| 145 | "xmlns", prefix, "http://www.w3.org/2000/xmlns/") | 155 | "xmlns", prefix, "http://www.w3.org/2000/xmlns/") |
| 146 | attrs[attribute] = namespace | 156 | attrs[attribute] = namespace |
| @@ -149,7 +159,7 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 149 | # from lxml with namespaces attached to their names, and | 159 | # from lxml with namespaces attached to their names, and |
| 150 | # turn them into NamespacedAttribute objects. | 160 | # turn them into NamespacedAttribute objects. |
| 151 | new_attrs = {} | 161 | new_attrs = {} |
| 152 | for attr, value in attrs.items(): | 162 | for attr, value in list(attrs.items()): |
| 153 | namespace, attr = self._getNsTag(attr) | 163 | namespace, attr = self._getNsTag(attr) |
| 154 | if namespace is None: | 164 | if namespace is None: |
| 155 | new_attrs[attr] = value | 165 | new_attrs[attr] = value |
| @@ -189,7 +199,9 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 189 | self.nsmaps.pop() | 199 | self.nsmaps.pop() |
| 190 | 200 | ||
| 191 | def pi(self, target, data): | 201 | def pi(self, target, data): |
| 192 | pass | 202 | self.soup.endData() |
| 203 | self.soup.handle_data(target + ' ' + data) | ||
| 204 | self.soup.endData(ProcessingInstruction) | ||
| 193 | 205 | ||
| 194 | def data(self, content): | 206 | def data(self, content): |
| 195 | self.soup.handle_data(content) | 207 | self.soup.handle_data(content) |
| @@ -207,12 +219,15 @@ class LXMLTreeBuilderForXML(TreeBuilder): | |||
| 207 | 219 | ||
| 208 | def test_fragment_to_document(self, fragment): | 220 | def test_fragment_to_document(self, fragment): |
| 209 | """See `TreeBuilder`.""" | 221 | """See `TreeBuilder`.""" |
| 210 | return u'<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment | 222 | return '<?xml version="1.0" encoding="utf-8"?>\n%s' % fragment |
| 211 | 223 | ||
| 212 | 224 | ||
| 213 | class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): | 225 | class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): |
| 214 | 226 | ||
| 215 | features = [LXML, HTML, FAST, PERMISSIVE] | 227 | NAME = LXML |
| 228 | ALTERNATE_NAMES = ["lxml-html"] | ||
| 229 | |||
| 230 | features = ALTERNATE_NAMES + [NAME, HTML, FAST, PERMISSIVE] | ||
| 216 | is_xml = False | 231 | is_xml = False |
| 217 | 232 | ||
| 218 | def default_parser(self, encoding): | 233 | def default_parser(self, encoding): |
| @@ -224,10 +239,10 @@ class LXMLTreeBuilder(HTMLTreeBuilder, LXMLTreeBuilderForXML): | |||
| 224 | self.parser = self.parser_for(encoding) | 239 | self.parser = self.parser_for(encoding) |
| 225 | self.parser.feed(markup) | 240 | self.parser.feed(markup) |
| 226 | self.parser.close() | 241 | self.parser.close() |
| 227 | except (UnicodeDecodeError, LookupError, etree.ParserError), e: | 242 | except (UnicodeDecodeError, LookupError, etree.ParserError) as e: |
| 228 | raise ParserRejectedMarkup(str(e)) | 243 | raise ParserRejectedMarkup(str(e)) |
| 229 | 244 | ||
| 230 | 245 | ||
| 231 | def test_fragment_to_document(self, fragment): | 246 | def test_fragment_to_document(self, fragment): |
| 232 | """See `TreeBuilder`.""" | 247 | """See `TreeBuilder`.""" |
| 233 | return u'<html><body>%s</body></html>' % fragment | 248 | return '<html><body>%s</body></html>' % fragment |
