diff options
| author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-05-06 09:06:51 +0100 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-06-02 08:24:02 +0100 |
| commit | 822eabf32dd69346071bd25fc3639db252d2f346 (patch) | |
| tree | edac6d1d0d5114a4e3c72fea5589c069453b72d2 /bitbake/lib/bs4/__init__.py | |
| parent | 4f8959324df3b89487973bd4e8de21debb0a12ef (diff) | |
| download | poky-822eabf32dd69346071bd25fc3639db252d2f346.tar.gz | |
bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version)
Upgrade to 4.4.1, which has been run through 2to3 as per the maintainer's
recommendation for v3 use.
(Bitbake rev: 2f4b98af93c971a8c466ffaf3c09cca0edb6e3ad)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bs4/__init__.py')
| -rw-r--r-- | bitbake/lib/bs4/__init__.py | 112 |
1 file changed, 87 insertions, 25 deletions
diff --git a/bitbake/lib/bs4/__init__.py b/bitbake/lib/bs4/__init__.py index 7ba34269af..f6fdfd50b1 100644 --- a/bitbake/lib/bs4/__init__.py +++ b/bitbake/lib/bs4/__init__.py | |||
| @@ -17,8 +17,8 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ | |||
| 17 | """ | 17 | """ |
| 18 | 18 | ||
| 19 | __author__ = "Leonard Richardson (leonardr@segfault.org)" | 19 | __author__ = "Leonard Richardson (leonardr@segfault.org)" |
| 20 | __version__ = "4.3.2" | 20 | __version__ = "4.4.1" |
| 21 | __copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" | 21 | __copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" |
| 22 | __license__ = "MIT" | 22 | __license__ = "MIT" |
| 23 | 23 | ||
| 24 | __all__ = ['BeautifulSoup'] | 24 | __all__ = ['BeautifulSoup'] |
| @@ -45,7 +45,7 @@ from .element import ( | |||
| 45 | 45 | ||
| 46 | # The very first thing we do is give a useful error if someone is | 46 | # The very first thing we do is give a useful error if someone is |
| 47 | # running this code under Python 3 without converting it. | 47 | # running this code under Python 3 without converting it. |
| 48 | syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' | 48 | 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' |
| 49 | 49 | ||
| 50 | class BeautifulSoup(Tag): | 50 | class BeautifulSoup(Tag): |
| 51 | """ | 51 | """ |
| @@ -69,7 +69,7 @@ class BeautifulSoup(Tag): | |||
| 69 | like HTML's <br> tag), call handle_starttag and then | 69 | like HTML's <br> tag), call handle_starttag and then |
| 70 | handle_endtag. | 70 | handle_endtag. |
| 71 | """ | 71 | """ |
| 72 | ROOT_TAG_NAME = u'[document]' | 72 | ROOT_TAG_NAME = '[document]' |
| 73 | 73 | ||
| 74 | # If the end-user gives no indication which tree builder they | 74 | # If the end-user gives no indication which tree builder they |
| 75 | # want, look for one with these features. | 75 | # want, look for one with these features. |
| @@ -77,8 +77,11 @@ class BeautifulSoup(Tag): | |||
| 77 | 77 | ||
| 78 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' | 78 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' |
| 79 | 79 | ||
| 80 | NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" | ||
| 81 | |||
| 80 | def __init__(self, markup="", features=None, builder=None, | 82 | def __init__(self, markup="", features=None, builder=None, |
| 81 | parse_only=None, from_encoding=None, **kwargs): | 83 | parse_only=None, from_encoding=None, exclude_encodings=None, |
| 84 | **kwargs): | ||
| 82 | """The Soup object is initialized as the 'root tag', and the | 85 | """The Soup object is initialized as the 'root tag', and the |
| 83 | provided markup (which can be a string or a file-like object) | 86 | provided markup (which can be a string or a file-like object) |
| 84 | is fed into the underlying parser.""" | 87 | is fed into the underlying parser.""" |
| @@ -114,9 +117,9 @@ class BeautifulSoup(Tag): | |||
| 114 | del kwargs['isHTML'] | 117 | del kwargs['isHTML'] |
| 115 | warnings.warn( | 118 | warnings.warn( |
| 116 | "BS4 does not respect the isHTML argument to the " | 119 | "BS4 does not respect the isHTML argument to the " |
| 117 | "BeautifulSoup constructor. You can pass in features='html' " | 120 | "BeautifulSoup constructor. Suggest you use " |
| 118 | "or features='xml' to get a builder capable of handling " | 121 | "features='lxml' for HTML and features='lxml-xml' for " |
| 119 | "one or the other.") | 122 | "XML.") |
| 120 | 123 | ||
| 121 | def deprecated_argument(old_name, new_name): | 124 | def deprecated_argument(old_name, new_name): |
| 122 | if old_name in kwargs: | 125 | if old_name in kwargs: |
| @@ -135,12 +138,13 @@ class BeautifulSoup(Tag): | |||
| 135 | "fromEncoding", "from_encoding") | 138 | "fromEncoding", "from_encoding") |
| 136 | 139 | ||
| 137 | if len(kwargs) > 0: | 140 | if len(kwargs) > 0: |
| 138 | arg = kwargs.keys().pop() | 141 | arg = list(kwargs.keys()).pop() |
| 139 | raise TypeError( | 142 | raise TypeError( |
| 140 | "__init__() got an unexpected keyword argument '%s'" % arg) | 143 | "__init__() got an unexpected keyword argument '%s'" % arg) |
| 141 | 144 | ||
| 142 | if builder is None: | 145 | if builder is None: |
| 143 | if isinstance(features, basestring): | 146 | original_features = features |
| 147 | if isinstance(features, str): | ||
| 144 | features = [features] | 148 | features = [features] |
| 145 | if features is None or len(features) == 0: | 149 | if features is None or len(features) == 0: |
| 146 | features = self.DEFAULT_BUILDER_FEATURES | 150 | features = self.DEFAULT_BUILDER_FEATURES |
| @@ -151,6 +155,16 @@ class BeautifulSoup(Tag): | |||
| 151 | "requested: %s. Do you need to install a parser library?" | 155 | "requested: %s. Do you need to install a parser library?" |
| 152 | % ",".join(features)) | 156 | % ",".join(features)) |
| 153 | builder = builder_class() | 157 | builder = builder_class() |
| 158 | if not (original_features == builder.NAME or | ||
| 159 | original_features in builder.ALTERNATE_NAMES): | ||
| 160 | if builder.is_xml: | ||
| 161 | markup_type = "XML" | ||
| 162 | else: | ||
| 163 | markup_type = "HTML" | ||
| 164 | warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( | ||
| 165 | parser=builder.NAME, | ||
| 166 | markup_type=markup_type)) | ||
| 167 | |||
| 154 | self.builder = builder | 168 | self.builder = builder |
| 155 | self.is_xml = builder.is_xml | 169 | self.is_xml = builder.is_xml |
| 156 | self.builder.soup = self | 170 | self.builder.soup = self |
| @@ -164,7 +178,7 @@ class BeautifulSoup(Tag): | |||
| 164 | # involving passing non-markup to Beautiful Soup. | 178 | # involving passing non-markup to Beautiful Soup. |
| 165 | # Beautiful Soup will still parse the input as markup, | 179 | # Beautiful Soup will still parse the input as markup, |
| 166 | # just in case that's what the user really wants. | 180 | # just in case that's what the user really wants. |
| 167 | if (isinstance(markup, unicode) | 181 | if (isinstance(markup, str) |
| 168 | and not os.path.supports_unicode_filenames): | 182 | and not os.path.supports_unicode_filenames): |
| 169 | possible_filename = markup.encode("utf8") | 183 | possible_filename = markup.encode("utf8") |
| 170 | else: | 184 | else: |
| @@ -172,25 +186,30 @@ class BeautifulSoup(Tag): | |||
| 172 | is_file = False | 186 | is_file = False |
| 173 | try: | 187 | try: |
| 174 | is_file = os.path.exists(possible_filename) | 188 | is_file = os.path.exists(possible_filename) |
| 175 | except Exception, e: | 189 | except Exception as e: |
| 176 | # This is almost certainly a problem involving | 190 | # This is almost certainly a problem involving |
| 177 | # characters not valid in filenames on this | 191 | # characters not valid in filenames on this |
| 178 | # system. Just let it go. | 192 | # system. Just let it go. |
| 179 | pass | 193 | pass |
| 180 | if is_file: | 194 | if is_file: |
| 195 | if isinstance(markup, str): | ||
| 196 | markup = markup.encode("utf8") | ||
| 181 | warnings.warn( | 197 | warnings.warn( |
| 182 | '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) | 198 | '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) |
| 183 | if markup[:5] == "http:" or markup[:6] == "https:": | 199 | if markup[:5] == "http:" or markup[:6] == "https:": |
| 184 | # TODO: This is ugly but I couldn't get it to work in | 200 | # TODO: This is ugly but I couldn't get it to work in |
| 185 | # Python 3 otherwise. | 201 | # Python 3 otherwise. |
| 186 | if ((isinstance(markup, bytes) and not b' ' in markup) | 202 | if ((isinstance(markup, bytes) and not b' ' in markup) |
| 187 | or (isinstance(markup, unicode) and not u' ' in markup)): | 203 | or (isinstance(markup, str) and not ' ' in markup)): |
| 204 | if isinstance(markup, str): | ||
| 205 | markup = markup.encode("utf8") | ||
| 188 | warnings.warn( | 206 | warnings.warn( |
| 189 | '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) | 207 | '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) |
| 190 | 208 | ||
| 191 | for (self.markup, self.original_encoding, self.declared_html_encoding, | 209 | for (self.markup, self.original_encoding, self.declared_html_encoding, |
| 192 | self.contains_replacement_characters) in ( | 210 | self.contains_replacement_characters) in ( |
| 193 | self.builder.prepare_markup(markup, from_encoding)): | 211 | self.builder.prepare_markup( |
| 212 | markup, from_encoding, exclude_encodings=exclude_encodings)): | ||
| 194 | self.reset() | 213 | self.reset() |
| 195 | try: | 214 | try: |
| 196 | self._feed() | 215 | self._feed() |
| @@ -203,6 +222,16 @@ class BeautifulSoup(Tag): | |||
| 203 | self.markup = None | 222 | self.markup = None |
| 204 | self.builder.soup = None | 223 | self.builder.soup = None |
| 205 | 224 | ||
| 225 | def __copy__(self): | ||
| 226 | return type(self)(self.encode(), builder=self.builder) | ||
| 227 | |||
| 228 | def __getstate__(self): | ||
| 229 | # Frequently a tree builder can't be pickled. | ||
| 230 | d = dict(self.__dict__) | ||
| 231 | if 'builder' in d and not self.builder.picklable: | ||
| 232 | del d['builder'] | ||
| 233 | return d | ||
| 234 | |||
| 206 | def _feed(self): | 235 | def _feed(self): |
| 207 | # Convert the document to Unicode. | 236 | # Convert the document to Unicode. |
| 208 | self.builder.reset() | 237 | self.builder.reset() |
| @@ -229,9 +258,7 @@ class BeautifulSoup(Tag): | |||
| 229 | 258 | ||
| 230 | def new_string(self, s, subclass=NavigableString): | 259 | def new_string(self, s, subclass=NavigableString): |
| 231 | """Create a new NavigableString associated with this soup.""" | 260 | """Create a new NavigableString associated with this soup.""" |
| 232 | navigable = subclass(s) | 261 | return subclass(s) |
| 233 | navigable.setup() | ||
| 234 | return navigable | ||
| 235 | 262 | ||
| 236 | def insert_before(self, successor): | 263 | def insert_before(self, successor): |
| 237 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().") | 264 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().") |
| @@ -259,7 +286,7 @@ class BeautifulSoup(Tag): | |||
| 259 | 286 | ||
| 260 | def endData(self, containerClass=NavigableString): | 287 | def endData(self, containerClass=NavigableString): |
| 261 | if self.current_data: | 288 | if self.current_data: |
| 262 | current_data = u''.join(self.current_data) | 289 | current_data = ''.join(self.current_data) |
| 263 | # If whitespace is not preserved, and this string contains | 290 | # If whitespace is not preserved, and this string contains |
| 264 | # nothing but ASCII spaces, replace it with a single space | 291 | # nothing but ASCII spaces, replace it with a single space |
| 265 | # or newline. | 292 | # or newline. |
| @@ -290,14 +317,49 @@ class BeautifulSoup(Tag): | |||
| 290 | def object_was_parsed(self, o, parent=None, most_recent_element=None): | 317 | def object_was_parsed(self, o, parent=None, most_recent_element=None): |
| 291 | """Add an object to the parse tree.""" | 318 | """Add an object to the parse tree.""" |
| 292 | parent = parent or self.currentTag | 319 | parent = parent or self.currentTag |
| 293 | most_recent_element = most_recent_element or self._most_recent_element | 320 | previous_element = most_recent_element or self._most_recent_element |
| 294 | o.setup(parent, most_recent_element) | 321 | |
| 322 | next_element = previous_sibling = next_sibling = None | ||
| 323 | if isinstance(o, Tag): | ||
| 324 | next_element = o.next_element | ||
| 325 | next_sibling = o.next_sibling | ||
| 326 | previous_sibling = o.previous_sibling | ||
| 327 | if not previous_element: | ||
| 328 | previous_element = o.previous_element | ||
| 329 | |||
| 330 | o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) | ||
| 295 | 331 | ||
| 296 | if most_recent_element is not None: | ||
| 297 | most_recent_element.next_element = o | ||
| 298 | self._most_recent_element = o | 332 | self._most_recent_element = o |
| 299 | parent.contents.append(o) | 333 | parent.contents.append(o) |
| 300 | 334 | ||
| 335 | if parent.next_sibling: | ||
| 336 | # This node is being inserted into an element that has | ||
| 337 | # already been parsed. Deal with any dangling references. | ||
| 338 | index = parent.contents.index(o) | ||
| 339 | if index == 0: | ||
| 340 | previous_element = parent | ||
| 341 | previous_sibling = None | ||
| 342 | else: | ||
| 343 | previous_element = previous_sibling = parent.contents[index-1] | ||
| 344 | if index == len(parent.contents)-1: | ||
| 345 | next_element = parent.next_sibling | ||
| 346 | next_sibling = None | ||
| 347 | else: | ||
| 348 | next_element = next_sibling = parent.contents[index+1] | ||
| 349 | |||
| 350 | o.previous_element = previous_element | ||
| 351 | if previous_element: | ||
| 352 | previous_element.next_element = o | ||
| 353 | o.next_element = next_element | ||
| 354 | if next_element: | ||
| 355 | next_element.previous_element = o | ||
| 356 | o.next_sibling = next_sibling | ||
| 357 | if next_sibling: | ||
| 358 | next_sibling.previous_sibling = o | ||
| 359 | o.previous_sibling = previous_sibling | ||
| 360 | if previous_sibling: | ||
| 361 | previous_sibling.next_sibling = o | ||
| 362 | |||
| 301 | def _popToTag(self, name, nsprefix=None, inclusivePop=True): | 363 | def _popToTag(self, name, nsprefix=None, inclusivePop=True): |
| 302 | """Pops the tag stack up to and including the most recent | 364 | """Pops the tag stack up to and including the most recent |
| 303 | instance of the given tag. If inclusivePop is false, pops the tag | 365 | instance of the given tag. If inclusivePop is false, pops the tag |
| @@ -367,9 +429,9 @@ class BeautifulSoup(Tag): | |||
| 367 | encoding_part = '' | 429 | encoding_part = '' |
| 368 | if eventual_encoding != None: | 430 | if eventual_encoding != None: |
| 369 | encoding_part = ' encoding="%s"' % eventual_encoding | 431 | encoding_part = ' encoding="%s"' % eventual_encoding |
| 370 | prefix = u'<?xml version="1.0"%s?>\n' % encoding_part | 432 | prefix = '<?xml version="1.0"%s?>\n' % encoding_part |
| 371 | else: | 433 | else: |
| 372 | prefix = u'' | 434 | prefix = '' |
| 373 | if not pretty_print: | 435 | if not pretty_print: |
| 374 | indent_level = None | 436 | indent_level = None |
| 375 | else: | 437 | else: |
| @@ -403,4 +465,4 @@ class FeatureNotFound(ValueError): | |||
| 403 | if __name__ == '__main__': | 465 | if __name__ == '__main__': |
| 404 | import sys | 466 | import sys |
| 405 | soup = BeautifulSoup(sys.stdin) | 467 | soup = BeautifulSoup(sys.stdin) |
| 406 | print soup.prettify() | 468 | print(soup.prettify()) |
