From 8d49bef632a0486e0172e543a6c2622398ed7a8c Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 6 May 2016 09:06:51 +0100 Subject: bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version) Upgrade to 4.4.1 which has been run through 2to3 as per the maintainers recommendation for v3 use. (Bitbake rev: f06e0f8052ba44eeb9ce701192cdf19252b2646d) Signed-off-by: Richard Purdie --- bitbake/lib/bs4/__init__.py | 112 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 87 insertions(+), 25 deletions(-) (limited to 'bitbake/lib/bs4/__init__.py') diff --git a/bitbake/lib/bs4/__init__.py b/bitbake/lib/bs4/__init__.py index 7ba34269af..f6fdfd50b1 100644 --- a/bitbake/lib/bs4/__init__.py +++ b/bitbake/lib/bs4/__init__.py @@ -17,8 +17,8 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ """ __author__ = "Leonard Richardson (leonardr@segfault.org)" -__version__ = "4.3.2" -__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" +__version__ = "4.4.1" +__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" __license__ = "MIT" __all__ = ['BeautifulSoup'] @@ -45,7 +45,7 @@ from .element import ( # The very first thing we do is give a useful error if someone is # running this code under Python 3 without converting it. -syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' +'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' class BeautifulSoup(Tag): """ @@ -69,7 +69,7 @@ class BeautifulSoup(Tag): like HTML's
tag), call handle_starttag and then handle_endtag. """ - ROOT_TAG_NAME = u'[document]' + ROOT_TAG_NAME = '[document]' # If the end-user gives no indication which tree builder they # want, look for one with these features. @@ -77,8 +77,11 @@ class BeautifulSoup(Tag): ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' + NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" + def __init__(self, markup="", features=None, builder=None, - parse_only=None, from_encoding=None, **kwargs): + parse_only=None, from_encoding=None, exclude_encodings=None, + **kwargs): """The Soup object is initialized as the 'root tag', and the provided markup (which can be a string or a file-like object) is fed into the underlying parser.""" @@ -114,9 +117,9 @@ class BeautifulSoup(Tag): del kwargs['isHTML'] warnings.warn( "BS4 does not respect the isHTML argument to the " - "BeautifulSoup constructor. You can pass in features='html' " - "or features='xml' to get a builder capable of handling " - "one or the other.") + "BeautifulSoup constructor. Suggest you use " + "features='lxml' for HTML and features='lxml-xml' for " + "XML.") def deprecated_argument(old_name, new_name): if old_name in kwargs: @@ -135,12 +138,13 @@ class BeautifulSoup(Tag): "fromEncoding", "from_encoding") if len(kwargs) > 0: - arg = kwargs.keys().pop() + arg = list(kwargs.keys()).pop() raise TypeError( "__init__() got an unexpected keyword argument '%s'" % arg) if builder is None: - if isinstance(features, basestring): + original_features = features + if isinstance(features, str): features = [features] if features is None or len(features) == 0: features = self.DEFAULT_BUILDER_FEATURES @@ -151,6 +155,16 @@ class BeautifulSoup(Tag): "requested: %s. Do you need to install a parser library?" % ",".join(features)) builder = builder_class() + if not (original_features == builder.NAME or + original_features in builder.ALTERNATE_NAMES): + if builder.is_xml: + markup_type = "XML" + else: + markup_type = "HTML" + warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( + parser=builder.NAME, + markup_type=markup_type)) + self.builder = builder self.is_xml = builder.is_xml self.builder.soup = self @@ -164,7 +178,7 @@ class BeautifulSoup(Tag): # involving passing non-markup to Beautiful Soup. # Beautiful Soup will still parse the input as markup, # just in case that's what the user really wants. - if (isinstance(markup, unicode) + if (isinstance(markup, str) and not os.path.supports_unicode_filenames): possible_filename = markup.encode("utf8") else: @@ -172,25 +186,30 @@ class BeautifulSoup(Tag): is_file = False try: is_file = os.path.exists(possible_filename) - except Exception, e: + except Exception as e: # This is almost certainly a problem involving # characters not valid in filenames on this # system. Just let it go. pass if is_file: + if isinstance(markup, str): + markup = markup.encode("utf8") warnings.warn( '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) if markup[:5] == "http:" or markup[:6] == "https:": # TODO: This is ugly but I couldn't get it to work in # Python 3 otherwise. if ((isinstance(markup, bytes) and not b' ' in markup) - or (isinstance(markup, unicode) and not u' ' in markup)): + or (isinstance(markup, str) and not ' ' in markup)): + if isinstance(markup, str): + markup = markup.encode("utf8") warnings.warn( '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) for (self.markup, self.original_encoding, self.declared_html_encoding, self.contains_replacement_characters) in ( - self.builder.prepare_markup(markup, from_encoding)): + self.builder.prepare_markup( + markup, from_encoding, exclude_encodings=exclude_encodings)): self.reset() try: self._feed() @@ -203,6 +222,16 @@ class BeautifulSoup(Tag): self.markup = None self.builder.soup = None + def __copy__(self): + return type(self)(self.encode(), builder=self.builder) + + def __getstate__(self): + # Frequently a tree builder can't be pickled. + d = dict(self.__dict__) + if 'builder' in d and not self.builder.picklable: + del d['builder'] + return d + def _feed(self): # Convert the document to Unicode. self.builder.reset() @@ -229,9 +258,7 @@ class BeautifulSoup(Tag): def new_string(self, s, subclass=NavigableString): """Create a new NavigableString associated with this soup.""" - navigable = subclass(s) - navigable.setup() - return navigable + return subclass(s) def insert_before(self, successor): raise NotImplementedError("BeautifulSoup objects don't support insert_before().") @@ -259,7 +286,7 @@ class BeautifulSoup(Tag): def endData(self, containerClass=NavigableString): if self.current_data: - current_data = u''.join(self.current_data) + current_data = ''.join(self.current_data) # If whitespace is not preserved, and this string contains # nothing but ASCII spaces, replace it with a single space # or newline. @@ -290,14 +317,49 @@ class BeautifulSoup(Tag): def object_was_parsed(self, o, parent=None, most_recent_element=None): """Add an object to the parse tree.""" parent = parent or self.currentTag - most_recent_element = most_recent_element or self._most_recent_element - o.setup(parent, most_recent_element) + previous_element = most_recent_element or self._most_recent_element + + next_element = previous_sibling = next_sibling = None + if isinstance(o, Tag): + next_element = o.next_element + next_sibling = o.next_sibling + previous_sibling = o.previous_sibling + if not previous_element: + previous_element = o.previous_element + + o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) - if most_recent_element is not None: - most_recent_element.next_element = o self._most_recent_element = o parent.contents.append(o) + if parent.next_sibling: + # This node is being inserted into an element that has + # already been parsed. Deal with any dangling references. + index = parent.contents.index(o) + if index == 0: + previous_element = parent + previous_sibling = None + else: + previous_element = previous_sibling = parent.contents[index-1] + if index == len(parent.contents)-1: + next_element = parent.next_sibling + next_sibling = None + else: + next_element = next_sibling = parent.contents[index+1] + + o.previous_element = previous_element + if previous_element: + previous_element.next_element = o + o.next_element = next_element + if next_element: + next_element.previous_element = o + o.next_sibling = next_sibling + if next_sibling: + next_sibling.previous_sibling = o + o.previous_sibling = previous_sibling + if previous_sibling: + previous_sibling.next_sibling = o + def _popToTag(self, name, nsprefix=None, inclusivePop=True): """Pops the tag stack up to and including the most recent instance of the given tag. If inclusivePop is false, pops the tag @@ -367,9 +429,9 @@ class BeautifulSoup(Tag): encoding_part = '' if eventual_encoding != None: encoding_part = ' encoding="%s"' % eventual_encoding - prefix = u'\n' % encoding_part + prefix = '\n' % encoding_part else: - prefix = u'' + prefix = '' if not pretty_print: indent_level = None else: @@ -403,4 +465,4 @@ class FeatureNotFound(ValueError): if __name__ == '__main__': import sys soup = BeautifulSoup(sys.stdin) - print soup.prettify() + print(soup.prettify()) -- cgit v1.2.3-54-g00ecf