diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-05-06 09:06:51 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2016-06-02 08:24:02 +0100 |
commit | 822eabf32dd69346071bd25fc3639db252d2f346 (patch) | |
tree | edac6d1d0d5114a4e3c72fea5589c069453b72d2 /bitbake/lib/bs4/__init__.py | |
parent | 4f8959324df3b89487973bd4e8de21debb0a12ef (diff) | |
download | poky-822eabf32dd69346071bd25fc3639db252d2f346.tar.gz |
bitbake: bitbake/bs4: Upgrade 4.3.2 -> 4.4.1 (python 3 version)
Upgrade to 4.4.1, which has been run through 2to3 as per the maintainer's
recommendation for Python 3 use.
(Bitbake rev: 2f4b98af93c971a8c466ffaf3c09cca0edb6e3ad)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bs4/__init__.py')
-rw-r--r-- | bitbake/lib/bs4/__init__.py | 112 |
1 files changed, 87 insertions, 25 deletions
diff --git a/bitbake/lib/bs4/__init__.py b/bitbake/lib/bs4/__init__.py index 7ba34269af..f6fdfd50b1 100644 --- a/bitbake/lib/bs4/__init__.py +++ b/bitbake/lib/bs4/__init__.py | |||
@@ -17,8 +17,8 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/ | |||
17 | """ | 17 | """ |
18 | 18 | ||
19 | __author__ = "Leonard Richardson (leonardr@segfault.org)" | 19 | __author__ = "Leonard Richardson (leonardr@segfault.org)" |
20 | __version__ = "4.3.2" | 20 | __version__ = "4.4.1" |
21 | __copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" | 21 | __copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" |
22 | __license__ = "MIT" | 22 | __license__ = "MIT" |
23 | 23 | ||
24 | __all__ = ['BeautifulSoup'] | 24 | __all__ = ['BeautifulSoup'] |
@@ -45,7 +45,7 @@ from .element import ( | |||
45 | 45 | ||
46 | # The very first thing we do is give a useful error if someone is | 46 | # The very first thing we do is give a useful error if someone is |
47 | # running this code under Python 3 without converting it. | 47 | # running this code under Python 3 without converting it. |
48 | syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' | 48 | 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' |
49 | 49 | ||
50 | class BeautifulSoup(Tag): | 50 | class BeautifulSoup(Tag): |
51 | """ | 51 | """ |
@@ -69,7 +69,7 @@ class BeautifulSoup(Tag): | |||
69 | like HTML's <br> tag), call handle_starttag and then | 69 | like HTML's <br> tag), call handle_starttag and then |
70 | handle_endtag. | 70 | handle_endtag. |
71 | """ | 71 | """ |
72 | ROOT_TAG_NAME = u'[document]' | 72 | ROOT_TAG_NAME = '[document]' |
73 | 73 | ||
74 | # If the end-user gives no indication which tree builder they | 74 | # If the end-user gives no indication which tree builder they |
75 | # want, look for one with these features. | 75 | # want, look for one with these features. |
@@ -77,8 +77,11 @@ class BeautifulSoup(Tag): | |||
77 | 77 | ||
78 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' | 78 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' |
79 | 79 | ||
80 | NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" | ||
81 | |||
80 | def __init__(self, markup="", features=None, builder=None, | 82 | def __init__(self, markup="", features=None, builder=None, |
81 | parse_only=None, from_encoding=None, **kwargs): | 83 | parse_only=None, from_encoding=None, exclude_encodings=None, |
84 | **kwargs): | ||
82 | """The Soup object is initialized as the 'root tag', and the | 85 | """The Soup object is initialized as the 'root tag', and the |
83 | provided markup (which can be a string or a file-like object) | 86 | provided markup (which can be a string or a file-like object) |
84 | is fed into the underlying parser.""" | 87 | is fed into the underlying parser.""" |
@@ -114,9 +117,9 @@ class BeautifulSoup(Tag): | |||
114 | del kwargs['isHTML'] | 117 | del kwargs['isHTML'] |
115 | warnings.warn( | 118 | warnings.warn( |
116 | "BS4 does not respect the isHTML argument to the " | 119 | "BS4 does not respect the isHTML argument to the " |
117 | "BeautifulSoup constructor. You can pass in features='html' " | 120 | "BeautifulSoup constructor. Suggest you use " |
118 | "or features='xml' to get a builder capable of handling " | 121 | "features='lxml' for HTML and features='lxml-xml' for " |
119 | "one or the other.") | 122 | "XML.") |
120 | 123 | ||
121 | def deprecated_argument(old_name, new_name): | 124 | def deprecated_argument(old_name, new_name): |
122 | if old_name in kwargs: | 125 | if old_name in kwargs: |
@@ -135,12 +138,13 @@ class BeautifulSoup(Tag): | |||
135 | "fromEncoding", "from_encoding") | 138 | "fromEncoding", "from_encoding") |
136 | 139 | ||
137 | if len(kwargs) > 0: | 140 | if len(kwargs) > 0: |
138 | arg = kwargs.keys().pop() | 141 | arg = list(kwargs.keys()).pop() |
139 | raise TypeError( | 142 | raise TypeError( |
140 | "__init__() got an unexpected keyword argument '%s'" % arg) | 143 | "__init__() got an unexpected keyword argument '%s'" % arg) |
141 | 144 | ||
142 | if builder is None: | 145 | if builder is None: |
143 | if isinstance(features, basestring): | 146 | original_features = features |
147 | if isinstance(features, str): | ||
144 | features = [features] | 148 | features = [features] |
145 | if features is None or len(features) == 0: | 149 | if features is None or len(features) == 0: |
146 | features = self.DEFAULT_BUILDER_FEATURES | 150 | features = self.DEFAULT_BUILDER_FEATURES |
@@ -151,6 +155,16 @@ class BeautifulSoup(Tag): | |||
151 | "requested: %s. Do you need to install a parser library?" | 155 | "requested: %s. Do you need to install a parser library?" |
152 | % ",".join(features)) | 156 | % ",".join(features)) |
153 | builder = builder_class() | 157 | builder = builder_class() |
158 | if not (original_features == builder.NAME or | ||
159 | original_features in builder.ALTERNATE_NAMES): | ||
160 | if builder.is_xml: | ||
161 | markup_type = "XML" | ||
162 | else: | ||
163 | markup_type = "HTML" | ||
164 | warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( | ||
165 | parser=builder.NAME, | ||
166 | markup_type=markup_type)) | ||
167 | |||
154 | self.builder = builder | 168 | self.builder = builder |
155 | self.is_xml = builder.is_xml | 169 | self.is_xml = builder.is_xml |
156 | self.builder.soup = self | 170 | self.builder.soup = self |
@@ -164,7 +178,7 @@ class BeautifulSoup(Tag): | |||
164 | # involving passing non-markup to Beautiful Soup. | 178 | # involving passing non-markup to Beautiful Soup. |
165 | # Beautiful Soup will still parse the input as markup, | 179 | # Beautiful Soup will still parse the input as markup, |
166 | # just in case that's what the user really wants. | 180 | # just in case that's what the user really wants. |
167 | if (isinstance(markup, unicode) | 181 | if (isinstance(markup, str) |
168 | and not os.path.supports_unicode_filenames): | 182 | and not os.path.supports_unicode_filenames): |
169 | possible_filename = markup.encode("utf8") | 183 | possible_filename = markup.encode("utf8") |
170 | else: | 184 | else: |
@@ -172,25 +186,30 @@ class BeautifulSoup(Tag): | |||
172 | is_file = False | 186 | is_file = False |
173 | try: | 187 | try: |
174 | is_file = os.path.exists(possible_filename) | 188 | is_file = os.path.exists(possible_filename) |
175 | except Exception, e: | 189 | except Exception as e: |
176 | # This is almost certainly a problem involving | 190 | # This is almost certainly a problem involving |
177 | # characters not valid in filenames on this | 191 | # characters not valid in filenames on this |
178 | # system. Just let it go. | 192 | # system. Just let it go. |
179 | pass | 193 | pass |
180 | if is_file: | 194 | if is_file: |
195 | if isinstance(markup, str): | ||
196 | markup = markup.encode("utf8") | ||
181 | warnings.warn( | 197 | warnings.warn( |
182 | '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) | 198 | '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) |
183 | if markup[:5] == "http:" or markup[:6] == "https:": | 199 | if markup[:5] == "http:" or markup[:6] == "https:": |
184 | # TODO: This is ugly but I couldn't get it to work in | 200 | # TODO: This is ugly but I couldn't get it to work in |
185 | # Python 3 otherwise. | 201 | # Python 3 otherwise. |
186 | if ((isinstance(markup, bytes) and not b' ' in markup) | 202 | if ((isinstance(markup, bytes) and not b' ' in markup) |
187 | or (isinstance(markup, unicode) and not u' ' in markup)): | 203 | or (isinstance(markup, str) and not ' ' in markup)): |
204 | if isinstance(markup, str): | ||
205 | markup = markup.encode("utf8") | ||
188 | warnings.warn( | 206 | warnings.warn( |
189 | '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) | 207 | '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) |
190 | 208 | ||
191 | for (self.markup, self.original_encoding, self.declared_html_encoding, | 209 | for (self.markup, self.original_encoding, self.declared_html_encoding, |
192 | self.contains_replacement_characters) in ( | 210 | self.contains_replacement_characters) in ( |
193 | self.builder.prepare_markup(markup, from_encoding)): | 211 | self.builder.prepare_markup( |
212 | markup, from_encoding, exclude_encodings=exclude_encodings)): | ||
194 | self.reset() | 213 | self.reset() |
195 | try: | 214 | try: |
196 | self._feed() | 215 | self._feed() |
@@ -203,6 +222,16 @@ class BeautifulSoup(Tag): | |||
203 | self.markup = None | 222 | self.markup = None |
204 | self.builder.soup = None | 223 | self.builder.soup = None |
205 | 224 | ||
225 | def __copy__(self): | ||
226 | return type(self)(self.encode(), builder=self.builder) | ||
227 | |||
228 | def __getstate__(self): | ||
229 | # Frequently a tree builder can't be pickled. | ||
230 | d = dict(self.__dict__) | ||
231 | if 'builder' in d and not self.builder.picklable: | ||
232 | del d['builder'] | ||
233 | return d | ||
234 | |||
206 | def _feed(self): | 235 | def _feed(self): |
207 | # Convert the document to Unicode. | 236 | # Convert the document to Unicode. |
208 | self.builder.reset() | 237 | self.builder.reset() |
@@ -229,9 +258,7 @@ class BeautifulSoup(Tag): | |||
229 | 258 | ||
230 | def new_string(self, s, subclass=NavigableString): | 259 | def new_string(self, s, subclass=NavigableString): |
231 | """Create a new NavigableString associated with this soup.""" | 260 | """Create a new NavigableString associated with this soup.""" |
232 | navigable = subclass(s) | 261 | return subclass(s) |
233 | navigable.setup() | ||
234 | return navigable | ||
235 | 262 | ||
236 | def insert_before(self, successor): | 263 | def insert_before(self, successor): |
237 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().") | 264 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().") |
@@ -259,7 +286,7 @@ class BeautifulSoup(Tag): | |||
259 | 286 | ||
260 | def endData(self, containerClass=NavigableString): | 287 | def endData(self, containerClass=NavigableString): |
261 | if self.current_data: | 288 | if self.current_data: |
262 | current_data = u''.join(self.current_data) | 289 | current_data = ''.join(self.current_data) |
263 | # If whitespace is not preserved, and this string contains | 290 | # If whitespace is not preserved, and this string contains |
264 | # nothing but ASCII spaces, replace it with a single space | 291 | # nothing but ASCII spaces, replace it with a single space |
265 | # or newline. | 292 | # or newline. |
@@ -290,14 +317,49 @@ class BeautifulSoup(Tag): | |||
290 | def object_was_parsed(self, o, parent=None, most_recent_element=None): | 317 | def object_was_parsed(self, o, parent=None, most_recent_element=None): |
291 | """Add an object to the parse tree.""" | 318 | """Add an object to the parse tree.""" |
292 | parent = parent or self.currentTag | 319 | parent = parent or self.currentTag |
293 | most_recent_element = most_recent_element or self._most_recent_element | 320 | previous_element = most_recent_element or self._most_recent_element |
294 | o.setup(parent, most_recent_element) | 321 | |
322 | next_element = previous_sibling = next_sibling = None | ||
323 | if isinstance(o, Tag): | ||
324 | next_element = o.next_element | ||
325 | next_sibling = o.next_sibling | ||
326 | previous_sibling = o.previous_sibling | ||
327 | if not previous_element: | ||
328 | previous_element = o.previous_element | ||
329 | |||
330 | o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) | ||
295 | 331 | ||
296 | if most_recent_element is not None: | ||
297 | most_recent_element.next_element = o | ||
298 | self._most_recent_element = o | 332 | self._most_recent_element = o |
299 | parent.contents.append(o) | 333 | parent.contents.append(o) |
300 | 334 | ||
335 | if parent.next_sibling: | ||
336 | # This node is being inserted into an element that has | ||
337 | # already been parsed. Deal with any dangling references. | ||
338 | index = parent.contents.index(o) | ||
339 | if index == 0: | ||
340 | previous_element = parent | ||
341 | previous_sibling = None | ||
342 | else: | ||
343 | previous_element = previous_sibling = parent.contents[index-1] | ||
344 | if index == len(parent.contents)-1: | ||
345 | next_element = parent.next_sibling | ||
346 | next_sibling = None | ||
347 | else: | ||
348 | next_element = next_sibling = parent.contents[index+1] | ||
349 | |||
350 | o.previous_element = previous_element | ||
351 | if previous_element: | ||
352 | previous_element.next_element = o | ||
353 | o.next_element = next_element | ||
354 | if next_element: | ||
355 | next_element.previous_element = o | ||
356 | o.next_sibling = next_sibling | ||
357 | if next_sibling: | ||
358 | next_sibling.previous_sibling = o | ||
359 | o.previous_sibling = previous_sibling | ||
360 | if previous_sibling: | ||
361 | previous_sibling.next_sibling = o | ||
362 | |||
301 | def _popToTag(self, name, nsprefix=None, inclusivePop=True): | 363 | def _popToTag(self, name, nsprefix=None, inclusivePop=True): |
302 | """Pops the tag stack up to and including the most recent | 364 | """Pops the tag stack up to and including the most recent |
303 | instance of the given tag. If inclusivePop is false, pops the tag | 365 | instance of the given tag. If inclusivePop is false, pops the tag |
@@ -367,9 +429,9 @@ class BeautifulSoup(Tag): | |||
367 | encoding_part = '' | 429 | encoding_part = '' |
368 | if eventual_encoding != None: | 430 | if eventual_encoding != None: |
369 | encoding_part = ' encoding="%s"' % eventual_encoding | 431 | encoding_part = ' encoding="%s"' % eventual_encoding |
370 | prefix = u'<?xml version="1.0"%s?>\n' % encoding_part | 432 | prefix = '<?xml version="1.0"%s?>\n' % encoding_part |
371 | else: | 433 | else: |
372 | prefix = u'' | 434 | prefix = '' |
373 | if not pretty_print: | 435 | if not pretty_print: |
374 | indent_level = None | 436 | indent_level = None |
375 | else: | 437 | else: |
@@ -403,4 +465,4 @@ class FeatureNotFound(ValueError): | |||
403 | if __name__ == '__main__': | 465 | if __name__ == '__main__': |
404 | import sys | 466 | import sys |
405 | soup = BeautifulSoup(sys.stdin) | 467 | soup = BeautifulSoup(sys.stdin) |
406 | print soup.prettify() | 468 | print(soup.prettify()) |