summary refs log tree commit diff stats
path: root/bitbake/lib/bs4/__init__.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bs4/__init__.py')
-rw-r--r-- bitbake/lib/bs4/__init__.py 112
1 files changed, 87 insertions, 25 deletions
diff --git a/bitbake/lib/bs4/__init__.py b/bitbake/lib/bs4/__init__.py
index 7ba34269af..f6fdfd50b1 100644
--- a/bitbake/lib/bs4/__init__.py
+++ b/bitbake/lib/bs4/__init__.py
@@ -17,8 +17,8 @@ http://www.crummy.com/software/BeautifulSoup/bs4/doc/
17""" 17"""
18 18
19__author__ = "Leonard Richardson (leonardr@segfault.org)" 19__author__ = "Leonard Richardson (leonardr@segfault.org)"
20__version__ = "4.3.2" 20__version__ = "4.4.1"
21__copyright__ = "Copyright (c) 2004-2013 Leonard Richardson" 21__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
22__license__ = "MIT" 22__license__ = "MIT"
23 23
24__all__ = ['BeautifulSoup'] 24__all__ = ['BeautifulSoup']
@@ -45,7 +45,7 @@ from .element import (
45 45
46# The very first thing we do is give a useful error if someone is 46# The very first thing we do is give a useful error if someone is
47# running this code under Python 3 without converting it. 47# running this code under Python 3 without converting it.
48syntax_error = u'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work. You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' 48'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
49 49
50class BeautifulSoup(Tag): 50class BeautifulSoup(Tag):
51 """ 51 """
@@ -69,7 +69,7 @@ class BeautifulSoup(Tag):
69 like HTML's <br> tag), call handle_starttag and then 69 like HTML's <br> tag), call handle_starttag and then
70 handle_endtag. 70 handle_endtag.
71 """ 71 """
72 ROOT_TAG_NAME = u'[document]' 72 ROOT_TAG_NAME = '[document]'
73 73
74 # If the end-user gives no indication which tree builder they 74 # If the end-user gives no indication which tree builder they
75 # want, look for one with these features. 75 # want, look for one with these features.
@@ -77,8 +77,11 @@ class BeautifulSoup(Tag):
77 77
78 ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' 78 ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
79 79
80 NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
81
80 def __init__(self, markup="", features=None, builder=None, 82 def __init__(self, markup="", features=None, builder=None,
81 parse_only=None, from_encoding=None, **kwargs): 83 parse_only=None, from_encoding=None, exclude_encodings=None,
84 **kwargs):
82 """The Soup object is initialized as the 'root tag', and the 85 """The Soup object is initialized as the 'root tag', and the
83 provided markup (which can be a string or a file-like object) 86 provided markup (which can be a string or a file-like object)
84 is fed into the underlying parser.""" 87 is fed into the underlying parser."""
@@ -114,9 +117,9 @@ class BeautifulSoup(Tag):
114 del kwargs['isHTML'] 117 del kwargs['isHTML']
115 warnings.warn( 118 warnings.warn(
116 "BS4 does not respect the isHTML argument to the " 119 "BS4 does not respect the isHTML argument to the "
117 "BeautifulSoup constructor. You can pass in features='html' " 120 "BeautifulSoup constructor. Suggest you use "
118 "or features='xml' to get a builder capable of handling " 121 "features='lxml' for HTML and features='lxml-xml' for "
119 "one or the other.") 122 "XML.")
120 123
121 def deprecated_argument(old_name, new_name): 124 def deprecated_argument(old_name, new_name):
122 if old_name in kwargs: 125 if old_name in kwargs:
@@ -135,12 +138,13 @@ class BeautifulSoup(Tag):
135 "fromEncoding", "from_encoding") 138 "fromEncoding", "from_encoding")
136 139
137 if len(kwargs) > 0: 140 if len(kwargs) > 0:
138 arg = kwargs.keys().pop() 141 arg = list(kwargs.keys()).pop()
139 raise TypeError( 142 raise TypeError(
140 "__init__() got an unexpected keyword argument '%s'" % arg) 143 "__init__() got an unexpected keyword argument '%s'" % arg)
141 144
142 if builder is None: 145 if builder is None:
143 if isinstance(features, basestring): 146 original_features = features
147 if isinstance(features, str):
144 features = [features] 148 features = [features]
145 if features is None or len(features) == 0: 149 if features is None or len(features) == 0:
146 features = self.DEFAULT_BUILDER_FEATURES 150 features = self.DEFAULT_BUILDER_FEATURES
@@ -151,6 +155,16 @@ class BeautifulSoup(Tag):
151 "requested: %s. Do you need to install a parser library?" 155 "requested: %s. Do you need to install a parser library?"
152 % ",".join(features)) 156 % ",".join(features))
153 builder = builder_class() 157 builder = builder_class()
158 if not (original_features == builder.NAME or
159 original_features in builder.ALTERNATE_NAMES):
160 if builder.is_xml:
161 markup_type = "XML"
162 else:
163 markup_type = "HTML"
164 warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
165 parser=builder.NAME,
166 markup_type=markup_type))
167
154 self.builder = builder 168 self.builder = builder
155 self.is_xml = builder.is_xml 169 self.is_xml = builder.is_xml
156 self.builder.soup = self 170 self.builder.soup = self
@@ -164,7 +178,7 @@ class BeautifulSoup(Tag):
164 # involving passing non-markup to Beautiful Soup. 178 # involving passing non-markup to Beautiful Soup.
165 # Beautiful Soup will still parse the input as markup, 179 # Beautiful Soup will still parse the input as markup,
166 # just in case that's what the user really wants. 180 # just in case that's what the user really wants.
167 if (isinstance(markup, unicode) 181 if (isinstance(markup, str)
168 and not os.path.supports_unicode_filenames): 182 and not os.path.supports_unicode_filenames):
169 possible_filename = markup.encode("utf8") 183 possible_filename = markup.encode("utf8")
170 else: 184 else:
@@ -172,25 +186,30 @@ class BeautifulSoup(Tag):
172 is_file = False 186 is_file = False
173 try: 187 try:
174 is_file = os.path.exists(possible_filename) 188 is_file = os.path.exists(possible_filename)
175 except Exception, e: 189 except Exception as e:
176 # This is almost certainly a problem involving 190 # This is almost certainly a problem involving
177 # characters not valid in filenames on this 191 # characters not valid in filenames on this
178 # system. Just let it go. 192 # system. Just let it go.
179 pass 193 pass
180 if is_file: 194 if is_file:
195 if isinstance(markup, str):
196 markup = markup.encode("utf8")
181 warnings.warn( 197 warnings.warn(
182 '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) 198 '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
183 if markup[:5] == "http:" or markup[:6] == "https:": 199 if markup[:5] == "http:" or markup[:6] == "https:":
184 # TODO: This is ugly but I couldn't get it to work in 200 # TODO: This is ugly but I couldn't get it to work in
185 # Python 3 otherwise. 201 # Python 3 otherwise.
186 if ((isinstance(markup, bytes) and not b' ' in markup) 202 if ((isinstance(markup, bytes) and not b' ' in markup)
187 or (isinstance(markup, unicode) and not u' ' in markup)): 203 or (isinstance(markup, str) and not ' ' in markup)):
204 if isinstance(markup, str):
205 markup = markup.encode("utf8")
188 warnings.warn( 206 warnings.warn(
189 '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) 207 '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
190 208
191 for (self.markup, self.original_encoding, self.declared_html_encoding, 209 for (self.markup, self.original_encoding, self.declared_html_encoding,
192 self.contains_replacement_characters) in ( 210 self.contains_replacement_characters) in (
193 self.builder.prepare_markup(markup, from_encoding)): 211 self.builder.prepare_markup(
212 markup, from_encoding, exclude_encodings=exclude_encodings)):
194 self.reset() 213 self.reset()
195 try: 214 try:
196 self._feed() 215 self._feed()
@@ -203,6 +222,16 @@ class BeautifulSoup(Tag):
203 self.markup = None 222 self.markup = None
204 self.builder.soup = None 223 self.builder.soup = None
205 224
225 def __copy__(self):
226 return type(self)(self.encode(), builder=self.builder)
227
228 def __getstate__(self):
229 # Frequently a tree builder can't be pickled.
230 d = dict(self.__dict__)
231 if 'builder' in d and not self.builder.picklable:
232 del d['builder']
233 return d
234
206 def _feed(self): 235 def _feed(self):
207 # Convert the document to Unicode. 236 # Convert the document to Unicode.
208 self.builder.reset() 237 self.builder.reset()
@@ -229,9 +258,7 @@ class BeautifulSoup(Tag):
229 258
230 def new_string(self, s, subclass=NavigableString): 259 def new_string(self, s, subclass=NavigableString):
231 """Create a new NavigableString associated with this soup.""" 260 """Create a new NavigableString associated with this soup."""
232 navigable = subclass(s) 261 return subclass(s)
233 navigable.setup()
234 return navigable
235 262
236 def insert_before(self, successor): 263 def insert_before(self, successor):
237 raise NotImplementedError("BeautifulSoup objects don't support insert_before().") 264 raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
@@ -259,7 +286,7 @@ class BeautifulSoup(Tag):
259 286
260 def endData(self, containerClass=NavigableString): 287 def endData(self, containerClass=NavigableString):
261 if self.current_data: 288 if self.current_data:
262 current_data = u''.join(self.current_data) 289 current_data = ''.join(self.current_data)
263 # If whitespace is not preserved, and this string contains 290 # If whitespace is not preserved, and this string contains
264 # nothing but ASCII spaces, replace it with a single space 291 # nothing but ASCII spaces, replace it with a single space
265 # or newline. 292 # or newline.
@@ -290,14 +317,49 @@ class BeautifulSoup(Tag):
290 def object_was_parsed(self, o, parent=None, most_recent_element=None): 317 def object_was_parsed(self, o, parent=None, most_recent_element=None):
291 """Add an object to the parse tree.""" 318 """Add an object to the parse tree."""
292 parent = parent or self.currentTag 319 parent = parent or self.currentTag
293 most_recent_element = most_recent_element or self._most_recent_element 320 previous_element = most_recent_element or self._most_recent_element
294 o.setup(parent, most_recent_element) 321
322 next_element = previous_sibling = next_sibling = None
323 if isinstance(o, Tag):
324 next_element = o.next_element
325 next_sibling = o.next_sibling
326 previous_sibling = o.previous_sibling
327 if not previous_element:
328 previous_element = o.previous_element
329
330 o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
295 331
296 if most_recent_element is not None:
297 most_recent_element.next_element = o
298 self._most_recent_element = o 332 self._most_recent_element = o
299 parent.contents.append(o) 333 parent.contents.append(o)
300 334
335 if parent.next_sibling:
336 # This node is being inserted into an element that has
337 # already been parsed. Deal with any dangling references.
338 index = parent.contents.index(o)
339 if index == 0:
340 previous_element = parent
341 previous_sibling = None
342 else:
343 previous_element = previous_sibling = parent.contents[index-1]
344 if index == len(parent.contents)-1:
345 next_element = parent.next_sibling
346 next_sibling = None
347 else:
348 next_element = next_sibling = parent.contents[index+1]
349
350 o.previous_element = previous_element
351 if previous_element:
352 previous_element.next_element = o
353 o.next_element = next_element
354 if next_element:
355 next_element.previous_element = o
356 o.next_sibling = next_sibling
357 if next_sibling:
358 next_sibling.previous_sibling = o
359 o.previous_sibling = previous_sibling
360 if previous_sibling:
361 previous_sibling.next_sibling = o
362
301 def _popToTag(self, name, nsprefix=None, inclusivePop=True): 363 def _popToTag(self, name, nsprefix=None, inclusivePop=True):
302 """Pops the tag stack up to and including the most recent 364 """Pops the tag stack up to and including the most recent
303 instance of the given tag. If inclusivePop is false, pops the tag 365 instance of the given tag. If inclusivePop is false, pops the tag
@@ -367,9 +429,9 @@ class BeautifulSoup(Tag):
367 encoding_part = '' 429 encoding_part = ''
368 if eventual_encoding != None: 430 if eventual_encoding != None:
369 encoding_part = ' encoding="%s"' % eventual_encoding 431 encoding_part = ' encoding="%s"' % eventual_encoding
370 prefix = u'<?xml version="1.0"%s?>\n' % encoding_part 432 prefix = '<?xml version="1.0"%s?>\n' % encoding_part
371 else: 433 else:
372 prefix = u'' 434 prefix = ''
373 if not pretty_print: 435 if not pretty_print:
374 indent_level = None 436 indent_level = None
375 else: 437 else:
@@ -403,4 +465,4 @@ class FeatureNotFound(ValueError):
403if __name__ == '__main__': 465if __name__ == '__main__':
404 import sys 466 import sys
405 soup = BeautifulSoup(sys.stdin) 467 soup = BeautifulSoup(sys.stdin)
406 print soup.prettify() 468 print(soup.prettify())