diff options
Diffstat (limited to 'bitbake/lib/bs4/__init__.py')
-rw-r--r-- | bitbake/lib/bs4/__init__.py | 680 |
1 files changed, 526 insertions, 154 deletions
diff --git a/bitbake/lib/bs4/__init__.py b/bitbake/lib/bs4/__init__.py index e35725b86e..d8ad5e1dc1 100644 --- a/bitbake/lib/bs4/__init__.py +++ b/bitbake/lib/bs4/__init__.py | |||
@@ -1,65 +1,99 @@ | |||
1 | """Beautiful Soup | 1 | """Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend". |
2 | Elixir and Tonic | 2 | |
3 | "The Screen-Scraper's Friend" | ||
4 | http://www.crummy.com/software/BeautifulSoup/ | 3 | http://www.crummy.com/software/BeautifulSoup/ |
5 | 4 | ||
6 | Beautiful Soup uses a pluggable XML or HTML parser to parse a | 5 | Beautiful Soup uses a pluggable XML or HTML parser to parse a |
7 | (possibly invalid) document into a tree representation. Beautiful Soup | 6 | (possibly invalid) document into a tree representation. Beautiful Soup |
8 | provides provides methods and Pythonic idioms that make it easy to | 7 | provides methods and Pythonic idioms that make it easy to navigate, |
9 | navigate, search, and modify the parse tree. | 8 | search, and modify the parse tree. |
10 | 9 | ||
11 | Beautiful Soup works with Python 2.6 and up. It works better if lxml | 10 | Beautiful Soup works with Python 3.6 and up. It works better if lxml |
12 | and/or html5lib is installed. | 11 | and/or html5lib is installed. |
13 | 12 | ||
14 | For more than you ever wanted to know about Beautiful Soup, see the | 13 | For more than you ever wanted to know about Beautiful Soup, see the |
15 | documentation: | 14 | documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/ |
16 | http://www.crummy.com/software/BeautifulSoup/bs4/doc/ | ||
17 | """ | 15 | """ |
18 | 16 | ||
19 | __author__ = "Leonard Richardson (leonardr@segfault.org)" | 17 | __author__ = "Leonard Richardson (leonardr@segfault.org)" |
20 | __version__ = "4.4.1" | 18 | __version__ = "4.12.3" |
21 | __copyright__ = "Copyright (c) 2004-2015 Leonard Richardson" | 19 | __copyright__ = "Copyright (c) 2004-2024 Leonard Richardson" |
20 | # Use of this source code is governed by the MIT license. | ||
22 | __license__ = "MIT" | 21 | __license__ = "MIT" |
23 | 22 | ||
24 | __all__ = ['BeautifulSoup'] | 23 | __all__ = ['BeautifulSoup'] |
25 | 24 | ||
25 | from collections import Counter | ||
26 | import os | 26 | import os |
27 | import re | 27 | import re |
28 | import sys | ||
29 | import traceback | ||
28 | import warnings | 30 | import warnings |
29 | 31 | ||
30 | from .builder import builder_registry, ParserRejectedMarkup | 32 | # The very first thing we do is give a useful error if someone is |
33 | # running this code under Python 2. | ||
34 | if sys.version_info.major < 3: | ||
35 | raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.') | ||
36 | |||
37 | from .builder import ( | ||
38 | builder_registry, | ||
39 | ParserRejectedMarkup, | ||
40 | XMLParsedAsHTMLWarning, | ||
41 | HTMLParserTreeBuilder | ||
42 | ) | ||
31 | from .dammit import UnicodeDammit | 43 | from .dammit import UnicodeDammit |
32 | from .element import ( | 44 | from .element import ( |
33 | CData, | 45 | CData, |
34 | Comment, | 46 | Comment, |
47 | CSS, | ||
35 | DEFAULT_OUTPUT_ENCODING, | 48 | DEFAULT_OUTPUT_ENCODING, |
36 | Declaration, | 49 | Declaration, |
37 | Doctype, | 50 | Doctype, |
38 | NavigableString, | 51 | NavigableString, |
39 | PageElement, | 52 | PageElement, |
40 | ProcessingInstruction, | 53 | ProcessingInstruction, |
54 | PYTHON_SPECIFIC_ENCODINGS, | ||
41 | ResultSet, | 55 | ResultSet, |
56 | Script, | ||
57 | Stylesheet, | ||
42 | SoupStrainer, | 58 | SoupStrainer, |
43 | Tag, | 59 | Tag, |
60 | TemplateString, | ||
44 | ) | 61 | ) |
45 | 62 | ||
46 | # The very first thing we do is give a useful error if someone is | 63 | # Define some custom warnings. |
47 | # running this code under Python 3 without converting it. | 64 | class GuessedAtParserWarning(UserWarning): |
48 | 'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).' | 65 | """The warning issued when BeautifulSoup has to guess what parser to |
66 | use -- probably because no parser was specified in the constructor. | ||
67 | """ | ||
49 | 68 | ||
50 | class BeautifulSoup(Tag): | 69 | class MarkupResemblesLocatorWarning(UserWarning): |
70 | """The warning issued when BeautifulSoup is given 'markup' that | ||
71 | actually looks like a resource locator -- a URL or a path to a file | ||
72 | on disk. | ||
51 | """ | 73 | """ |
52 | This class defines the basic interface called by the tree builders. | ||
53 | 74 | ||
54 | These methods will be called by the parser: | 75 | |
55 | reset() | 76 | class BeautifulSoup(Tag): |
56 | feed(markup) | 77 | """A data structure representing a parsed HTML or XML document. |
78 | |||
79 | Most of the methods you'll call on a BeautifulSoup object are inherited from | ||
80 | PageElement or Tag. | ||
81 | |||
82 | Internally, this class defines the basic interface called by the | ||
83 | tree builders when converting an HTML/XML document into a data | ||
84 | structure. The interface abstracts away the differences between | ||
85 | parsers. To write a new tree builder, you'll need to understand | ||
86 | these methods as a whole. | ||
87 | |||
88 | These methods will be called by the BeautifulSoup constructor: | ||
89 | * reset() | ||
90 | * feed(markup) | ||
57 | 91 | ||
58 | The tree builder may call these methods from its feed() implementation: | 92 | The tree builder may call these methods from its feed() implementation: |
59 | handle_starttag(name, attrs) # See note about return value | 93 | * handle_starttag(name, attrs) # See note about return value |
60 | handle_endtag(name) | 94 | * handle_endtag(name) |
61 | handle_data(data) # Appends to the current data node | 95 | * handle_data(data) # Appends to the current data node |
62 | endData(containerClass=NavigableString) # Ends the current data node | 96 | * endData(containerClass) # Ends the current data node |
63 | 97 | ||
64 | No matter how complicated the underlying parser is, you should be | 98 | No matter how complicated the underlying parser is, you should be |
65 | able to build a tree using 'start tag' events, 'end tag' events, | 99 | able to build a tree using 'start tag' events, 'end tag' events, |
@@ -69,24 +103,77 @@ class BeautifulSoup(Tag): | |||
69 | like HTML's <br> tag), call handle_starttag and then | 103 | like HTML's <br> tag), call handle_starttag and then |
70 | handle_endtag. | 104 | handle_endtag. |
71 | """ | 105 | """ |
106 | |||
107 | # Since BeautifulSoup subclasses Tag, it's possible to treat it as | ||
108 | # a Tag with a .name. This name makes it clear the BeautifulSoup | ||
109 | # object isn't a real markup tag. | ||
72 | ROOT_TAG_NAME = '[document]' | 110 | ROOT_TAG_NAME = '[document]' |
73 | 111 | ||
74 | # If the end-user gives no indication which tree builder they | 112 | # If the end-user gives no indication which tree builder they |
75 | # want, look for one with these features. | 113 | # want, look for one with these features. |
76 | DEFAULT_BUILDER_FEATURES = ['html', 'fast'] | 114 | DEFAULT_BUILDER_FEATURES = ['html', 'fast'] |
77 | 115 | ||
116 | # A string containing all ASCII whitespace characters, used in | ||
117 | # endData() to detect data chunks that seem 'empty'. | ||
78 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' | 118 | ASCII_SPACES = '\x20\x0a\x09\x0c\x0d' |
79 | 119 | ||
80 | NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n" | 120 | NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n" |
81 | 121 | ||
82 | def __init__(self, markup="", features=None, builder=None, | 122 | def __init__(self, markup="", features=None, builder=None, |
83 | parse_only=None, from_encoding=None, exclude_encodings=None, | 123 | parse_only=None, from_encoding=None, exclude_encodings=None, |
84 | **kwargs): | 124 | element_classes=None, **kwargs): |
85 | """The Soup object is initialized as the 'root tag', and the | 125 | """Constructor. |
86 | provided markup (which can be a string or a file-like object) | 126 | |
87 | is fed into the underlying parser.""" | 127 | :param markup: A string or a file-like object representing |
88 | 128 | markup to be parsed. | |
129 | |||
130 | :param features: Desirable features of the parser to be | ||
131 | used. This may be the name of a specific parser ("lxml", | ||
132 | "lxml-xml", "html.parser", or "html5lib") or it may be the | ||
133 | type of markup to be used ("html", "html5", "xml"). It's | ||
134 | recommended that you name a specific parser, so that | ||
135 | Beautiful Soup gives you the same results across platforms | ||
136 | and virtual environments. | ||
137 | |||
138 | :param builder: A TreeBuilder subclass to instantiate (or | ||
139 | instance to use) instead of looking one up based on | ||
140 | `features`. You only need to use this if you've implemented a | ||
141 | custom TreeBuilder. | ||
142 | |||
143 | :param parse_only: A SoupStrainer. Only parts of the document | ||
144 | matching the SoupStrainer will be considered. This is useful | ||
145 | when parsing part of a document that would otherwise be too | ||
146 | large to fit into memory. | ||
147 | |||
148 | :param from_encoding: A string indicating the encoding of the | ||
149 | document to be parsed. Pass this in if Beautiful Soup is | ||
150 | guessing wrongly about the document's encoding. | ||
151 | |||
152 | :param exclude_encodings: A list of strings indicating | ||
153 | encodings known to be wrong. Pass this in if you don't know | ||
154 | the document's encoding but you know Beautiful Soup's guess is | ||
155 | wrong. | ||
156 | |||
157 | :param element_classes: A dictionary mapping BeautifulSoup | ||
158 | classes like Tag and NavigableString, to other classes you'd | ||
159 | like to be instantiated instead as the parse tree is | ||
160 | built. This is useful for subclassing Tag or NavigableString | ||
161 | to modify default behavior. | ||
162 | |||
163 | :param kwargs: For backwards compatibility purposes, the | ||
164 | constructor accepts certain keyword arguments used in | ||
165 | Beautiful Soup 3. None of these arguments do anything in | ||
166 | Beautiful Soup 4; they will result in a warning and then be | ||
167 | ignored. | ||
168 | |||
169 | Apart from this, any keyword arguments passed into the | ||
170 | BeautifulSoup constructor are propagated to the TreeBuilder | ||
171 | constructor. This makes it possible to configure a | ||
172 | TreeBuilder by passing in arguments, not just by saying which | ||
173 | one to use. | ||
174 | """ | ||
89 | if 'convertEntities' in kwargs: | 175 | if 'convertEntities' in kwargs: |
176 | del kwargs['convertEntities'] | ||
90 | warnings.warn( | 177 | warnings.warn( |
91 | "BS4 does not respect the convertEntities argument to the " | 178 | "BS4 does not respect the convertEntities argument to the " |
92 | "BeautifulSoup constructor. Entities are always converted " | 179 | "BeautifulSoup constructor. Entities are always converted " |
@@ -125,10 +212,10 @@ class BeautifulSoup(Tag): | |||
125 | if old_name in kwargs: | 212 | if old_name in kwargs: |
126 | warnings.warn( | 213 | warnings.warn( |
127 | 'The "%s" argument to the BeautifulSoup constructor ' | 214 | 'The "%s" argument to the BeautifulSoup constructor ' |
128 | 'has been renamed to "%s."' % (old_name, new_name)) | 215 | 'has been renamed to "%s."' % (old_name, new_name), |
129 | value = kwargs[old_name] | 216 | DeprecationWarning, stacklevel=3 |
130 | del kwargs[old_name] | 217 | ) |
131 | return value | 218 | return kwargs.pop(old_name) |
132 | return None | 219 | return None |
133 | 220 | ||
134 | parse_only = parse_only or deprecated_argument( | 221 | parse_only = parse_only or deprecated_argument( |
@@ -137,13 +224,23 @@ class BeautifulSoup(Tag): | |||
137 | from_encoding = from_encoding or deprecated_argument( | 224 | from_encoding = from_encoding or deprecated_argument( |
138 | "fromEncoding", "from_encoding") | 225 | "fromEncoding", "from_encoding") |
139 | 226 | ||
140 | if len(kwargs) > 0: | 227 | if from_encoding and isinstance(markup, str): |
141 | arg = list(kwargs.keys()).pop() | 228 | warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.") |
142 | raise TypeError( | 229 | from_encoding = None |
143 | "__init__() got an unexpected keyword argument '%s'" % arg) | 230 | |
144 | 231 | self.element_classes = element_classes or dict() | |
145 | if builder is None: | 232 | |
146 | original_features = features | 233 | # We need this information to track whether or not the builder |
234 | # was specified well enough that we can omit the 'you need to | ||
235 | # specify a parser' warning. | ||
236 | original_builder = builder | ||
237 | original_features = features | ||
238 | |||
239 | if isinstance(builder, type): | ||
240 | # A builder class was passed in; it needs to be instantiated. | ||
241 | builder_class = builder | ||
242 | builder = None | ||
243 | elif builder is None: | ||
147 | if isinstance(features, str): | 244 | if isinstance(features, str): |
148 | features = [features] | 245 | features = [features] |
149 | if features is None or len(features) == 0: | 246 | if features is None or len(features) == 0: |
@@ -154,85 +251,227 @@ class BeautifulSoup(Tag): | |||
154 | "Couldn't find a tree builder with the features you " | 251 | "Couldn't find a tree builder with the features you " |
155 | "requested: %s. Do you need to install a parser library?" | 252 | "requested: %s. Do you need to install a parser library?" |
156 | % ",".join(features)) | 253 | % ",".join(features)) |
157 | builder = builder_class() | 254 | |
158 | if not (original_features == builder.NAME or | 255 | # At this point either we have a TreeBuilder instance in |
159 | original_features in builder.ALTERNATE_NAMES): | 256 | # builder, or we have a builder_class that we can instantiate |
257 | # with the remaining **kwargs. | ||
258 | if builder is None: | ||
259 | builder = builder_class(**kwargs) | ||
260 | if not original_builder and not ( | ||
261 | original_features == builder.NAME or | ||
262 | original_features in builder.ALTERNATE_NAMES | ||
263 | ) and markup: | ||
264 | # The user did not tell us which TreeBuilder to use, | ||
265 | # and we had to guess. Issue a warning. | ||
160 | if builder.is_xml: | 266 | if builder.is_xml: |
161 | markup_type = "XML" | 267 | markup_type = "XML" |
162 | else: | 268 | else: |
163 | markup_type = "HTML" | 269 | markup_type = "HTML" |
164 | warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict( | ||
165 | parser=builder.NAME, | ||
166 | markup_type=markup_type)) | ||
167 | 270 | ||
271 | # This code adapted from warnings.py so that we get the same line | ||
272 | # of code as our warnings.warn() call gets, even if the answer is wrong | ||
273 | # (as it may be in a multithreading situation). | ||
274 | caller = None | ||
275 | try: | ||
276 | caller = sys._getframe(1) | ||
277 | except ValueError: | ||
278 | pass | ||
279 | if caller: | ||
280 | globals = caller.f_globals | ||
281 | line_number = caller.f_lineno | ||
282 | else: | ||
283 | globals = sys.__dict__ | ||
284 | line_number= 1 | ||
285 | filename = globals.get('__file__') | ||
286 | if filename: | ||
287 | fnl = filename.lower() | ||
288 | if fnl.endswith((".pyc", ".pyo")): | ||
289 | filename = filename[:-1] | ||
290 | if filename: | ||
291 | # If there is no filename at all, the user is most likely in a REPL, | ||
292 | # and the warning is not necessary. | ||
293 | values = dict( | ||
294 | filename=filename, | ||
295 | line_number=line_number, | ||
296 | parser=builder.NAME, | ||
297 | markup_type=markup_type | ||
298 | ) | ||
299 | warnings.warn( | ||
300 | self.NO_PARSER_SPECIFIED_WARNING % values, | ||
301 | GuessedAtParserWarning, stacklevel=2 | ||
302 | ) | ||
303 | else: | ||
304 | if kwargs: | ||
305 | warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.") | ||
306 | |||
168 | self.builder = builder | 307 | self.builder = builder |
169 | self.is_xml = builder.is_xml | 308 | self.is_xml = builder.is_xml |
170 | self.builder.soup = self | 309 | self.known_xml = self.is_xml |
171 | 310 | self._namespaces = dict() | |
172 | self.parse_only = parse_only | 311 | self.parse_only = parse_only |
173 | 312 | ||
174 | if hasattr(markup, 'read'): # It's a file-type object. | 313 | if hasattr(markup, 'read'): # It's a file-type object. |
175 | markup = markup.read() | 314 | markup = markup.read() |
176 | elif len(markup) <= 256: | 315 | elif len(markup) <= 256 and ( |
177 | # Print out warnings for a couple beginner problems | 316 | (isinstance(markup, bytes) and not b'<' in markup) |
317 | or (isinstance(markup, str) and not '<' in markup) | ||
318 | ): | ||
319 | # Issue warnings for a couple beginner problems | ||
178 | # involving passing non-markup to Beautiful Soup. | 320 | # involving passing non-markup to Beautiful Soup. |
179 | # Beautiful Soup will still parse the input as markup, | 321 | # Beautiful Soup will still parse the input as markup, |
180 | # just in case that's what the user really wants. | 322 | # since that is sometimes the intended behavior. |
181 | if (isinstance(markup, str) | 323 | if not self._markup_is_url(markup): |
182 | and not os.path.supports_unicode_filenames): | 324 | self._markup_resembles_filename(markup) |
183 | possible_filename = markup.encode("utf8") | ||
184 | else: | ||
185 | possible_filename = markup | ||
186 | is_file = False | ||
187 | try: | ||
188 | is_file = os.path.exists(possible_filename) | ||
189 | except Exception as e: | ||
190 | # This is almost certainly a problem involving | ||
191 | # characters not valid in filenames on this | ||
192 | # system. Just let it go. | ||
193 | pass | ||
194 | if is_file: | ||
195 | if isinstance(markup, str): | ||
196 | markup = markup.encode("utf8") | ||
197 | warnings.warn( | ||
198 | '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup) | ||
199 | if markup[:5] == "http:" or markup[:6] == "https:": | ||
200 | # TODO: This is ugly but I couldn't get it to work in | ||
201 | # Python 3 otherwise. | ||
202 | if ((isinstance(markup, bytes) and not b' ' in markup) | ||
203 | or (isinstance(markup, str) and not ' ' in markup)): | ||
204 | if isinstance(markup, str): | ||
205 | markup = markup.encode("utf8") | ||
206 | warnings.warn( | ||
207 | '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup) | ||
208 | 325 | ||
326 | rejections = [] | ||
327 | success = False | ||
209 | for (self.markup, self.original_encoding, self.declared_html_encoding, | 328 | for (self.markup, self.original_encoding, self.declared_html_encoding, |
210 | self.contains_replacement_characters) in ( | 329 | self.contains_replacement_characters) in ( |
211 | self.builder.prepare_markup( | 330 | self.builder.prepare_markup( |
212 | markup, from_encoding, exclude_encodings=exclude_encodings)): | 331 | markup, from_encoding, exclude_encodings=exclude_encodings)): |
213 | self.reset() | 332 | self.reset() |
333 | self.builder.initialize_soup(self) | ||
214 | try: | 334 | try: |
215 | self._feed() | 335 | self._feed() |
336 | success = True | ||
216 | break | 337 | break |
217 | except ParserRejectedMarkup: | 338 | except ParserRejectedMarkup as e: |
339 | rejections.append(e) | ||
218 | pass | 340 | pass |
219 | 341 | ||
342 | if not success: | ||
343 | other_exceptions = [str(e) for e in rejections] | ||
344 | raise ParserRejectedMarkup( | ||
345 | "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions) | ||
346 | ) | ||
347 | |||
220 | # Clear out the markup and remove the builder's circular | 348 | # Clear out the markup and remove the builder's circular |
221 | # reference to this object. | 349 | # reference to this object. |
222 | self.markup = None | 350 | self.markup = None |
223 | self.builder.soup = None | 351 | self.builder.soup = None |
224 | 352 | ||
225 | def __copy__(self): | 353 | def _clone(self): |
226 | return type(self)(self.encode(), builder=self.builder) | 354 | """Create a new BeautifulSoup object with the same TreeBuilder, |
355 | but not associated with any markup. | ||
356 | |||
357 | This is the first step of the deepcopy process. | ||
358 | """ | ||
359 | clone = type(self)("", None, self.builder) | ||
227 | 360 | ||
361 | # Keep track of the encoding of the original document, | ||
362 | # since we won't be parsing it again. | ||
363 | clone.original_encoding = self.original_encoding | ||
364 | return clone | ||
365 | |||
228 | def __getstate__(self): | 366 | def __getstate__(self): |
229 | # Frequently a tree builder can't be pickled. | 367 | # Frequently a tree builder can't be pickled. |
230 | d = dict(self.__dict__) | 368 | d = dict(self.__dict__) |
231 | if 'builder' in d and not self.builder.picklable: | 369 | if 'builder' in d and d['builder'] is not None and not self.builder.picklable: |
232 | del d['builder'] | 370 | d['builder'] = type(self.builder) |
371 | # Store the contents as a Unicode string. | ||
372 | d['contents'] = [] | ||
373 | d['markup'] = self.decode() | ||
374 | |||
375 | # If _most_recent_element is present, it's a Tag object left | ||
376 | # over from initial parse. It might not be picklable and we | ||
377 | # don't need it. | ||
378 | if '_most_recent_element' in d: | ||
379 | del d['_most_recent_element'] | ||
233 | return d | 380 | return d |
234 | 381 | ||
382 | def __setstate__(self, state): | ||
383 | # If necessary, restore the TreeBuilder by looking it up. | ||
384 | self.__dict__ = state | ||
385 | if isinstance(self.builder, type): | ||
386 | self.builder = self.builder() | ||
387 | elif not self.builder: | ||
388 | # We don't know which builder was used to build this | ||
389 | # parse tree, so use a default we know is always available. | ||
390 | self.builder = HTMLParserTreeBuilder() | ||
391 | self.builder.soup = self | ||
392 | self.reset() | ||
393 | self._feed() | ||
394 | return state | ||
395 | |||
396 | |||
397 | @classmethod | ||
398 | def _decode_markup(cls, markup): | ||
399 | """Ensure `markup` is bytes so it's safe to send into warnings.warn. | ||
400 | |||
401 | TODO: warnings.warn had this problem back in 2010 but it might not | ||
402 | anymore. | ||
403 | """ | ||
404 | if isinstance(markup, bytes): | ||
405 | decoded = markup.decode('utf-8', 'replace') | ||
406 | else: | ||
407 | decoded = markup | ||
408 | return decoded | ||
409 | |||
410 | @classmethod | ||
411 | def _markup_is_url(cls, markup): | ||
412 | """Error-handling method to raise a warning if incoming markup looks | ||
413 | like a URL. | ||
414 | |||
415 | :param markup: A string. | ||
416 | :return: Whether or not the markup resembles a URL | ||
417 | closely enough to justify a warning. | ||
418 | """ | ||
419 | if isinstance(markup, bytes): | ||
420 | space = b' ' | ||
421 | cant_start_with = (b"http:", b"https:") | ||
422 | elif isinstance(markup, str): | ||
423 | space = ' ' | ||
424 | cant_start_with = ("http:", "https:") | ||
425 | else: | ||
426 | return False | ||
427 | |||
428 | if any(markup.startswith(prefix) for prefix in cant_start_with): | ||
429 | if not space in markup: | ||
430 | warnings.warn( | ||
431 | 'The input looks more like a URL than markup. You may want to use' | ||
432 | ' an HTTP client like requests to get the document behind' | ||
433 | ' the URL, and feed that document to Beautiful Soup.', | ||
434 | MarkupResemblesLocatorWarning, | ||
435 | stacklevel=3 | ||
436 | ) | ||
437 | return True | ||
438 | return False | ||
439 | |||
440 | @classmethod | ||
441 | def _markup_resembles_filename(cls, markup): | ||
442 | """Error-handling method to raise a warning if incoming markup | ||
443 | resembles a filename. | ||
444 | |||
445 | :param markup: A bytestring or string. | ||
446 | :return: Whether or not the markup resembles a filename | ||
447 | closely enough to justify a warning. | ||
448 | """ | ||
449 | path_characters = '/\\' | ||
450 | extensions = ['.html', '.htm', '.xml', '.xhtml', '.txt'] | ||
451 | if isinstance(markup, bytes): | ||
452 | path_characters = path_characters.encode("utf8") | ||
453 | extensions = [x.encode('utf8') for x in extensions] | ||
454 | filelike = False | ||
455 | if any(x in markup for x in path_characters): | ||
456 | filelike = True | ||
457 | else: | ||
458 | lower = markup.lower() | ||
459 | if any(lower.endswith(ext) for ext in extensions): | ||
460 | filelike = True | ||
461 | if filelike: | ||
462 | warnings.warn( | ||
463 | 'The input looks more like a filename than markup. You may' | ||
464 | ' want to open this file and pass the filehandle into' | ||
465 | ' Beautiful Soup.', | ||
466 | MarkupResemblesLocatorWarning, stacklevel=3 | ||
467 | ) | ||
468 | return True | ||
469 | return False | ||
470 | |||
235 | def _feed(self): | 471 | def _feed(self): |
472 | """Internal method that parses previously set markup, creating a large | ||
473 | number of Tag and NavigableString objects. | ||
474 | """ | ||
236 | # Convert the document to Unicode. | 475 | # Convert the document to Unicode. |
237 | self.builder.reset() | 476 | self.builder.reset() |
238 | 477 | ||
@@ -243,48 +482,111 @@ class BeautifulSoup(Tag): | |||
243 | self.popTag() | 482 | self.popTag() |
244 | 483 | ||
245 | def reset(self): | 484 | def reset(self): |
485 | """Reset this object to a state as though it had never parsed any | ||
486 | markup. | ||
487 | """ | ||
246 | Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) | 488 | Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME) |
247 | self.hidden = 1 | 489 | self.hidden = 1 |
248 | self.builder.reset() | 490 | self.builder.reset() |
249 | self.current_data = [] | 491 | self.current_data = [] |
250 | self.currentTag = None | 492 | self.currentTag = None |
251 | self.tagStack = [] | 493 | self.tagStack = [] |
494 | self.open_tag_counter = Counter() | ||
252 | self.preserve_whitespace_tag_stack = [] | 495 | self.preserve_whitespace_tag_stack = [] |
496 | self.string_container_stack = [] | ||
497 | self._most_recent_element = None | ||
253 | self.pushTag(self) | 498 | self.pushTag(self) |
254 | 499 | ||
255 | def new_tag(self, name, namespace=None, nsprefix=None, **attrs): | 500 | def new_tag(self, name, namespace=None, nsprefix=None, attrs={}, |
256 | """Create a new tag associated with this soup.""" | 501 | sourceline=None, sourcepos=None, **kwattrs): |
257 | return Tag(None, self.builder, name, namespace, nsprefix, attrs) | 502 | """Create a new Tag associated with this BeautifulSoup object. |
503 | |||
504 | :param name: The name of the new Tag. | ||
505 | :param namespace: The URI of the new Tag's XML namespace, if any. | ||
506 | :param prefix: The prefix for the new Tag's XML namespace, if any. | ||
507 | :param attrs: A dictionary of this Tag's attribute values; can | ||
508 | be used instead of `kwattrs` for attributes like 'class' | ||
509 | that are reserved words in Python. | ||
510 | :param sourceline: The line number where this tag was | ||
511 | (purportedly) found in its source document. | ||
512 | :param sourcepos: The character position within `sourceline` where this | ||
513 | tag was (purportedly) found. | ||
514 | :param kwattrs: Keyword arguments for the new Tag's attribute values. | ||
258 | 515 | ||
259 | def new_string(self, s, subclass=NavigableString): | 516 | """ |
260 | """Create a new NavigableString associated with this soup.""" | 517 | kwattrs.update(attrs) |
261 | return subclass(s) | 518 | return self.element_classes.get(Tag, Tag)( |
519 | None, self.builder, name, namespace, nsprefix, kwattrs, | ||
520 | sourceline=sourceline, sourcepos=sourcepos | ||
521 | ) | ||
522 | |||
523 | def string_container(self, base_class=None): | ||
524 | container = base_class or NavigableString | ||
525 | |||
526 | # There may be a general override of NavigableString. | ||
527 | container = self.element_classes.get( | ||
528 | container, container | ||
529 | ) | ||
530 | |||
531 | # On top of that, we may be inside a tag that needs a special | ||
532 | # container class. | ||
533 | if self.string_container_stack and container is NavigableString: | ||
534 | container = self.builder.string_containers.get( | ||
535 | self.string_container_stack[-1].name, container | ||
536 | ) | ||
537 | return container | ||
538 | |||
539 | def new_string(self, s, subclass=None): | ||
540 | """Create a new NavigableString associated with this BeautifulSoup | ||
541 | object. | ||
542 | """ | ||
543 | container = self.string_container(subclass) | ||
544 | return container(s) | ||
262 | 545 | ||
263 | def insert_before(self, successor): | 546 | def insert_before(self, *args): |
547 | """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement | ||
548 | it because there is nothing before or after it in the parse tree. | ||
549 | """ | ||
264 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().") | 550 | raise NotImplementedError("BeautifulSoup objects don't support insert_before().") |
265 | 551 | ||
266 | def insert_after(self, successor): | 552 | def insert_after(self, *args): |
553 | """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement | ||
554 | it because there is nothing before or after it in the parse tree. | ||
555 | """ | ||
267 | raise NotImplementedError("BeautifulSoup objects don't support insert_after().") | 556 | raise NotImplementedError("BeautifulSoup objects don't support insert_after().") |
268 | 557 | ||
269 | def popTag(self): | 558 | def popTag(self): |
559 | """Internal method called by _popToTag when a tag is closed.""" | ||
270 | tag = self.tagStack.pop() | 560 | tag = self.tagStack.pop() |
561 | if tag.name in self.open_tag_counter: | ||
562 | self.open_tag_counter[tag.name] -= 1 | ||
271 | if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: | 563 | if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]: |
272 | self.preserve_whitespace_tag_stack.pop() | 564 | self.preserve_whitespace_tag_stack.pop() |
273 | #print "Pop", tag.name | 565 | if self.string_container_stack and tag == self.string_container_stack[-1]: |
566 | self.string_container_stack.pop() | ||
567 | #print("Pop", tag.name) | ||
274 | if self.tagStack: | 568 | if self.tagStack: |
275 | self.currentTag = self.tagStack[-1] | 569 | self.currentTag = self.tagStack[-1] |
276 | return self.currentTag | 570 | return self.currentTag |
277 | 571 | ||
278 | def pushTag(self, tag): | 572 | def pushTag(self, tag): |
279 | #print "Push", tag.name | 573 | """Internal method called by handle_starttag when a tag is opened.""" |
280 | if self.currentTag: | 574 | #print("Push", tag.name) |
575 | if self.currentTag is not None: | ||
281 | self.currentTag.contents.append(tag) | 576 | self.currentTag.contents.append(tag) |
282 | self.tagStack.append(tag) | 577 | self.tagStack.append(tag) |
283 | self.currentTag = self.tagStack[-1] | 578 | self.currentTag = self.tagStack[-1] |
579 | if tag.name != self.ROOT_TAG_NAME: | ||
580 | self.open_tag_counter[tag.name] += 1 | ||
284 | if tag.name in self.builder.preserve_whitespace_tags: | 581 | if tag.name in self.builder.preserve_whitespace_tags: |
285 | self.preserve_whitespace_tag_stack.append(tag) | 582 | self.preserve_whitespace_tag_stack.append(tag) |
583 | if tag.name in self.builder.string_containers: | ||
584 | self.string_container_stack.append(tag) | ||
286 | 585 | ||
287 | def endData(self, containerClass=NavigableString): | 586 | def endData(self, containerClass=None): |
587 | """Method called by the TreeBuilder when the end of a data segment | ||
588 | occurs. | ||
589 | """ | ||
288 | if self.current_data: | 590 | if self.current_data: |
289 | current_data = ''.join(self.current_data) | 591 | current_data = ''.join(self.current_data) |
290 | # If whitespace is not preserved, and this string contains | 592 | # If whitespace is not preserved, and this string contains |
@@ -311,61 +613,93 @@ class BeautifulSoup(Tag): | |||
311 | not self.parse_only.search(current_data)): | 613 | not self.parse_only.search(current_data)): |
312 | return | 614 | return |
313 | 615 | ||
616 | containerClass = self.string_container(containerClass) | ||
314 | o = containerClass(current_data) | 617 | o = containerClass(current_data) |
315 | self.object_was_parsed(o) | 618 | self.object_was_parsed(o) |
316 | 619 | ||
317 | def object_was_parsed(self, o, parent=None, most_recent_element=None): | 620 | def object_was_parsed(self, o, parent=None, most_recent_element=None): |
318 | """Add an object to the parse tree.""" | 621 | """Method called by the TreeBuilder to integrate an object into the parse tree.""" |
319 | parent = parent or self.currentTag | 622 | if parent is None: |
320 | previous_element = most_recent_element or self._most_recent_element | 623 | parent = self.currentTag |
624 | if most_recent_element is not None: | ||
625 | previous_element = most_recent_element | ||
626 | else: | ||
627 | previous_element = self._most_recent_element | ||
321 | 628 | ||
322 | next_element = previous_sibling = next_sibling = None | 629 | next_element = previous_sibling = next_sibling = None |
323 | if isinstance(o, Tag): | 630 | if isinstance(o, Tag): |
324 | next_element = o.next_element | 631 | next_element = o.next_element |
325 | next_sibling = o.next_sibling | 632 | next_sibling = o.next_sibling |
326 | previous_sibling = o.previous_sibling | 633 | previous_sibling = o.previous_sibling |
327 | if not previous_element: | 634 | if previous_element is None: |
328 | previous_element = o.previous_element | 635 | previous_element = o.previous_element |
329 | 636 | ||
637 | fix = parent.next_element is not None | ||
638 | |||
330 | o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) | 639 | o.setup(parent, previous_element, next_element, previous_sibling, next_sibling) |
331 | 640 | ||
332 | self._most_recent_element = o | 641 | self._most_recent_element = o |
333 | parent.contents.append(o) | 642 | parent.contents.append(o) |
334 | 643 | ||
335 | if parent.next_sibling: | 644 | # Check if we are inserting into an already parsed node. |
336 | # This node is being inserted into an element that has | 645 | if fix: |
337 | # already been parsed. Deal with any dangling references. | 646 | self._linkage_fixer(parent) |
338 | index = parent.contents.index(o) | 647 | |
339 | if index == 0: | 648 | def _linkage_fixer(self, el): |
340 | previous_element = parent | 649 | """Make sure linkage of this fragment is sound.""" |
341 | previous_sibling = None | 650 | |
342 | else: | 651 | first = el.contents[0] |
343 | previous_element = previous_sibling = parent.contents[index-1] | 652 | child = el.contents[-1] |
344 | if index == len(parent.contents)-1: | 653 | descendant = child |
345 | next_element = parent.next_sibling | 654 | |
346 | next_sibling = None | 655 | if child is first and el.parent is not None: |
347 | else: | 656 | # Parent should be linked to first child |
348 | next_element = next_sibling = parent.contents[index+1] | 657 | el.next_element = child |
349 | 658 | # We are no longer linked to whatever this element is | |
350 | o.previous_element = previous_element | 659 | prev_el = child.previous_element |
351 | if previous_element: | 660 | if prev_el is not None and prev_el is not el: |
352 | previous_element.next_element = o | 661 | prev_el.next_element = None |
353 | o.next_element = next_element | 662 | # First child should be linked to the parent, and no previous siblings. |
354 | if next_element: | 663 | child.previous_element = el |
355 | next_element.previous_element = o | 664 | child.previous_sibling = None |
356 | o.next_sibling = next_sibling | 665 | |
357 | if next_sibling: | 666 | # We have no sibling as we've been appended as the last. |
358 | next_sibling.previous_sibling = o | 667 | child.next_sibling = None |
359 | o.previous_sibling = previous_sibling | 668 | |
360 | if previous_sibling: | 669 | # This index is a tag, dig deeper for a "last descendant" |
361 | previous_sibling.next_sibling = o | 670 | if isinstance(child, Tag) and child.contents: |
671 | descendant = child._last_descendant(False) | ||
672 | |||
673 | # As the final step, link last descendant. It should be linked | ||
674 | # to the parent's next sibling (if found), else walk up the chain | ||
675 | # and find a parent with a sibling. It should have no next sibling. | ||
676 | descendant.next_element = None | ||
677 | descendant.next_sibling = None | ||
678 | target = el | ||
679 | while True: | ||
680 | if target is None: | ||
681 | break | ||
682 | elif target.next_sibling is not None: | ||
683 | descendant.next_element = target.next_sibling | ||
684 | target.next_sibling.previous_element = child | ||
685 | break | ||
686 | target = target.parent | ||
362 | 687 | ||
363 | def _popToTag(self, name, nsprefix=None, inclusivePop=True): | 688 | def _popToTag(self, name, nsprefix=None, inclusivePop=True): |
364 | """Pops the tag stack up to and including the most recent | 689 | """Pops the tag stack up to and including the most recent |
365 | instance of the given tag. If inclusivePop is false, pops the tag | 690 | instance of the given tag. |
366 | stack up to but *not* including the most recent instqance of | 691 | |
367 | the given tag.""" | 692 | If there are no open tags with the given name, nothing will be |
368 | #print "Popping to %s" % name | 693 | popped. |
694 | |||
695 | :param name: Pop up to the most recent tag with this name. | ||
696 | :param nsprefix: The namespace prefix that goes with `name`. | ||
697 | :param inclusivePop: It this is false, pops the tag stack up | ||
698 | to but *not* including the most recent instqance of the | ||
699 | given tag. | ||
700 | |||
701 | """ | ||
702 | #print("Popping to %s" % name) | ||
369 | if name == self.ROOT_TAG_NAME: | 703 | if name == self.ROOT_TAG_NAME: |
370 | # The BeautifulSoup object itself can never be popped. | 704 | # The BeautifulSoup object itself can never be popped. |
371 | return | 705 | return |
@@ -374,6 +708,8 @@ class BeautifulSoup(Tag): | |||
374 | 708 | ||
375 | stack_size = len(self.tagStack) | 709 | stack_size = len(self.tagStack) |
376 | for i in range(stack_size - 1, 0, -1): | 710 | for i in range(stack_size - 1, 0, -1): |
711 | if not self.open_tag_counter.get(name): | ||
712 | break | ||
377 | t = self.tagStack[i] | 713 | t = self.tagStack[i] |
378 | if (name == t.name and nsprefix == t.prefix): | 714 | if (name == t.name and nsprefix == t.prefix): |
379 | if inclusivePop: | 715 | if inclusivePop: |
@@ -383,16 +719,26 @@ class BeautifulSoup(Tag): | |||
383 | 719 | ||
384 | return most_recently_popped | 720 | return most_recently_popped |
385 | 721 | ||
386 | def handle_starttag(self, name, namespace, nsprefix, attrs): | 722 | def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None, |
387 | """Push a start tag on to the stack. | 723 | sourcepos=None, namespaces=None): |
388 | 724 | """Called by the tree builder when a new tag is encountered. | |
389 | If this method returns None, the tag was rejected by the | 725 | |
390 | SoupStrainer. You should proceed as if the tag had not occured | 726 | :param name: Name of the tag. |
727 | :param nsprefix: Namespace prefix for the tag. | ||
728 | :param attrs: A dictionary of attribute values. | ||
729 | :param sourceline: The line number where this tag was found in its | ||
730 | source document. | ||
731 | :param sourcepos: The character position within `sourceline` where this | ||
732 | tag was found. | ||
733 | :param namespaces: A dictionary of all namespace prefix mappings | ||
734 | currently in scope in the document. | ||
735 | |||
736 | If this method returns None, the tag was rejected by an active | ||
737 | SoupStrainer. You should proceed as if the tag had not occurred | ||
391 | in the document. For instance, if this was a self-closing tag, | 738 | in the document. For instance, if this was a self-closing tag, |
392 | don't call handle_endtag. | 739 | don't call handle_endtag. |
393 | """ | 740 | """ |
394 | 741 | # print("Start tag %s: %s" % (name, attrs)) | |
395 | # print "Start tag %s: %s" % (name, attrs) | ||
396 | self.endData() | 742 | self.endData() |
397 | 743 | ||
398 | if (self.parse_only and len(self.tagStack) <= 1 | 744 | if (self.parse_only and len(self.tagStack) <= 1 |
@@ -400,34 +746,54 @@ class BeautifulSoup(Tag): | |||
400 | or not self.parse_only.search_tag(name, attrs))): | 746 | or not self.parse_only.search_tag(name, attrs))): |
401 | return None | 747 | return None |
402 | 748 | ||
403 | tag = Tag(self, self.builder, name, namespace, nsprefix, attrs, | 749 | tag = self.element_classes.get(Tag, Tag)( |
404 | self.currentTag, self._most_recent_element) | 750 | self, self.builder, name, namespace, nsprefix, attrs, |
751 | self.currentTag, self._most_recent_element, | ||
752 | sourceline=sourceline, sourcepos=sourcepos, | ||
753 | namespaces=namespaces | ||
754 | ) | ||
405 | if tag is None: | 755 | if tag is None: |
406 | return tag | 756 | return tag |
407 | if self._most_recent_element: | 757 | if self._most_recent_element is not None: |
408 | self._most_recent_element.next_element = tag | 758 | self._most_recent_element.next_element = tag |
409 | self._most_recent_element = tag | 759 | self._most_recent_element = tag |
410 | self.pushTag(tag) | 760 | self.pushTag(tag) |
411 | return tag | 761 | return tag |
412 | 762 | ||
413 | def handle_endtag(self, name, nsprefix=None): | 763 | def handle_endtag(self, name, nsprefix=None): |
414 | #print "End tag: " + name | 764 | """Called by the tree builder when an ending tag is encountered. |
765 | |||
766 | :param name: Name of the tag. | ||
767 | :param nsprefix: Namespace prefix for the tag. | ||
768 | """ | ||
769 | #print("End tag: " + name) | ||
415 | self.endData() | 770 | self.endData() |
416 | self._popToTag(name, nsprefix) | 771 | self._popToTag(name, nsprefix) |
417 | 772 | ||
418 | def handle_data(self, data): | 773 | def handle_data(self, data): |
774 | """Called by the tree builder when a chunk of textual data is encountered.""" | ||
419 | self.current_data.append(data) | 775 | self.current_data.append(data) |
420 | 776 | ||
421 | def decode(self, pretty_print=False, | 777 | def decode(self, pretty_print=False, |
422 | eventual_encoding=DEFAULT_OUTPUT_ENCODING, | 778 | eventual_encoding=DEFAULT_OUTPUT_ENCODING, |
423 | formatter="minimal"): | 779 | formatter="minimal", iterator=None): |
424 | """Returns a string or Unicode representation of this document. | 780 | """Returns a string or Unicode representation of the parse tree |
425 | To get Unicode, pass None for encoding.""" | 781 | as an HTML or XML document. |
426 | 782 | ||
783 | :param pretty_print: If this is True, indentation will be used to | ||
784 | make the document more readable. | ||
785 | :param eventual_encoding: The encoding of the final document. | ||
786 | If this is None, the document will be a Unicode string. | ||
787 | """ | ||
427 | if self.is_xml: | 788 | if self.is_xml: |
428 | # Print the XML declaration | 789 | # Print the XML declaration |
429 | encoding_part = '' | 790 | encoding_part = '' |
430 | if eventual_encoding is not None: | 791 | if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS: |
792 | # This is a special Python encoding; it can't actually | ||
793 | # go into an XML document because it means nothing | ||
794 | # outside of Python. | ||
795 | eventual_encoding = None | ||
796 | if eventual_encoding != None: | ||
431 | encoding_part = ' encoding="%s"' % eventual_encoding | 797 | encoding_part = ' encoding="%s"' % eventual_encoding |
432 | prefix = '<?xml version="1.0"%s?>\n' % encoding_part | 798 | prefix = '<?xml version="1.0"%s?>\n' % encoding_part |
433 | else: | 799 | else: |
@@ -437,9 +803,9 @@ class BeautifulSoup(Tag): | |||
437 | else: | 803 | else: |
438 | indent_level = 0 | 804 | indent_level = 0 |
439 | return prefix + super(BeautifulSoup, self).decode( | 805 | return prefix + super(BeautifulSoup, self).decode( |
440 | indent_level, eventual_encoding, formatter) | 806 | indent_level, eventual_encoding, formatter, iterator) |
441 | 807 | ||
442 | # Alias to make it easier to type import: 'from bs4 import _soup' | 808 | # Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup' |
443 | _s = BeautifulSoup | 809 | _s = BeautifulSoup |
444 | _soup = BeautifulSoup | 810 | _soup = BeautifulSoup |
445 | 811 | ||
@@ -450,19 +816,25 @@ class BeautifulStoneSoup(BeautifulSoup): | |||
450 | kwargs['features'] = 'xml' | 816 | kwargs['features'] = 'xml' |
451 | warnings.warn( | 817 | warnings.warn( |
452 | 'The BeautifulStoneSoup class is deprecated. Instead of using ' | 818 | 'The BeautifulStoneSoup class is deprecated. Instead of using ' |
453 | 'it, pass features="xml" into the BeautifulSoup constructor.') | 819 | 'it, pass features="xml" into the BeautifulSoup constructor.', |
820 | DeprecationWarning, stacklevel=2 | ||
821 | ) | ||
454 | super(BeautifulStoneSoup, self).__init__(*args, **kwargs) | 822 | super(BeautifulStoneSoup, self).__init__(*args, **kwargs) |
455 | 823 | ||
456 | 824 | ||
457 | class StopParsing(Exception): | 825 | class StopParsing(Exception): |
826 | """Exception raised by a TreeBuilder if it's unable to continue parsing.""" | ||
458 | pass | 827 | pass |
459 | 828 | ||
460 | class FeatureNotFound(ValueError): | 829 | class FeatureNotFound(ValueError): |
830 | """Exception raised by the BeautifulSoup constructor if no parser with the | ||
831 | requested features is found. | ||
832 | """ | ||
461 | pass | 833 | pass |
462 | 834 | ||
463 | 835 | ||
464 | #By default, act as an HTML pretty-printer. | 836 | #If this file is run as a script, act as an HTML pretty-printer. |
465 | if __name__ == '__main__': | 837 | if __name__ == '__main__': |
466 | import sys | 838 | import sys |
467 | soup = BeautifulSoup(sys.stdin) | 839 | soup = BeautifulSoup(sys.stdin) |
468 | print(soup.prettify()) | 840 | print((soup.prettify())) |