bs4: Update to 4.12.3 from 4.4.1

It makes sense to switch to a more recent version and keep up to date with upstream changes and things like new python version support. (Bitbake rev: f5462156036e71911c66d07dbf3303cde862785b) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Richard Purdie <richard.purdie@linuxfoundation.org> 2024-05-31 12:04:03 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2024-05-31 12:43:18 +0100
commit: 12fa81e8d67f0d9755decde5c5b766f56b2af8db (patch)
tree: de58af9a17e4760de36091d525d7eba8bc6f1578 /bitbake/lib/bs4/__init__.py
parent: 99ff46cc9bb12619af55c892452cee3b90a545f0 (diff)
download: poky-12fa81e8d67f0d9755decde5c5b766f56b2af8db.tar.gz
1 files changed, 526 insertions, 154 deletions
diff --git a/bitbake/lib/bs4/__init__.py b/bitbake/lib/bs4/__init__.py
index e35725b86e..d8ad5e1dc1 100644
--- a/bitbake/lib/bs4/__init__.py
+++ b/bitbake/lib/bs4/__init__.py
@@ -1,65 +1,99 @@
-"""Beautiful Soup
+"""Beautiful Soup Elixir and Tonic - "The Screen-Scraper's Friend".
-Elixir and Tonic
-"The Screen-Scraper's Friend"
 http://www.crummy.com/software/BeautifulSoup/
 Beautiful Soup uses a pluggable XML or HTML parser to parse a
 (possibly invalid) document into a tree representation. Beautiful Soup
-provides provides methods and Pythonic idioms that make it easy to
+provides methods and Pythonic idioms that make it easy to navigate,
-navigate, search, and modify the parse tree.
+search, and modify the parse tree.
-Beautiful Soup works with Python 2.6 and up. It works better if lxml
+Beautiful Soup works with Python 3.6 and up. It works better if lxml
 and/or html5lib is installed.
 For more than you ever wanted to know about Beautiful Soup, see the
-documentation:
+documentation: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
-http://www.crummy.com/software/BeautifulSoup/bs4/doc/
 """
 __author__ = "Leonard Richardson (leonardr@segfault.org)"
-__version__ = "4.4.1"
+__version__ = "4.12.3"
-__copyright__ = "Copyright (c) 2004-2015 Leonard Richardson"
+__copyright__ = "Copyright (c) 2004-2024 Leonard Richardson"
+# Use of this source code is governed by the MIT license.
 __license__ = "MIT"
 __all__ = ['BeautifulSoup']
+from collections import Counter
 import os
 import re
+import sys
+import traceback
 import warnings
-from .builder import builder_registry, ParserRejectedMarkup
+# The very first thing we do is give a useful error if someone is
+# running this code under Python 2.
+if sys.version_info.major < 3:
+    raise ImportError('You are trying to use a Python 3-specific version of Beautiful Soup under Python 2. This will not work. The final version of Beautiful Soup to support Python 2 was 4.9.3.')
+from .builder import (
+    builder_registry,
+    ParserRejectedMarkup,
+    XMLParsedAsHTMLWarning,
+    HTMLParserTreeBuilder
+)
 from .dammit import UnicodeDammit
 from .element import (
    CData,
    Comment,
+    CSS,
    DEFAULT_OUTPUT_ENCODING,
    Declaration,
    Doctype,
    NavigableString,
    PageElement,
    ProcessingInstruction,
+    PYTHON_SPECIFIC_ENCODINGS,
    ResultSet,
+    Script,
+    Stylesheet,
    SoupStrainer,
    Tag,
+    TemplateString,
    )
-# The very first thing we do is give a useful error if someone is
+# Define some custom warnings.
-# running this code under Python 3 without converting it.
+class GuessedAtParserWarning(UserWarning):
-'You are trying to run the Python 2 version of Beautiful Soup under Python 3. This will not work.'!='You need to convert the code, either by installing it (`python setup.py install`) or by running 2to3 (`2to3 -w bs4`).'
+    """The warning issued when BeautifulSoup has to guess what parser to
+    use -- probably because no parser was specified in the constructor.
+    """
-class BeautifulSoup(Tag):
+class MarkupResemblesLocatorWarning(UserWarning):
+    """The warning issued when BeautifulSoup is given 'markup' that
+    actually looks like a resource locator -- a URL or a path to a file
+    on disk.
    """
-    This class defines the basic interface called by the tree builders.
-    These methods will be called by the parser:
+   
-      reset()
+class BeautifulSoup(Tag):
-      feed(markup)
+    """A data structure representing a parsed HTML or XML document.
+    Most of the methods you'll call on a BeautifulSoup object are inherited from
+    PageElement or Tag.
+    Internally, this class defines the basic interface called by the
+    tree builders when converting an HTML/XML document into a data
+    structure. The interface abstracts away the differences between
+    parsers. To write a new tree builder, you'll need to understand
+    these methods as a whole.
+    These methods will be called by the BeautifulSoup constructor:
+      * reset()
+      * feed(markup)
    The tree builder may call these methods from its feed() implementation:
-      handle_starttag(name, attrs) # See note about return value
+      * handle_starttag(name, attrs) # See note about return value
-      handle_endtag(name)
+      * handle_endtag(name)
-      handle_data(data) # Appends to the current data node
+      * handle_data(data) # Appends to the current data node
-      endData(containerClass=NavigableString) # Ends the current data node
+      * endData(containerClass) # Ends the current data node
    No matter how complicated the underlying parser is, you should be
    able to build a tree using 'start tag' events, 'end tag' events,
@@ -69,24 +103,77 @@ class BeautifulSoup(Tag):
    like HTML's <br> tag), call handle_starttag and then
    handle_endtag.
    """
+    # Since BeautifulSoup subclasses Tag, it's possible to treat it as
+    # a Tag with a .name. This name makes it clear the BeautifulSoup
+    # object isn't a real markup tag.
    ROOT_TAG_NAME = '[document]'
    # If the end-user gives no indication which tree builder they
    # want, look for one with these features.
    DEFAULT_BUILDER_FEATURES = ['html', 'fast']
+    # A string containing all ASCII whitespace characters, used in
+    # endData() to detect data chunks that seem 'empty'.
    ASCII_SPACES = '\x20\x0a\x09\x0c\x0d'
-    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nTo get rid of this warning, change this:\n\n BeautifulSoup([your markup])\n\nto this:\n\n BeautifulSoup([your markup], \"%(parser)s\")\n"
+    NO_PARSER_SPECIFIED_WARNING = "No parser was explicitly specified, so I'm using the best available %(markup_type)s parser for this system (\"%(parser)s\"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.\n\nThe code that caused this warning is on line %(line_number)s of the file %(filename)s. To get rid of this warning, pass the additional argument 'features=\"%(parser)s\"' to the BeautifulSoup constructor.\n"
+   
    def __init__(self, markup="", features=None, builder=None,
                 parse_only=None, from_encoding=None, exclude_encodings=None,
-                 **kwargs):
+                 element_classes=None, **kwargs):
-        """The Soup object is initialized as the 'root tag', and the
+        """Constructor.
-        provided markup (which can be a string or a file-like object)
-        is fed into the underlying parser."""
+        :param markup: A string or a file-like object representing
+         markup to be parsed.
+        :param features: Desirable features of the parser to be
+         used. This may be the name of a specific parser ("lxml",
+         "lxml-xml", "html.parser", or "html5lib") or it may be the
+         type of markup to be used ("html", "html5", "xml"). It's
+         recommended that you name a specific parser, so that
+         Beautiful Soup gives you the same results across platforms
+         and virtual environments.
+        :param builder: A TreeBuilder subclass to instantiate (or
+         instance to use) instead of looking one up based on
+         `features`. You only need to use this if you've implemented a
+         custom TreeBuilder.
+        :param parse_only: A SoupStrainer. Only parts of the document
+         matching the SoupStrainer will be considered. This is useful
+         when parsing part of a document that would otherwise be too
+         large to fit into memory.
+        :param from_encoding: A string indicating the encoding of the
+         document to be parsed. Pass this in if Beautiful Soup is
+         guessing wrongly about the document's encoding.
+        :param exclude_encodings: A list of strings indicating
+         encodings known to be wrong. Pass this in if you don't know
+         the document's encoding but you know Beautiful Soup's guess is
+         wrong.
+        :param element_classes: A dictionary mapping BeautifulSoup
+         classes like Tag and NavigableString, to other classes you'd
+         like to be instantiated instead as the parse tree is
+         built. This is useful for subclassing Tag or NavigableString
+         to modify default behavior.
+        :param kwargs: For backwards compatibility purposes, the
+         constructor accepts certain keyword arguments used in
+         Beautiful Soup 3. None of these arguments do anything in
+         Beautiful Soup 4; they will result in a warning and then be
+         ignored.
+         
+         Apart from this, any keyword arguments passed into the
+         BeautifulSoup constructor are propagated to the TreeBuilder
+         constructor. This makes it possible to configure a
+         TreeBuilder by passing in arguments, not just by saying which
+         one to use.
+        """
        if 'convertEntities' in kwargs:
+            del kwargs['convertEntities']
            warnings.warn(
                "BS4 does not respect the convertEntities argument to the "
                "BeautifulSoup constructor. Entities are always converted "
@@ -125,10 +212,10 @@ class BeautifulSoup(Tag):
            if old_name in kwargs:
                warnings.warn(
                    'The "%s" argument to the BeautifulSoup constructor '
-                    'has been renamed to "%s."' % (old_name, new_name))
+                    'has been renamed to "%s."' % (old_name, new_name),
-                value = kwargs[old_name]
+                    DeprecationWarning, stacklevel=3
-                del kwargs[old_name]
+                )
-                return value
+                return kwargs.pop(old_name)
            return None
        parse_only = parse_only or deprecated_argument(
@@ -137,13 +224,23 @@ class BeautifulSoup(Tag):
        from_encoding = from_encoding or deprecated_argument(
            "fromEncoding", "from_encoding")
-        if len(kwargs) > 0:
+        if from_encoding and isinstance(markup, str):
-            arg = list(kwargs.keys()).pop()
+            warnings.warn("You provided Unicode markup but also provided a value for from_encoding. Your from_encoding will be ignored.")
-            raise TypeError(
+            from_encoding = None
-                "__init__() got an unexpected keyword argument '%s'" % arg)
+        self.element_classes = element_classes or dict()
-        if builder is None:
-            original_features = features
+        # We need this information to track whether or not the builder
+        # was specified well enough that we can omit the 'you need to
+        # specify a parser' warning.
+        original_builder = builder
+        original_features = features
+            
+        if isinstance(builder, type):
+            # A builder class was passed in; it needs to be instantiated.
+            builder_class = builder
+            builder = None
+        elif builder is None:
            if isinstance(features, str):
                features = [features]
            if features is None or len(features) == 0:
@@ -154,85 +251,227 @@ class BeautifulSoup(Tag):
                    "Couldn't find a tree builder with the features you "
                    "requested: %s. Do you need to install a parser library?"
                    % ",".join(features))
-            builder = builder_class()
-            if not (original_features == builder.NAME or
+        # At this point either we have a TreeBuilder instance in
-                    original_features in builder.ALTERNATE_NAMES):
+        # builder, or we have a builder_class that we can instantiate
+        # with the remaining **kwargs.
+        if builder is None:
+            builder = builder_class(**kwargs)
+            if not original_builder and not (
+                    original_features == builder.NAME or
+                    original_features in builder.ALTERNATE_NAMES
+            ) and markup:
+                # The user did not tell us which TreeBuilder to use,
+                # and we had to guess. Issue a warning.
                if builder.is_xml:
                    markup_type = "XML"
                else:
                    markup_type = "HTML"
-                warnings.warn(self.NO_PARSER_SPECIFIED_WARNING % dict(
-                    parser=builder.NAME,
-                    markup_type=markup_type))
+                # This code adapted from warnings.py so that we get the same line
+                # of code as our warnings.warn() call gets, even if the answer is wrong
+                # (as it may be in a multithreading situation).
+                caller = None
+                try:
+                    caller = sys._getframe(1)
+                except ValueError:
+                    pass
+                if caller:
+                    globals = caller.f_globals
+                    line_number = caller.f_lineno
+                else:
+                    globals = sys.__dict__
+                    line_number= 1                    
+                filename = globals.get('__file__')
+                if filename:
+                    fnl = filename.lower()
+                    if fnl.endswith((".pyc", ".pyo")):
+                        filename = filename[:-1]
+                if filename:
+                    # If there is no filename at all, the user is most likely in a REPL,
+                    # and the warning is not necessary.
+                    values = dict(
+                        filename=filename,
+                        line_number=line_number,
+                        parser=builder.NAME,
+                        markup_type=markup_type
+                    )
+                    warnings.warn(
+                        self.NO_PARSER_SPECIFIED_WARNING % values,
+                        GuessedAtParserWarning, stacklevel=2
+                    )
+        else:
+            if kwargs:
+                warnings.warn("Keyword arguments to the BeautifulSoup constructor will be ignored. These would normally be passed into the TreeBuilder constructor, but a TreeBuilder instance was passed in as `builder`.")
+                    
        self.builder = builder
        self.is_xml = builder.is_xml
-        self.builder.soup = self
+        self.known_xml = self.is_xml
+        self._namespaces = dict()
        self.parse_only = parse_only
        if hasattr(markup, 'read'):        # It's a file-type object.
            markup = markup.read()
-        elif len(markup) <= 256:
+        elif len(markup) <= 256 and (
-            # Print out warnings for a couple beginner problems
+                (isinstance(markup, bytes) and not b'<' in markup)
+                or (isinstance(markup, str) and not '<' in markup)
+        ):
+            # Issue warnings for a couple beginner problems
            # involving passing non-markup to Beautiful Soup.
            # Beautiful Soup will still parse the input as markup,
-            # just in case that's what the user really wants.
+            # since that is sometimes the intended behavior.
-            if (isinstance(markup, str)
+            if not self._markup_is_url(markup):
-                and not os.path.supports_unicode_filenames):
+                self._markup_resembles_filename(markup)                
-                possible_filename = markup.encode("utf8")
-            else:
-                possible_filename = markup
-            is_file = False
-            try:
-                is_file = os.path.exists(possible_filename)
-            except Exception as e:
-                # This is almost certainly a problem involving
-                # characters not valid in filenames on this
-                # system. Just let it go.
-                pass
-            if is_file:
-                if isinstance(markup, str):
-                    markup = markup.encode("utf8")
-                warnings.warn(
-                    '"%s" looks like a filename, not markup. You should probably open this file and pass the filehandle into Beautiful Soup.' % markup)
-            if markup[:5] == "http:" or markup[:6] == "https:":
-                # TODO: This is ugly but I couldn't get it to work in
-                # Python 3 otherwise.
-                if ((isinstance(markup, bytes) and not b' ' in markup)
-                    or (isinstance(markup, str) and not ' ' in markup)):
-                    if isinstance(markup, str):
-                        markup = markup.encode("utf8")
-                    warnings.warn(
-                        '"%s" looks like a URL. Beautiful Soup is not an HTTP client. You should probably use an HTTP client to get the document behind the URL, and feed that document to Beautiful Soup.' % markup)
+        rejections = []
+        success = False
        for (self.markup, self.original_encoding, self.declared_html_encoding,
         self.contains_replacement_characters) in (
             self.builder.prepare_markup(
                 markup, from_encoding, exclude_encodings=exclude_encodings)):
            self.reset()
+            self.builder.initialize_soup(self)
            try:
                self._feed()
+                success = True
                break
-            except ParserRejectedMarkup:
+            except ParserRejectedMarkup as e:
+                rejections.append(e)
                pass
+        if not success:
+            other_exceptions = [str(e) for e in rejections]
+            raise ParserRejectedMarkup(
+                "The markup you provided was rejected by the parser. Trying a different parser or a different encoding may help.\n\nOriginal exception(s) from parser:\n " + "\n ".join(other_exceptions)
+            )
        # Clear out the markup and remove the builder's circular
        # reference to this object.
        self.markup = None
        self.builder.soup = None
-    def __copy__(self):
+    def _clone(self):
-        return type(self)(self.encode(), builder=self.builder)
+        """Create a new BeautifulSoup object with the same TreeBuilder,
+        but not associated with any markup.
+        This is the first step of the deepcopy process.
+        """
+        clone = type(self)("", None, self.builder)
+        # Keep track of the encoding of the original document,
+        # since we won't be parsing it again.
+        clone.original_encoding = self.original_encoding
+        return clone
+        
    def __getstate__(self):
        # Frequently a tree builder can't be pickled.
        d = dict(self.__dict__)
-        if 'builder' in d and not self.builder.picklable:
+        if 'builder' in d and d['builder'] is not None and not self.builder.picklable:
-            del d['builder']
+            d['builder'] = type(self.builder)
+        # Store the contents as a Unicode string.
+        d['contents'] = []
+        d['markup'] = self.decode()
+        # If _most_recent_element is present, it's a Tag object left
+        # over from initial parse. It might not be picklable and we
+        # don't need it.
+        if '_most_recent_element' in d:
+            del d['_most_recent_element']
        return d
+    def __setstate__(self, state):
+        # If necessary, restore the TreeBuilder by looking it up.
+        self.__dict__ = state
+        if isinstance(self.builder, type):
+            self.builder = self.builder()
+        elif not self.builder:
+            # We don't know which builder was used to build this
+            # parse tree, so use a default we know is always available.
+            self.builder = HTMLParserTreeBuilder()
+        self.builder.soup = self
+        self.reset()
+        self._feed()
+        return state
+    
+    @classmethod
+    def _decode_markup(cls, markup):
+        """Ensure `markup` is a Unicode string (bytes are decoded as UTF-8) so it's safe to send into warnings.warn.
+        TODO: warnings.warn had this problem back in 2010 but it might not
+        anymore.
+        """
+        if isinstance(markup, bytes):
+            decoded = markup.decode('utf-8', 'replace')
+        else:
+            decoded = markup
+        return decoded
+    @classmethod
+    def _markup_is_url(cls, markup):
+        """Error-handling method to raise a warning if incoming markup looks
+        like a URL.
+        :param markup: A bytestring or string.
+        :return: Whether or not the markup resembles a URL
+            closely enough to justify a warning.
+        """
+        if isinstance(markup, bytes):
+            space = b' '
+            cant_start_with = (b"http:", b"https:")
+        elif isinstance(markup, str):
+            space = ' '
+            cant_start_with = ("http:", "https:")
+        else:
+            return False
+        if any(markup.startswith(prefix) for prefix in cant_start_with):
+            if not space in markup:
+                warnings.warn(
+                    'The input looks more like a URL than markup. You may want to use'
+                    ' an HTTP client like requests to get the document behind'
+                    ' the URL, and feed that document to Beautiful Soup.',
+                    MarkupResemblesLocatorWarning,
+                    stacklevel=3
+                )
+                return True
+        return False
+    @classmethod
+    def _markup_resembles_filename(cls, markup):
+        """Error-handling method to raise a warning if incoming markup
+        resembles a filename.
+        :param markup: A bytestring or string.
+        :return: Whether or not the markup resembles a filename
+            closely enough to justify a warning.
+        """
+        path_characters = '/\\'
+        extensions = ['.html', '.htm', '.xml', '.xhtml', '.txt']
+        if isinstance(markup, bytes):
+            path_characters = path_characters.encode("utf8")
+            extensions = [x.encode('utf8') for x in extensions]
+        filelike = False
+        if any(x in markup for x in path_characters):
+            filelike = True
+        else:
+            lower = markup.lower()
+            if any(lower.endswith(ext) for ext in extensions):
+                filelike = True
+        if filelike:
+            warnings.warn(
+                'The input looks more like a filename than markup. You may'
+                ' want to open this file and pass the filehandle into'
+                ' Beautiful Soup.',
+                MarkupResemblesLocatorWarning, stacklevel=3
+            )
+            return True
+        return False
+    
    def _feed(self):
+        """Internal method that parses previously set markup, creating a large
+        number of Tag and NavigableString objects.
+        """
        # Convert the document to Unicode.
        self.builder.reset()
@@ -243,48 +482,111 @@ class BeautifulSoup(Tag):
            self.popTag()
    def reset(self):
+        """Reset this object to a state as though it had never parsed any
+        markup.
+        """
        Tag.__init__(self, self, self.builder, self.ROOT_TAG_NAME)
        self.hidden = 1
        self.builder.reset()
        self.current_data = []
        self.currentTag = None
        self.tagStack = []
+        self.open_tag_counter = Counter()
        self.preserve_whitespace_tag_stack = []
+        self.string_container_stack = []
+        self._most_recent_element = None
        self.pushTag(self)
-    def new_tag(self, name, namespace=None, nsprefix=None, **attrs):
+    def new_tag(self, name, namespace=None, nsprefix=None, attrs={},
-        """Create a new tag associated with this soup."""
+                sourceline=None, sourcepos=None, **kwattrs):
-        return Tag(None, self.builder, name, namespace, nsprefix, attrs)
+        """Create a new Tag associated with this BeautifulSoup object.
+        :param name: The name of the new Tag.
+        :param namespace: The URI of the new Tag's XML namespace, if any.
+        :param nsprefix: The prefix for the new Tag's XML namespace, if any.
+        :param attrs: A dictionary of this Tag's attribute values; can
+            be used instead of `kwattrs` for attributes like 'class'
+            that are reserved words in Python.
+        :param sourceline: The line number where this tag was
+            (purportedly) found in its source document.
+        :param sourcepos: The character position within `sourceline` where this
+            tag was (purportedly) found.
+        :param kwattrs: Keyword arguments for the new Tag's attribute values.
-    def new_string(self, s, subclass=NavigableString):
+        """
-        """Create a new NavigableString associated with this soup."""
+        kwattrs.update(attrs)
-        return subclass(s)
+        return self.element_classes.get(Tag, Tag)(
+            None, self.builder, name, namespace, nsprefix, kwattrs,
+            sourceline=sourceline, sourcepos=sourcepos
+        )
+    def string_container(self, base_class=None):
+        container = base_class or NavigableString
+        
+        # There may be a general override of NavigableString.
+        container = self.element_classes.get(
+            container, container
+        )
+        # On top of that, we may be inside a tag that needs a special
+        # container class.
+        if self.string_container_stack and container is NavigableString:
+            container = self.builder.string_containers.get(
+                self.string_container_stack[-1].name, container
+            )
+        return container
+        
+    def new_string(self, s, subclass=None):
+        """Create a new NavigableString associated with this BeautifulSoup
+        object.
+        """
+        container = self.string_container(subclass)
+        return container(s)
-    def insert_before(self, successor):
+    def insert_before(self, *args):
+        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
+        it because there is nothing before or after it in the parse tree.
+        """
        raise NotImplementedError("BeautifulSoup objects don't support insert_before().")
-    def insert_after(self, successor):
+    def insert_after(self, *args):
+        """This method is part of the PageElement API, but `BeautifulSoup` doesn't implement
+        it because there is nothing before or after it in the parse tree.
+        """
        raise NotImplementedError("BeautifulSoup objects don't support insert_after().")
    def popTag(self):
+        """Internal method called by _popToTag when a tag is closed."""
        tag = self.tagStack.pop()
+        if tag.name in self.open_tag_counter:
+            self.open_tag_counter[tag.name] -= 1
        if self.preserve_whitespace_tag_stack and tag == self.preserve_whitespace_tag_stack[-1]:
            self.preserve_whitespace_tag_stack.pop()
-        #print "Pop", tag.name
+        if self.string_container_stack and tag == self.string_container_stack[-1]:
+            self.string_container_stack.pop()
+        #print("Pop", tag.name)
        if self.tagStack:
            self.currentTag = self.tagStack[-1]
        return self.currentTag
    def pushTag(self, tag):
-        #print "Push", tag.name
+        """Internal method called by handle_starttag when a tag is opened."""
-        if self.currentTag:
+        #print("Push", tag.name)
+        if self.currentTag is not None:
            self.currentTag.contents.append(tag)
        self.tagStack.append(tag)
        self.currentTag = self.tagStack[-1]
+        if tag.name != self.ROOT_TAG_NAME:
+            self.open_tag_counter[tag.name] += 1
        if tag.name in self.builder.preserve_whitespace_tags:
            self.preserve_whitespace_tag_stack.append(tag)
+        if tag.name in self.builder.string_containers:
+            self.string_container_stack.append(tag)
-    def endData(self, containerClass=NavigableString):
+    def endData(self, containerClass=None):
+        """Method called by the TreeBuilder when the end of a data segment
+        occurs.
+        """       
        if self.current_data:
            current_data = ''.join(self.current_data)
            # If whitespace is not preserved, and this string contains
@@ -311,61 +613,93 @@ class BeautifulSoup(Tag):
                    not self.parse_only.search(current_data)):
                return
+            containerClass = self.string_container(containerClass)
            o = containerClass(current_data)
            self.object_was_parsed(o)
    def object_was_parsed(self, o, parent=None, most_recent_element=None):
-        """Add an object to the parse tree."""
+        """Method called by the TreeBuilder to integrate an object into the parse tree."""
-        parent = parent or self.currentTag
+        if parent is None:
-        previous_element = most_recent_element or self._most_recent_element
+            parent = self.currentTag
+        if most_recent_element is not None:
+            previous_element = most_recent_element
+        else:
+            previous_element = self._most_recent_element
        next_element = previous_sibling = next_sibling = None
        if isinstance(o, Tag):
            next_element = o.next_element
            next_sibling = o.next_sibling
            previous_sibling = o.previous_sibling
-            if not previous_element:
+            if previous_element is None:
                previous_element = o.previous_element
+        fix = parent.next_element is not None
        o.setup(parent, previous_element, next_element, previous_sibling, next_sibling)
        self._most_recent_element = o
        parent.contents.append(o)
-        if parent.next_sibling:
+        # Check if we are inserting into an already parsed node.
-            # This node is being inserted into an element that has
+        if fix:
-            # already been parsed. Deal with any dangling references.
+            self._linkage_fixer(parent)
-            index = parent.contents.index(o)
-            if index == 0:
+    def _linkage_fixer(self, el):
-                previous_element = parent
+        """Make sure linkage of this fragment is sound."""
-                previous_sibling = None
-            else:
+        first = el.contents[0]
-                previous_element = previous_sibling = parent.contents[index-1]
+        child = el.contents[-1]
-            if index == len(parent.contents)-1:
+        descendant = child
-                next_element = parent.next_sibling
-                next_sibling = None
+        if child is first and el.parent is not None:
-            else:
+            # Parent should be linked to first child
-                next_element = next_sibling = parent.contents[index+1]
+            el.next_element = child
+            # We are no longer linked to whatever this element is
-            o.previous_element = previous_element
+            prev_el = child.previous_element
-            if previous_element:
+            if prev_el is not None and prev_el is not el:
-                previous_element.next_element = o
+                prev_el.next_element = None
-            o.next_element = next_element
+            # First child should be linked to the parent, and no previous siblings.
-            if next_element:
+            child.previous_element = el
-                next_element.previous_element = o
+            child.previous_sibling = None
-            o.next_sibling = next_sibling
-            if next_sibling:
+        # We have no sibling as we've been appended as the last.
-                next_sibling.previous_sibling = o
+        child.next_sibling = None
-            o.previous_sibling = previous_sibling
-            if previous_sibling:
+        # This index is a tag, dig deeper for a "last descendant"
-                previous_sibling.next_sibling = o
+        if isinstance(child, Tag) and child.contents:
+            descendant = child._last_descendant(False)
+        # As the final step, link last descendant. It should be linked
+        # to the parent's next sibling (if found), else walk up the chain
+        # and find a parent with a sibling. It should have no next sibling.
+        descendant.next_element = None
+        descendant.next_sibling = None
+        target = el
+        while True:
+            if target is None:
+                break
+            elif target.next_sibling is not None:
+                descendant.next_element = target.next_sibling
+                target.next_sibling.previous_element = child
+                break
+            target = target.parent
    def _popToTag(self, name, nsprefix=None, inclusivePop=True):
        """Pops the tag stack up to and including the most recent
-        instance of the given tag. If inclusivePop is false, pops the tag
+        instance of the given tag.
-        stack up to but *not* including the most recent instqance of
-        the given tag."""
+        If there are no open tags with the given name, nothing will be
-        #print "Popping to %s" % name
+        popped.
+        :param name: Pop up to the most recent tag with this name.
+        :param nsprefix: The namespace prefix that goes with `name`.
+        :param inclusivePop: If this is false, pops the tag stack up
+          to but *not* including the most recent instance of the
+          given tag.
+        """
+        #print("Popping to %s" % name)
        if name == self.ROOT_TAG_NAME:
            # The BeautifulSoup object itself can never be popped.
            return
@@ -374,6 +708,8 @@ class BeautifulSoup(Tag):
        stack_size = len(self.tagStack)
        for i in range(stack_size - 1, 0, -1):
+            if not self.open_tag_counter.get(name):
+                break
            t = self.tagStack[i]
            if (name == t.name and nsprefix == t.prefix):
                if inclusivePop:
@@ -383,16 +719,26 @@ class BeautifulSoup(Tag):
        return most_recently_popped
-    def handle_starttag(self, name, namespace, nsprefix, attrs):
+    def handle_starttag(self, name, namespace, nsprefix, attrs, sourceline=None,
-        """Push a start tag on to the stack.
+                        sourcepos=None, namespaces=None):
+        """Called by the tree builder when a new tag is encountered.
-        If this method returns None, the tag was rejected by the
-        SoupStrainer. You should proceed as if the tag had not occured
+        :param name: Name of the tag.
+        :param nsprefix: Namespace prefix for the tag.
+        :param attrs: A dictionary of attribute values.
+        :param sourceline: The line number where this tag was found in its
+            source document.
+        :param sourcepos: The character position within `sourceline` where this
+            tag was found.
+        :param namespaces: A dictionary of all namespace prefix mappings 
+            currently in scope in the document.
+        If this method returns None, the tag was rejected by an active
+        SoupStrainer. You should proceed as if the tag had not occurred
        in the document. For instance, if this was a self-closing tag,
        don't call handle_endtag.
        """
+        # print("Start tag %s: %s" % (name, attrs))
-        # print "Start tag %s: %s" % (name, attrs)
        self.endData()
        if (self.parse_only and len(self.tagStack) <= 1
@@ -400,34 +746,54 @@ class BeautifulSoup(Tag):
                 or not self.parse_only.search_tag(name, attrs))):
            return None
-        tag = Tag(self, self.builder, name, namespace, nsprefix, attrs,
+        tag = self.element_classes.get(Tag, Tag)(
-                  self.currentTag, self._most_recent_element)
+            self, self.builder, name, namespace, nsprefix, attrs,
+            self.currentTag, self._most_recent_element,
+            sourceline=sourceline, sourcepos=sourcepos,
+            namespaces=namespaces
+        )
        if tag is None:
            return tag
-        if self._most_recent_element:
+        if self._most_recent_element is not None:
            self._most_recent_element.next_element = tag
        self._most_recent_element = tag
        self.pushTag(tag)
        return tag
    def handle_endtag(self, name, nsprefix=None):
-        #print "End tag: " + name
+        """Called by the tree builder when an ending tag is encountered.
+        :param name: Name of the tag.
+        :param nsprefix: Namespace prefix for the tag.
+        """
+        #print("End tag: " + name)
        self.endData()
        self._popToTag(name, nsprefix)
+        
    def handle_data(self, data):
+        """Called by the tree builder when a chunk of textual data is encountered."""
        self.current_data.append(data)
+       
    def decode(self, pretty_print=False,
               eventual_encoding=DEFAULT_OUTPUT_ENCODING,
-               formatter="minimal"):
+               formatter="minimal", iterator=None):
-        """Returns a string or Unicode representation of this document.
+        """Returns a string or Unicode representation of the parse tree
-        To get Unicode, pass None for encoding."""
+            as an HTML or XML document.
+        :param pretty_print: If this is True, indentation will be used to
+            make the document more readable.
+        :param eventual_encoding: The encoding of the final document.
+            If this is None, the document will be a Unicode string.
+        """
        if self.is_xml:
            # Print the XML declaration
            encoding_part = ''
-            if eventual_encoding is not None:
+            if eventual_encoding in PYTHON_SPECIFIC_ENCODINGS:
+                # This is a special Python encoding; it can't actually
+                # go into an XML document because it means nothing
+                # outside of Python.
+                eventual_encoding = None
+            if eventual_encoding != None:
                encoding_part = ' encoding="%s"' % eventual_encoding
            prefix = '<?xml version="1.0"%s?>\n' % encoding_part
        else:
@@ -437,9 +803,9 @@ class BeautifulSoup(Tag):
        else:
            indent_level = 0
        return prefix + super(BeautifulSoup, self).decode(
-            indent_level, eventual_encoding, formatter)
+            indent_level, eventual_encoding, formatter, iterator)
-# Alias to make it easier to type import: 'from bs4 import _soup'
+# Aliases to make it easier to get started quickly, e.g. 'from bs4 import _soup'
 _s = BeautifulSoup
 _soup = BeautifulSoup
@@ -450,19 +816,25 @@ class BeautifulStoneSoup(BeautifulSoup):
        kwargs['features'] = 'xml'
        warnings.warn(
            'The BeautifulStoneSoup class is deprecated. Instead of using '
-            'it, pass features="xml" into the BeautifulSoup constructor.')
+            'it, pass features="xml" into the BeautifulSoup constructor.',
+            DeprecationWarning, stacklevel=2
+        )
        super(BeautifulStoneSoup, self).__init__(*args, **kwargs)
 class StopParsing(Exception):
+    """Exception raised by a TreeBuilder if it's unable to continue parsing."""
    pass
 class FeatureNotFound(ValueError):
+    """Exception raised by the BeautifulSoup constructor if no parser with the
+    requested features is found.
+    """
    pass
-#By default, act as an HTML pretty-printer.
+#If this file is run as a script, act as an HTML pretty-printer.
 if __name__ == '__main__':
    import sys
    soup = BeautifulSoup(sys.stdin)
-    print(soup.prettify())
+    print((soup.prettify()))
author	Richard Purdie <richard.purdie@linuxfoundation.org>	2024-05-31 12:04:03 +0100
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2024-05-31 12:43:18 +0100
commit	12fa81e8d67f0d9755decde5c5b766f56b2af8db (patch)
tree	de58af9a17e4760de36091d525d7eba8bc6f1578 /bitbake/lib/bs4/__init__.py
parent	99ff46cc9bb12619af55c892452cee3b90a545f0 (diff)
download	poky-12fa81e8d67f0d9755decde5c5b766f56b2af8db.tar.gz