From 12fa81e8d67f0d9755decde5c5b766f56b2af8db Mon Sep 17 00:00:00 2001 From: Richard Purdie Date: Fri, 31 May 2024 12:04:03 +0100 Subject: bs4: Update to 4.12.3 from 4.4.1 It makes sense to switch to a more recent version and keep up to date with upstream changes and things like new python version support. (Bitbake rev: f5462156036e71911c66d07dbf3303cde862785b) Signed-off-by: Richard Purdie --- bitbake/lib/bs4/formatter.py | 185 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 bitbake/lib/bs4/formatter.py (limited to 'bitbake/lib/bs4/formatter.py') diff --git a/bitbake/lib/bs4/formatter.py b/bitbake/lib/bs4/formatter.py new file mode 100644 index 0000000000..9fa1b57cb6 --- /dev/null +++ b/bitbake/lib/bs4/formatter.py @@ -0,0 +1,185 @@ +from bs4.dammit import EntitySubstitution + +class Formatter(EntitySubstitution): + """Describes a strategy to use when outputting a parse tree to a string. + + Some parts of this strategy come from the distinction between + HTML4, HTML5, and XML. Others are configurable by the user. + + Formatters are passed in as the `formatter` argument to methods + like `PageElement.encode`. Most people won't need to think about + formatters, and most people who need to think about them can pass + in one of these predefined strings as `formatter` rather than + making a new Formatter object: + + For HTML documents: + * 'html' - HTML entity substitution for generic HTML documents. (default) + * 'html5' - HTML entity substitution for HTML5 documents, as + well as some optimizations in the way tags are rendered. + * 'minimal' - Only make the substitutions necessary to guarantee + valid HTML. + * None - Do not perform any substitution. This will be faster + but may result in invalid markup. + + For XML documents: + * 'html' - Entity substitution for XHTML documents. + * 'minimal' - Only make the substitutions necessary to guarantee + valid XML. (default) + * None - Do not perform any substitution. This will be faster + but may result in invalid markup. + """ + # Registries of XML and HTML formatters. + XML_FORMATTERS = {} + HTML_FORMATTERS = {} + + HTML = 'html' + XML = 'xml' + + HTML_DEFAULTS = dict( + cdata_containing_tags=set(["script", "style"]), + ) + + def _default(self, language, value, kwarg): + if value is not None: + return value + if language == self.XML: + return set() + return self.HTML_DEFAULTS[kwarg] + + def __init__( + self, language=None, entity_substitution=None, + void_element_close_prefix='/', cdata_containing_tags=None, + empty_attributes_are_booleans=False, indent=1, + ): + r"""Constructor. + + :param language: This should be Formatter.XML if you are formatting + XML markup and Formatter.HTML if you are formatting HTML markup. + + :param entity_substitution: A function to call to replace special + characters with XML/HTML entities. For examples, see + bs4.dammit.EntitySubstitution.substitute_html and substitute_xml. + :param void_element_close_prefix: By default, void elements + are represented as (XML rules) rather than + (HTML rules). To get , pass in the empty string. + :param cdata_containing_tags: The list of tags that are defined + as containing CDATA in this dialect. For example, in HTML, +