diff options
| author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2024-05-31 12:04:03 +0100 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2024-05-31 12:43:18 +0100 |
| commit | 12fa81e8d67f0d9755decde5c5b766f56b2af8db (patch) | |
| tree | de58af9a17e4760de36091d525d7eba8bc6f1578 /bitbake/lib/bs4/formatter.py | |
| parent | 99ff46cc9bb12619af55c892452cee3b90a545f0 (diff) | |
| download | poky-12fa81e8d67f0d9755decde5c5b766f56b2af8db.tar.gz | |
bs4: Update to 4.12.3 from 4.4.1
It makes sense to switch to a more recent version and keep up to date
with upstream changes and things like new python version support.
(Bitbake rev: f5462156036e71911c66d07dbf3303cde862785b)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bs4/formatter.py')
| -rw-r--r-- | bitbake/lib/bs4/formatter.py | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/bitbake/lib/bs4/formatter.py b/bitbake/lib/bs4/formatter.py new file mode 100644 index 0000000000..9fa1b57cb6 --- /dev/null +++ b/bitbake/lib/bs4/formatter.py | |||
| @@ -0,0 +1,185 @@ | |||
| 1 | from bs4.dammit import EntitySubstitution | ||
| 2 | |||
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents, as
                 well as some optimizations in the way tags are rendered.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    # Values accepted for the `language` constructor argument.
    HTML = 'html'
    XML = 'xml'

    # Fallback values for constructor arguments when formatting HTML.
    HTML_DEFAULTS = dict(
        cdata_containing_tags={"script", "style"},
    )

    def _default(self, language, value, kwarg):
        """Pick the effective value for an optional constructor argument.

        :param language: The markup language being formatted; compared
            against Formatter.XML.
        :param value: The value the caller supplied, or None.
        :param kwarg: The name of the argument, used as a key into
            HTML_DEFAULTS.
        :return: `value` if it is not None; otherwise an empty set for
            XML, or the HTML_DEFAULTS entry for `kwarg`.
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        # NOTE: this hands back the class-level object itself, not a copy.
        return self.HTML_DEFAULTS[kwarg]

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
            empty_attributes_are_booleans=False, indent=1,
    ):
        r"""Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        :param empty_attributes_are_booleans: Render attributes whose value
            is the empty string as HTML-style boolean attributes.
            (Attributes whose value is None are always rendered this way.)

        :param indent: If indent is a non-negative integer or string,
            then the contents of elements will be indented
            appropriately when pretty-printing. An indent level of 0,
            negative, or "" will only insert newlines. Using a
            positive integer indent indents that many spaces per
            level. If indent is a string (such as "\t"), that string
            is used to indent each level. The default behavior is to
            indent one space per level.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )
        self.empty_attributes_are_booleans = empty_attributes_are_booleans

        # Normalize `indent` to the literal string emitted per nesting level.
        if indent is None:
            indent = 0
        if isinstance(indent, int):
            # Negative widths are treated the same as zero.
            indent = ' ' * max(indent, 0)
        elif not isinstance(indent, str):
            # Anything that is neither an int nor a str falls back to
            # a single space.
            indent = ' '
        self.indent = indent

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        if not self.entity_substitution:
            return ns
        # Imported here rather than at module level, as in the original.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
                and ns.parent is not None
                and ns.parent.name in self.cdata_containing_tags):
            # Text inside a CDATA-containing tag is passed through
            # untouched.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        If `empty_attributes_are_booleans` is True, then attributes whose
        values are set to the empty string will be treated as boolean
        attributes.

        :param tag: An object with an `attrs` mapping (or None).
        :return: A sorted list of (name, value) pairs; the value is None
            for attributes rendered as booleans.
        """
        if tag.attrs is None:
            return []
        return sorted(
            (k, (None if self.empty_attributes_are_booleans and v == '' else v))
            for k, v in tag.attrs.items()
        )
| 147 | |||
class HTMLFormatter(Formatter):
    """A Formatter whose language is always Formatter.HTML.

    All positional and keyword arguments are forwarded to
    Formatter.__init__ after the language.
    """
    # Named formatter instances for HTML, keyed by alias.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        super().__init__(self.HTML, *args, **kwargs)
| 153 | |||
| 154 | |||
class XMLFormatter(Formatter):
    """A Formatter whose language is always Formatter.XML.

    All positional and keyword arguments are forwarded to
    Formatter.__init__ after the language.
    """
    # Named formatter instances for XML, keyed by alias.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        super().__init__(self.XML, *args, **kwargs)
| 160 | |||
| 161 | |||
# Set up aliases for the default formatters.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    void_element_close_prefix=None,
    empty_attributes_are_booleans=True,
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Build the no-substitution XML formatter the same way as the other XML
# entries. Wrapping one Formatter inside another passes the inner object
# as `language`, so the result is not recognized as XML and picks up the
# HTML defaults for cdata_containing_tags.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)
