diff options
Diffstat (limited to 'bitbake/lib/bs4/formatter.py')
-rw-r--r-- | bitbake/lib/bs4/formatter.py | 185 |
1 files changed, 185 insertions, 0 deletions
diff --git a/bitbake/lib/bs4/formatter.py b/bitbake/lib/bs4/formatter.py new file mode 100644 index 0000000000..9fa1b57cb6 --- /dev/null +++ b/bitbake/lib/bs4/formatter.py | |||
@@ -0,0 +1,185 @@ | |||
1 | from bs4.dammit import EntitySubstitution | ||
2 | |||
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents, as
                 well as some optimizations in the way tags are rendered.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    # Values accepted for the `language` constructor argument.
    HTML = 'html'
    XML = 'xml'

    # Fallback values for constructor arguments when formatting HTML,
    # keyed by constructor argument name.
    HTML_DEFAULTS = dict(
        cdata_containing_tags=set(["script", "style"]),
    )

    def _default(self, language, value, kwarg):
        """Pick the effective value for a constructor argument.

        :param language: The language being formatted, normally
            Formatter.XML or Formatter.HTML.
        :param value: The value supplied by the caller, or None if
            the caller did not supply one.
        :param kwarg: The name of the constructor argument; used as
            the key into HTML_DEFAULTS.
        :return: `value` if it is not None; otherwise a new empty set
            for XML, or the HTML_DEFAULTS entry for anything else.
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        # NOTE: this hands back the class-level object itself, so it is
        # shared by every formatter that falls back to the default.
        return self.HTML_DEFAULTS[kwarg]

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
            empty_attributes_are_booleans=False, indent=1,
    ):
        r"""Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        :param empty_attributes_are_booleans: Render attributes whose value
            is the empty string as HTML-style boolean attributes.
            (Attributes whose value is None are always rendered this way.)

        :param indent: If indent is a non-negative integer or string,
            then the contents of elements will be indented
            appropriately when pretty-printing. An indent level of 0,
            negative, or "" will only insert newlines. Using a
            positive integer indent indents that many spaces per
            level. If indent is a string (such as "\t"), that string
            is used to indent each level. None is treated like 0, and
            a value that is neither an integer nor a string falls
            back to a single space. The default behavior is to
            indent one space per level.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )
        self.empty_attributes_are_booleans = empty_attributes_are_booleans

        # Normalize `indent` into the literal string emitted per level.
        if indent is None:
            indent = 0
        if isinstance(indent, int):
            # Negative counts are clamped to zero (newlines only).
            indent = ' ' * max(indent, 0)
        elif not isinstance(indent, str):
            # Unsupported type: fall back to one space per level.
            indent = ' '
        self.indent = indent

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        if not self.entity_substitution:
            return ns
        # Imported here rather than at module level; presumably to
        # avoid a circular import with bs4.element.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
            and ns.parent is not None
            and ns.parent.name in self.cdata_containing_tags):
            # Text inside a CDATA-containing tag is passed through
            # untouched.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        If `empty_attributes_are_booleans` is True, then attributes whose
        values are set to the empty string will be treated as boolean
        attributes.

        :param tag: An object with an `attrs` mapping (or None).
        :return: A sorted list of (name, value) pairs; empty when
            `tag.attrs` is None. A value of None marks a boolean
            attribute.
        """
        if tag.attrs is None:
            return []
        return sorted(
            (k, (None if self.empty_attributes_are_booleans and v == '' else v))
            for k, v in tag.attrs.items()
        )
147 | |||
class HTMLFormatter(Formatter):
    """A Formatter whose language is fixed to HTML.

    All positional and keyword arguments are forwarded to
    Formatter.__init__ after the language, so callers must not pass
    `language` themselves.
    """
    # Maps a formatter name to an HTMLFormatter instance.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        super().__init__(self.HTML, *args, **kwargs)
153 | |||
154 | |||
class XMLFormatter(Formatter):
    """A Formatter whose language is fixed to XML.

    All positional and keyword arguments are forwarded to
    Formatter.__init__ after the language, so callers must not pass
    `language` themselves.
    """
    # Maps a formatter name to an XMLFormatter instance.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        super().__init__(self.XML, *args, **kwargs)
160 | |||
161 | |||
# Set up aliases for the default formatters. Each registry maps the
# name a caller may pass as `formatter` to a ready-made instance.

# HTML documents.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    # NOTE(review): None rather than '' -- presumably the renderer
    # treats both as "no prefix"; confirm against the caller.
    void_element_close_prefix=None,
    empty_attributes_are_booleans=True,
)
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)

# XML documents.
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Register a real XMLFormatter here. Wrapping one Formatter inside
# another would pass a Formatter instance as `language`, so the entry
# would not be recognised as XML and would pick up the HTML defaults.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)