summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bs4/formatter.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bs4/formatter.py')
-rw-r--r--bitbake/lib/bs4/formatter.py185
1 files changed, 185 insertions, 0 deletions
diff --git a/bitbake/lib/bs4/formatter.py b/bitbake/lib/bs4/formatter.py
new file mode 100644
index 0000000000..9fa1b57cb6
--- /dev/null
+++ b/bitbake/lib/bs4/formatter.py
@@ -0,0 +1,185 @@
1from bs4.dammit import EntitySubstitution
2
class Formatter(EntitySubstitution):
    """Describes a strategy to use when outputting a parse tree to a string.

    Some parts of this strategy come from the distinction between
    HTML4, HTML5, and XML. Others are configurable by the user.

    Formatters are passed in as the `formatter` argument to methods
    like `PageElement.encode`. Most people won't need to think about
    formatters, and most people who need to think about them can pass
    in one of these predefined strings as `formatter` rather than
    making a new Formatter object:

    For HTML documents:
     * 'html' - HTML entity substitution for generic HTML documents. (default)
     * 'html5' - HTML entity substitution for HTML5 documents, as
                 well as some optimizations in the way tags are rendered.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid HTML.
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.

    For XML documents:
     * 'html' - Entity substitution for XHTML documents.
     * 'minimal' - Only make the substitutions necessary to guarantee
                   valid XML. (default)
     * None - Do not perform any substitution. This will be faster
              but may result in invalid markup.
    """
    # Registries of XML and HTML formatters.
    XML_FORMATTERS = {}
    HTML_FORMATTERS = {}

    # Values accepted for the `language` constructor argument.
    HTML = 'html'
    XML = 'xml'

    # Per-keyword defaults applied when the language is not XML.
    HTML_DEFAULTS = dict(
        cdata_containing_tags={"script", "style"},
    )

    def _default(self, language, value, kwarg):
        """Resolve the effective value of a constructor keyword argument.

        :param language: The markup language the formatter targets.
        :param value: The value supplied by the caller, or None.
        :param kwarg: The key to look up in HTML_DEFAULTS when no
            value was supplied.
        :return: `value` if it is not None; an empty set when the
            language is XML; otherwise a copy of the HTML default.
        """
        if value is not None:
            return value
        if language == self.XML:
            return set()
        # Hand out a copy so that mutating one formatter's set cannot
        # leak into the class-level default shared by every formatter.
        return set(self.HTML_DEFAULTS[kwarg])

    def __init__(
            self, language=None, entity_substitution=None,
            void_element_close_prefix='/', cdata_containing_tags=None,
            empty_attributes_are_booleans=False, indent=1,
    ):
        r"""Constructor.

        :param language: This should be Formatter.XML if you are formatting
           XML markup and Formatter.HTML if you are formatting HTML markup.

        :param entity_substitution: A function to call to replace special
           characters with XML/HTML entities. For examples, see
           bs4.dammit.EntitySubstitution.substitute_html and substitute_xml.
        :param void_element_close_prefix: By default, void elements
           are represented as <tag/> (XML rules) rather than <tag>
           (HTML rules). To get <tag>, pass in the empty string.
        :param cdata_containing_tags: The list of tags that are defined
           as containing CDATA in this dialect. For example, in HTML,
           <script> and <style> tags are defined as containing CDATA,
           and their contents should not be formatted.
        :param empty_attributes_are_booleans: Render attributes whose value
            is the empty string as HTML-style boolean attributes.
            (Attributes whose value is None are always rendered this way.)

        :param indent: If indent is a non-negative integer or string,
            then the contents of elements will be indented
            appropriately when pretty-printing. An indent level of 0,
            negative, or "" will only insert newlines. Using a
            positive integer indent indents that many spaces per
            level. If indent is a string (such as "\t"), that string
            is used to indent each level. The default behavior is to
            indent one space per level.
        """
        self.language = language
        self.entity_substitution = entity_substitution
        self.void_element_close_prefix = void_element_close_prefix
        self.cdata_containing_tags = self._default(
            language, cdata_containing_tags, 'cdata_containing_tags'
        )
        self.empty_attributes_are_booleans = empty_attributes_are_booleans

        # Normalize `indent` to the literal string emitted per level.
        if indent is None:
            indent = 0
        if isinstance(indent, int):
            # Zero or negative counts collapse to the empty string.
            indent = ' ' * max(indent, 0)
        elif not isinstance(indent, str):
            # Any other type falls back to a single space.
            indent = ' '
        self.indent = indent

    def substitute(self, ns):
        """Process a string that needs to undergo entity substitution.
        This may be a string encountered in an attribute value or as
        text.

        :param ns: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        if not self.entity_substitution:
            return ns
        # Imported here rather than at module level.
        from .element import NavigableString
        if (isinstance(ns, NavigableString)
                and ns.parent is not None
                and ns.parent.name in self.cdata_containing_tags):
            # Text inside a CDATA-containing tag is passed through untouched.
            return ns
        # Substitute.
        return self.entity_substitution(ns)

    def attribute_value(self, value):
        """Process the value of an attribute.

        :param value: A string.
        :return: A string with certain characters replaced by named
           or numeric entities.
        """
        return self.substitute(value)

    def attributes(self, tag):
        """Reorder a tag's attributes however you want.

        By default, attributes are sorted alphabetically. This makes
        behavior consistent between Python 2 and Python 3, and preserves
        backwards compatibility with older versions of Beautiful Soup.

        If `empty_attributes_are_booleans` is True, then attributes whose
        values are set to the empty string will be treated as boolean
        attributes.

        :param tag: An object whose `attrs` is either None or a mapping
            of attribute names to values.
        :return: A sorted list of (name, value) pairs; empty when
            `tag.attrs` is None.
        """
        if tag.attrs is None:
            return []
        blank_is_boolean = self.empty_attributes_are_booleans
        return sorted(
            (key, None if blank_is_boolean and value == '' else value)
            for key, value in tag.attrs.items()
        )
147
class HTMLFormatter(Formatter):
    """Formatter preconfigured for HTML output.

    Constructor arguments are forwarded to Formatter.__init__ with the
    language fixed to Formatter.HTML, so callers must not pass
    `language` themselves.
    """

    # Named HTML formatter instances, keyed by alias.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        # The language is always supplied as the first positional argument.
        super().__init__(self.HTML, *args, **kwargs)
153
154
class XMLFormatter(Formatter):
    """Formatter preconfigured for XML output.

    Constructor arguments are forwarded to Formatter.__init__ with the
    language fixed to Formatter.XML, so callers must not pass
    `language` themselves.
    """

    # Named XML formatter instances, keyed by alias.
    REGISTRY = {}

    def __init__(self, *args, **kwargs):
        # The language is always supplied as the first positional argument.
        super().__init__(self.XML, *args, **kwargs)
160
161
# Set up aliases for the default formatters.

# HTML: full HTML entity substitution.
HTMLFormatter.REGISTRY['html'] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
# HTML5: same substitution, no close prefix on void elements, and
# empty-string attributes rendered as boolean attributes.
HTMLFormatter.REGISTRY["html5"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html,
    void_element_close_prefix=None,
    empty_attributes_are_booleans=True,
)
# HTML, minimal: only the XML-level substitutions.
HTMLFormatter.REGISTRY["minimal"] = HTMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# HTML, no substitution at all.
HTMLFormatter.REGISTRY[None] = HTMLFormatter(
    entity_substitution=None
)

# XML counterparts of the aliases above.
XMLFormatter.REGISTRY["html"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_html
)
XMLFormatter.REGISTRY["minimal"] = XMLFormatter(
    entity_substitution=EntitySubstitution.substitute_xml
)
# Build this entry the same way as the others. Wrapping one Formatter
# inside another would pass a Formatter instance as `language`, leaving
# the registered object with a non-string language and HTML defaults.
XMLFormatter.REGISTRY[None] = XMLFormatter(
    entity_substitution=None
)