summary refs log tree commit diff stats
path: root/bitbake/lib/bs4/diagnose.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bs4/diagnose.py')
-rw-r--r-- bitbake/lib/bs4/diagnose.py 68
1 files changed, 40 insertions, 28 deletions
diff --git a/bitbake/lib/bs4/diagnose.py b/bitbake/lib/bs4/diagnose.py
index 4d0b00afad..083395fb46 100644
--- a/bitbake/lib/bs4/diagnose.py
+++ b/bitbake/lib/bs4/diagnose.py
@@ -1,7 +1,10 @@
1"""Diagnostic functions, mainly for use when doing tech support.""" 1"""Diagnostic functions, mainly for use when doing tech support."""
2
3__license__ = "MIT"
4
2import cProfile 5import cProfile
3from StringIO import StringIO 6from io import StringIO
4from HTMLParser import HTMLParser 7from html.parser import HTMLParser
5import bs4 8import bs4
6from bs4 import BeautifulSoup, __version__ 9from bs4 import BeautifulSoup, __version__
7from bs4.builder import builder_registry 10from bs4.builder import builder_registry
@@ -17,8 +20,8 @@ import cProfile
17 20
18def diagnose(data): 21def diagnose(data):
19 """Diagnostic suite for isolating common problems.""" 22 """Diagnostic suite for isolating common problems."""
20 print "Diagnostic running on Beautiful Soup %s" % __version__ 23 print("Diagnostic running on Beautiful Soup %s" % __version__)
21 print "Python version %s" % sys.version 24 print("Python version %s" % sys.version)
22 25
23 basic_parsers = ["html.parser", "html5lib", "lxml"] 26 basic_parsers = ["html.parser", "html5lib", "lxml"]
24 for name in basic_parsers: 27 for name in basic_parsers:
@@ -27,44 +30,53 @@ def diagnose(data):
27 break 30 break
28 else: 31 else:
29 basic_parsers.remove(name) 32 basic_parsers.remove(name)
30 print ( 33 print((
31 "I noticed that %s is not installed. Installing it may help." % 34 "I noticed that %s is not installed. Installing it may help." %
32 name) 35 name))
33 36
34 if 'lxml' in basic_parsers: 37 if 'lxml' in basic_parsers:
35 basic_parsers.append(["lxml", "xml"]) 38 basic_parsers.append(["lxml", "xml"])
36 from lxml import etree 39 try:
37 print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) 40 from lxml import etree
41 print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)))
42 except ImportError as e:
43 print (
44 "lxml is not installed or couldn't be imported.")
45
38 46
39 if 'html5lib' in basic_parsers: 47 if 'html5lib' in basic_parsers:
40 import html5lib 48 try:
41 print "Found html5lib version %s" % html5lib.__version__ 49 import html5lib
50 print("Found html5lib version %s" % html5lib.__version__)
51 except ImportError as e:
52 print (
53 "html5lib is not installed or couldn't be imported.")
42 54
43 if hasattr(data, 'read'): 55 if hasattr(data, 'read'):
44 data = data.read() 56 data = data.read()
45 elif os.path.exists(data): 57 elif os.path.exists(data):
46 print '"%s" looks like a filename. Reading data from the file.' % data 58 print('"%s" looks like a filename. Reading data from the file.' % data)
47 data = open(data).read() 59 data = open(data).read()
48 elif data.startswith("http:") or data.startswith("https:"): 60 elif data.startswith("http:") or data.startswith("https:"):
49 print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data 61 print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data)
50 print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." 62 print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.")
51 return 63 return
52 print 64 print()
53 65
54 for parser in basic_parsers: 66 for parser in basic_parsers:
55 print "Trying to parse your markup with %s" % parser 67 print("Trying to parse your markup with %s" % parser)
56 success = False 68 success = False
57 try: 69 try:
58 soup = BeautifulSoup(data, parser) 70 soup = BeautifulSoup(data, parser)
59 success = True 71 success = True
60 except Exception, e: 72 except Exception as e:
61 print "%s could not parse the markup." % parser 73 print("%s could not parse the markup." % parser)
62 traceback.print_exc() 74 traceback.print_exc()
63 if success: 75 if success:
64 print "Here's what %s did with the markup:" % parser 76 print("Here's what %s did with the markup:" % parser)
65 print soup.prettify() 77 print(soup.prettify())
66 78
67 print "-" * 80 79 print("-" * 80)
68 80
69def lxml_trace(data, html=True, **kwargs): 81def lxml_trace(data, html=True, **kwargs):
70 """Print out the lxml events that occur during parsing. 82 """Print out the lxml events that occur during parsing.
@@ -74,7 +86,7 @@ def lxml_trace(data, html=True, **kwargs):
74 """ 86 """
75 from lxml import etree 87 from lxml import etree
76 for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): 88 for event, element in etree.iterparse(StringIO(data), html=html, **kwargs):
77 print("%s, %4s, %s" % (event, element.tag, element.text)) 89 print(("%s, %4s, %s" % (event, element.tag, element.text)))
78 90
79class AnnouncingParser(HTMLParser): 91class AnnouncingParser(HTMLParser):
80 """Announces HTMLParser parse events, without doing anything else.""" 92 """Announces HTMLParser parse events, without doing anything else."""
@@ -156,9 +168,9 @@ def rdoc(num_elements=1000):
156 168
157def benchmark_parsers(num_elements=100000): 169def benchmark_parsers(num_elements=100000):
158 """Very basic head-to-head performance benchmark.""" 170 """Very basic head-to-head performance benchmark."""
159 print "Comparative parser benchmark on Beautiful Soup %s" % __version__ 171 print("Comparative parser benchmark on Beautiful Soup %s" % __version__)
160 data = rdoc(num_elements) 172 data = rdoc(num_elements)
161 print "Generated a large invalid HTML document (%d bytes)." % len(data) 173 print("Generated a large invalid HTML document (%d bytes)." % len(data))
162 174
163 for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: 175 for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]:
164 success = False 176 success = False
@@ -167,24 +179,24 @@ def benchmark_parsers(num_elements=100000):
167 soup = BeautifulSoup(data, parser) 179 soup = BeautifulSoup(data, parser)
168 b = time.time() 180 b = time.time()
169 success = True 181 success = True
170 except Exception, e: 182 except Exception as e:
171 print "%s could not parse the markup." % parser 183 print("%s could not parse the markup." % parser)
172 traceback.print_exc() 184 traceback.print_exc()
173 if success: 185 if success:
174 print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) 186 print("BS4+%s parsed the markup in %.2fs." % (parser, b-a))
175 187
176 from lxml import etree 188 from lxml import etree
177 a = time.time() 189 a = time.time()
178 etree.HTML(data) 190 etree.HTML(data)
179 b = time.time() 191 b = time.time()
180 print "Raw lxml parsed the markup in %.2fs." % (b-a) 192 print("Raw lxml parsed the markup in %.2fs." % (b-a))
181 193
182 import html5lib 194 import html5lib
183 parser = html5lib.HTMLParser() 195 parser = html5lib.HTMLParser()
184 a = time.time() 196 a = time.time()
185 parser.parse(data) 197 parser.parse(data)
186 b = time.time() 198 b = time.time()
187 print "Raw html5lib parsed the markup in %.2fs." % (b-a) 199 print("Raw html5lib parsed the markup in %.2fs." % (b-a))
188 200
189def profile(num_elements=100000, parser="lxml"): 201def profile(num_elements=100000, parser="lxml"):
190 202