diff options
Diffstat (limited to 'bitbake/lib/bs4/diagnose.py')
-rw-r--r-- | bitbake/lib/bs4/diagnose.py | 68 |
1 files changed, 40 insertions, 28 deletions
diff --git a/bitbake/lib/bs4/diagnose.py b/bitbake/lib/bs4/diagnose.py
index 4d0b00afad..083395fb46 100644
--- a/bitbake/lib/bs4/diagnose.py
+++ b/bitbake/lib/bs4/diagnose.py | |||
@@ -1,7 +1,10 @@ | |||
1 | """Diagnostic functions, mainly for use when doing tech support.""" | 1 | """Diagnostic functions, mainly for use when doing tech support.""" |
2 | |||
3 | __license__ = "MIT" | ||
4 | |||
2 | import cProfile | 5 | import cProfile |
3 | from StringIO import StringIO | 6 | from io import StringIO |
4 | from HTMLParser import HTMLParser | 7 | from html.parser import HTMLParser |
5 | import bs4 | 8 | import bs4 |
6 | from bs4 import BeautifulSoup, __version__ | 9 | from bs4 import BeautifulSoup, __version__ |
7 | from bs4.builder import builder_registry | 10 | from bs4.builder import builder_registry |
@@ -17,8 +20,8 @@ import cProfile | |||
17 | 20 | ||
18 | def diagnose(data): | 21 | def diagnose(data): |
19 | """Diagnostic suite for isolating common problems.""" | 22 | """Diagnostic suite for isolating common problems.""" |
20 | print "Diagnostic running on Beautiful Soup %s" % __version__ | 23 | print("Diagnostic running on Beautiful Soup %s" % __version__) |
21 | print "Python version %s" % sys.version | 24 | print("Python version %s" % sys.version) |
22 | 25 | ||
23 | basic_parsers = ["html.parser", "html5lib", "lxml"] | 26 | basic_parsers = ["html.parser", "html5lib", "lxml"] |
24 | for name in basic_parsers: | 27 | for name in basic_parsers: |
@@ -27,44 +30,53 @@ def diagnose(data): | |||
27 | break | 30 | break |
28 | else: | 31 | else: |
29 | basic_parsers.remove(name) | 32 | basic_parsers.remove(name) |
30 | print ( | 33 | print(( |
31 | "I noticed that %s is not installed. Installing it may help." % | 34 | "I noticed that %s is not installed. Installing it may help." % |
32 | name) | 35 | name)) |
33 | 36 | ||
34 | if 'lxml' in basic_parsers: | 37 | if 'lxml' in basic_parsers: |
35 | basic_parsers.append(["lxml", "xml"]) | 38 | basic_parsers.append(["lxml", "xml"]) |
36 | from lxml import etree | 39 | try: |
37 | print "Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION)) | 40 | from lxml import etree |
41 | print("Found lxml version %s" % ".".join(map(str,etree.LXML_VERSION))) | ||
42 | except ImportError as e: | ||
43 | print ( | ||
44 | "lxml is not installed or couldn't be imported.") | ||
45 | |||
38 | 46 | ||
39 | if 'html5lib' in basic_parsers: | 47 | if 'html5lib' in basic_parsers: |
40 | import html5lib | 48 | try: |
41 | print "Found html5lib version %s" % html5lib.__version__ | 49 | import html5lib |
50 | print("Found html5lib version %s" % html5lib.__version__) | ||
51 | except ImportError as e: | ||
52 | print ( | ||
53 | "html5lib is not installed or couldn't be imported.") | ||
42 | 54 | ||
43 | if hasattr(data, 'read'): | 55 | if hasattr(data, 'read'): |
44 | data = data.read() | 56 | data = data.read() |
45 | elif os.path.exists(data): | 57 | elif os.path.exists(data): |
46 | print '"%s" looks like a filename. Reading data from the file.' % data | 58 | print('"%s" looks like a filename. Reading data from the file.' % data) |
47 | data = open(data).read() | 59 | data = open(data).read() |
48 | elif data.startswith("http:") or data.startswith("https:"): | 60 | elif data.startswith("http:") or data.startswith("https:"): |
49 | print '"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data | 61 | print('"%s" looks like a URL. Beautiful Soup is not an HTTP client.' % data) |
50 | print "You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup." | 62 | print("You need to use some other library to get the document behind the URL, and feed that document to Beautiful Soup.") |
51 | return | 63 | return |
52 | 64 | print() | |
53 | 65 | ||
54 | for parser in basic_parsers: | 66 | for parser in basic_parsers: |
55 | print "Trying to parse your markup with %s" % parser | 67 | print("Trying to parse your markup with %s" % parser) |
56 | success = False | 68 | success = False |
57 | try: | 69 | try: |
58 | soup = BeautifulSoup(data, parser) | 70 | soup = BeautifulSoup(data, parser) |
59 | success = True | 71 | success = True |
60 | except Exception, e: | 72 | except Exception as e: |
61 | print "%s could not parse the markup." % parser | 73 | print("%s could not parse the markup." % parser) |
62 | traceback.print_exc() | 74 | traceback.print_exc() |
63 | if success: | 75 | if success: |
64 | print "Here's what %s did with the markup:" % parser | 76 | print("Here's what %s did with the markup:" % parser) |
65 | print soup.prettify() | 77 | print(soup.prettify()) |
66 | 78 | ||
67 | print "-" * 80 | 79 | print("-" * 80) |
68 | 80 | ||
69 | def lxml_trace(data, html=True, **kwargs): | 81 | def lxml_trace(data, html=True, **kwargs): |
70 | """Print out the lxml events that occur during parsing. | 82 | """Print out the lxml events that occur during parsing. |
@@ -74,7 +86,7 @@ def lxml_trace(data, html=True, **kwargs): | |||
74 | """ | 86 | """ |
75 | from lxml import etree | 87 | from lxml import etree |
76 | for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): | 88 | for event, element in etree.iterparse(StringIO(data), html=html, **kwargs): |
77 | print("%s, %4s, %s" % (event, element.tag, element.text)) | 89 | print(("%s, %4s, %s" % (event, element.tag, element.text))) |
78 | 90 | ||
79 | class AnnouncingParser(HTMLParser): | 91 | class AnnouncingParser(HTMLParser): |
80 | """Announces HTMLParser parse events, without doing anything else.""" | 92 | """Announces HTMLParser parse events, without doing anything else.""" |
@@ -156,9 +168,9 @@ def rdoc(num_elements=1000): | |||
156 | 168 | ||
157 | def benchmark_parsers(num_elements=100000): | 169 | def benchmark_parsers(num_elements=100000): |
158 | """Very basic head-to-head performance benchmark.""" | 170 | """Very basic head-to-head performance benchmark.""" |
159 | print "Comparative parser benchmark on Beautiful Soup %s" % __version__ | 171 | print("Comparative parser benchmark on Beautiful Soup %s" % __version__) |
160 | data = rdoc(num_elements) | 172 | data = rdoc(num_elements) |
161 | print "Generated a large invalid HTML document (%d bytes)." % len(data) | 173 | print("Generated a large invalid HTML document (%d bytes)." % len(data)) |
162 | 174 | ||
163 | for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: | 175 | for parser in ["lxml", ["lxml", "html"], "html5lib", "html.parser"]: |
164 | success = False | 176 | success = False |
@@ -167,24 +179,24 @@ def benchmark_parsers(num_elements=100000): | |||
167 | soup = BeautifulSoup(data, parser) | 179 | soup = BeautifulSoup(data, parser) |
168 | b = time.time() | 180 | b = time.time() |
169 | success = True | 181 | success = True |
170 | except Exception, e: | 182 | except Exception as e: |
171 | print "%s could not parse the markup." % parser | 183 | print("%s could not parse the markup." % parser) |
172 | traceback.print_exc() | 184 | traceback.print_exc() |
173 | if success: | 185 | if success: |
174 | print "BS4+%s parsed the markup in %.2fs." % (parser, b-a) | 186 | print("BS4+%s parsed the markup in %.2fs." % (parser, b-a)) |
175 | 187 | ||
176 | from lxml import etree | 188 | from lxml import etree |
177 | a = time.time() | 189 | a = time.time() |
178 | etree.HTML(data) | 190 | etree.HTML(data) |
179 | b = time.time() | 191 | b = time.time() |
180 | print "Raw lxml parsed the markup in %.2fs." % (b-a) | 192 | print("Raw lxml parsed the markup in %.2fs." % (b-a)) |
181 | 193 | ||
182 | import html5lib | 194 | import html5lib |
183 | parser = html5lib.HTMLParser() | 195 | parser = html5lib.HTMLParser() |
184 | a = time.time() | 196 | a = time.time() |
185 | parser.parse(data) | 197 | parser.parse(data) |
186 | b = time.time() | 198 | b = time.time() |
187 | print "Raw html5lib parsed the markup in %.2fs." % (b-a) | 199 | print("Raw html5lib parsed the markup in %.2fs." % (b-a)) |
188 | 200 | ||
189 | def profile(num_elements=100000, parser="lxml"): | 201 | def profile(num_elements=100000, parser="lxml"): |
190 | 202 | ||