diff options
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 319 |
1 files changed, 319 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..e44e791585 --- /dev/null +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -0,0 +1,319 @@ | |||
1 | import ast | ||
2 | import codegen | ||
3 | import logging | ||
4 | import os.path | ||
5 | import bb.utils, bb.data | ||
6 | from itertools import chain | ||
7 | from pysh import pyshyacc, pyshlex, sherrors | ||
8 | from bb.cache import MultiProcessCache | ||
9 | |||
10 | |||
11 | logger = logging.getLogger('BitBake.CodeParser') | ||
12 | |||
13 | try: | ||
14 | import cPickle as pickle | ||
15 | except ImportError: | ||
16 | import pickle | ||
17 | logger.info('Importing cPickle failed. Falling back to a very slow implementation.') | ||
18 | |||
19 | |||
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation.

    The first non-whitespace character of ``codestr`` is located; when the
    character immediately before it is a tab or a space (i.e. the first real
    line of code is indented), the whole string is wrapped in an ``if 1:``
    block so that it can be compiled as a top-level unit.

    Empty and whitespace-only strings are returned unchanged instead of
    raising IndexError while scanning for the first code character.
    """
    body = codestr.lstrip("\n\t ")
    if not body:
        # Nothing but whitespace (or nothing at all): there is no code whose
        # indentation would need to be neutralised.
        return codestr

    # Index of the first character that is not a newline, tab or space.
    first = len(codestr) - len(body)
    if first and codestr[first - 1] in ("\t", " "):
        return "if 1:\n" + codestr

    return codestr
34 | |||
35 | |||
class CodeParserCache(MultiProcessCache):
    """Cache of code-parser results built on MultiProcessCache.

    Slot 0 of the cache data holds the entries used for python code and
    slot 1 the entries used for shell code; the same layout is used for the
    "extras" data.
    """

    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 3

    def __init__(self):
        MultiProcessCache.__init__(self)
        self._bind_main_caches()
        extras = self.cachedata_extras
        self.pythoncacheextras = extras[0]
        self.shellcacheextras = extras[1]

    def _bind_main_caches(self):
        """Point the python/shell shortcuts at the current cachedata slots."""
        current = self.cachedata
        self.pythoncache = current[0]
        self.shellcache = current[1]

    def init_cache(self, d):
        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent, so refresh the shortcuts
        self._bind_main_caches()

    def compress_keys(self, data):
        """Intern the string sets stored in ``data`` (modified in place)."""
        # When the dicts are originally created, python calls intern() on the set keys
        # which significantly improves memory usage. Sadly the pickle/unpickle process
        # doesn't call intern() on the keys and results in the same strings being duplicated
        # in memory. This also means pickle will save the same string multiple times in
        # the cache file. By interning the data here, the cache file shrinks dramatically
        # meaning faster load times and the reloaded cache files also consume much less
        # memory. This is worth any performance hit from these loops and the use of the
        # intern() data storage.
        # Python 3.x may behave better in this area
        python_entries = data[0]
        for key in python_entries:
            entry = python_entries[key]
            entry["refs"] = self.internSet(entry["refs"])
            entry["execs"] = self.internSet(entry["execs"])

        shell_entries = data[1]
        for key in shell_entries:
            entry = shell_entries[key]
            entry["execs"] = self.internSet(entry["execs"])
        return

    def create_cachedata(self):
        """Return a fresh, empty [python, shell] pair of cache dicts."""
        return [{}, {}]
74 | |||
# Module-wide cache instance shared by the parsers and helpers in this file.
codeparsercache = CodeParserCache()


def parser_cache_init(d):
    """Initialise the shared code parser cache by passing ``d`` to its init_cache()."""
    cache = codeparsercache
    cache.init_cache(d)
79 | |||
def parser_cache_save(d):
    """Call save_extras() on the shared code parser cache, passing ``d`` through."""
    cache = codeparsercache
    cache.save_extras(d)
82 | |||
def parser_cache_savemerge(d):
    """Call save_merge() on the shared code parser cache, passing ``d`` through."""
    cache = codeparsercache
    cache.save_merge(d)
85 | |||
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger that holds records back until they are explicitly flushed.

    Records passed to handle() are stored in ``self.buffer`` instead of being
    dispatched; flush() hands them to ``target.handle()`` in arrival order.

    name   -- logger name, passed to the base logger class
    level  -- level set on this logger (default 0)
    target -- object whose handle(record) receives the records on flush();
              may be None, in which case flush() does nothing
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Store the record instead of dispatching it."""
        self.buffer.append(record)

    def flush(self):
        """Forward all buffered records to the target and empty the buffer.

        Without a target the records are kept, so nothing is lost and a
        target assigned later still receives them. Previously this case
        failed with AttributeError on None.
        """
        if self.target is None:
            return

        for record in self.buffer:
            self.target.handle(record)
        self.buffer = []
101 | |||
class PythonParser():
    """Extract variable references and called function names from python code.

    After parse_python() has run:
      references -- literal names passed to the getVar/contains style calls
                    and to the exec_func/exec_task calls
      execs      -- dotted names of all other functions that are called
    """

    getvars = ("d.getVar", "bb.data.getVar", "data.getVar", "d.appendVar", "d.prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        self.var_references = set()
        self.var_execs = set()
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _string_literal(node):
        """Return the value of a string literal AST node, or None otherwise.

        Older interpreters produce ``Str`` nodes, newer ones ``Constant``
        nodes. The node class is matched by name so that the deprecated
        ``ast.Str`` attribute is never touched.
        """
        kind = type(node).__name__
        if kind == "Str":
            return node.s
        if kind == "Constant" and isinstance(node.value, str):
            return node.value
        return None

    def _record_first_arg(self, node, found):
        """Add the literal first positional argument of call ``node`` to the
        set ``found``; warn when that argument is not a string literal.
        """
        if not node.args:
            # No positional argument at all (e.g. keyword-only call): there
            # is no name to track, and nothing to index either.
            return

        first = node.args[0]
        value = self._string_literal(first)
        if value is None:
            self.warn(node.func, first)
        else:
            found.add(value)

    def visit_Call(self, node):
        """Classify one Call node by the dotted name of the called function."""
        name = self.called_node_name(node.func)
        if name in self.getvars or name in self.containsfuncs:
            self._record_first_arg(node, self.var_references)
        elif name in self.execfuncs:
            self._record_first_arg(node, self.var_execs)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Only chains of attribute accesses ending in a plain name are
        resolved ("a.b.c"); anything else yields None.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def parse_python(self, node):
        """Parse the python code in ``str(node)`` and fill in ``references``
        and ``execs``, using the code parser cache when possible.
        """
        h = hash(str(node))

        # The main cache is consulted first, then the extras.
        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                self.references = cache[h]["refs"]
                self.execs = cache[h]["execs"]
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.references.update(self.var_references)
        self.references.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = {
            "refs": self.references,
            "execs": self.execs,
        }
186 | |||
class ShellParser():
    """Extract the names of the commands executed by a piece of shell code.

    funcdefs -- names of shell functions defined in the parsed code
    allexecs -- every command name seen, including calls to those functions
    execs    -- allexecs minus funcdefs; this is what parse_shell() returns
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Results are looked up in, and stored to, the code parser cache.
        Raises sherrors.ShellSyntaxError when the code ends unexpectedly.
        """

        h = hash(str(value))

        # The main cache is consulted first, then the extras.
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = cache[h]["execs"]
                return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = {"execs": self.execs}

        return self.execs

    def _parse_shell(self, value):
        """Tokenize ``value`` and walk it, accumulating into funcdefs/allexecs.

        Nested commands ($(), backticks, eval) must be parsed through this
        helper rather than parse_shell(): going through the cache there would
        overwrite ``execs`` on a hit without adding the nested commands to
        ``allexecs``, and on a miss would cache an entry for the nested
        command that also contains the outer script's commands.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Each handler returns a pair (more_tokens, words): sub-trees to
        recurse into and words to scan for command names. Raises
        NotImplementedError for token types without a handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                more_tokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command word that is itself a substitution has been
                    # handled by the nested parse; drop it from the scan below.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Assignment prefix: the real command may follow as a TOKEN.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break