diff options
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..a50b9f268a --- /dev/null +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -0,0 +1,328 @@ | |||
1 | import ast | ||
2 | import codegen | ||
3 | import logging | ||
4 | import os.path | ||
5 | import bb.utils, bb.data | ||
6 | from itertools import chain | ||
7 | from pysh import pyshyacc, pyshlex, sherrors | ||
8 | from bb.cache import MultiProcessCache | ||
9 | |||
10 | |||
11 | logger = logging.getLogger('BitBake.CodeParser') | ||
12 | |||
13 | try: | ||
14 | import cPickle as pickle | ||
15 | except ImportError: | ||
16 | import pickle | ||
17 | logger.info('Importing cPickle failed. Falling back to a very slow implementation.') | ||
18 | |||
19 | |||
def check_indent(codestr):
    """Make an indented code fragment compilable as top-level code.

    If the first non-blank line of ``codestr`` is indented, the fragment is
    prefixed with ``if 1:`` so the indentation becomes a valid block body.
    Code whose first statement starts at column 0 (optionally preceded by
    blank lines) is returned unchanged, as is an empty or whitespace-only
    string, for which there is nothing to wrap.
    """
    length = len(codestr)

    # Skip over leading newlines, tabs and spaces without running past the
    # end of the string (empty / whitespace-only input).
    i = 0
    while i < length and codestr[i] in ("\n", "\t", " "):
        i = i + 1

    # No leading whitespace at all, or nothing but whitespace.
    if i == 0 or i == length:
        return codestr

    # The character right before the first real token tells us whether that
    # token is indented (tab/space) or merely preceded by blank lines.
    if codestr[i - 1] in ("\t", " "):
        return "if 1:\n" + codestr

    return codestr
34 | |||
35 | |||
class CodeParserCache(MultiProcessCache):
    """Cache of code parsing results, built on MultiProcessCache.

    Slot 0 of the cache data holds the python results and slot 1 the shell
    results; the ``*extras`` attributes are the matching slots of
    ``cachedata_extras``.
    """

    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 4

    def __init__(self):
        MultiProcessCache.__init__(self)
        # Convenience aliases for the two slots of each data list.
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]
        self.pythoncacheextras, self.shellcacheextras = \
            self.cachedata_extras[0], self.cachedata_extras[1]

    def init_cache(self, d):
        """Initialise the cache via the parent class and refresh the aliases."""
        MultiProcessCache.init_cache(self, d)

        # The parent re-assigns cachedata, so the aliases taken in __init__
        # would otherwise point at the old objects.
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]

    def compress_keys(self, data):
        """Replace every set stored in ``data`` with its interned equivalent.

        Dicts created from scratch have their keys intern()ed by python, which
        saves a lot of memory. Pickling and unpickling does not intern the
        strings again, so the same string ends up duplicated both in memory
        and in the cache file. Interning here shrinks the cache file
        dramatically, which means faster loads and a smaller memory footprint
        afterwards; that is worth the cost of these loops and of the intern()
        storage. Python 3.x may behave better in this area.

        NOTE(review): internSet is not defined in this class; presumably it
        is provided by MultiProcessCache - confirm.
        """
        python_entries = data[0]
        for h in python_entries:
            entry = python_entries[h]
            entry["refs"] = self.internSet(entry["refs"])
            entry["execs"] = self.internSet(entry["execs"])
            contains = entry["contains"]
            for k in contains:
                contains[k] = self.internSet(contains[k])

        shell_entries = data[1]
        for h in shell_entries:
            shell_entries[h]["execs"] = self.internSet(shell_entries[h]["execs"])
        return

    def create_cachedata(self):
        """Return a fresh, empty data list: [python results, shell results]."""
        return [{}, {}]
76 | |||
# Single module-level cache instance used by the parsers in this file.
codeparsercache = CodeParserCache()


def parser_cache_init(d):
    """Initialise the module-level cache; ``d`` is passed to init_cache()."""
    codeparsercache.init_cache(d)


def parser_cache_save(d):
    """Save the module-level cache's extras; ``d`` is passed to save_extras()."""
    codeparsercache.save_extras(d)


def parser_cache_savemerge(d):
    """Merge-save the module-level cache; ``d`` is passed to save_merge()."""
    codeparsercache.save_merge(d)
87 | |||
# Derive from whatever logger class is currently registered with logging.
Logger = logging.getLoggerClass()


class BufferedLogger(Logger):
    """Logger that holds its records back until flush() is called.

    Records are appended to ``self.buffer`` instead of being dispatched to
    handlers; flush() then passes them, in order, to ``target.handle()``.
    """

    def __init__(self, name, level=0, target=None):
        """Create the logger.

        name   -- logger name
        level  -- level set on this logger (default 0)
        target -- object whose handle(record) receives the records on flush
        """
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Store the record for later instead of handling it now."""
        self.buffer.append(record)

    def flush(self):
        """Forward all buffered records to the target and empty the buffer.

        When no target was given, the records stay buffered rather than
        failing on the missing target.
        """
        if self.target is None:
            return

        for record in self.buffer:
            self.target.handle(record)
        self.buffer = []
103 | |||
class PythonParser():
    """Extract variable references and executed functions from python code.

    After parse_python():
      references -- variable names passed as string literals to the
                    functions listed in ``getvars``
      contains   -- dict mapping a variable name to the set of literal values
                    it is checked against via the ``containsfuncs``
      execs      -- names of called functions, plus the literal names passed
                    to the ``execfuncs``
    """

    getvars = ("d.getVar", "bb.data.getVar", "data.getVar", "d.appendVar", "d.prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _literal_string(node):
        """Return the value of ``node`` if it is a string literal, else None.

        The node class is matched by name so that both the ``Str`` nodes of
        older interpreters and the ``Constant`` nodes of newer ones are
        recognised without touching the deprecated ``ast.Str`` attribute.
        """
        kind = node.__class__.__name__
        if kind == "Str":
            return node.s
        if kind == "Constant" and isinstance(node.value, str):
            return node.value
        return None

    def visit_Call(self, node):
        """Record what a single Call node references, checks or executes."""
        name = self.called_node_name(node.func)
        if name in self.getvars or name in self.containsfuncs:
            if not node.args:
                # No positional argument: there is no variable name to track.
                return

            varname = self._literal_string(node.args[0])
            if varname is None:
                self.warn(node.func, node.args[0])
                return

            value = None
            if name in self.containsfuncs and len(node.args) > 1:
                value = self._literal_string(node.args[1])

            if value is not None:
                self.contains.setdefault(varname, set()).add(value)
            else:
                # Plain variable access, or a contains check whose value is
                # missing or not a literal: track it as an ordinary reference.
                self.references.add(varname)
        elif name in self.execfuncs:
            if not node.args:
                # Nothing names the function or task being executed.
                return

            funcname = self._literal_string(node.args[0])
            if funcname is None:
                self.warn(node.func, node.args[0])
            else:
                self.var_execs.add(funcname)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Only chains of attribute accesses ending in a plain name (such as
        ``a.b.c``) are handled; for anything else None is returned.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        """Set up empty result sets and a buffered logger forwarding to ``log``.

        ``name`` is only used to prefix the warning message.
        """
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node):
        """Parse the python code in ``str(node)`` and fill in the result attributes.

        Results are looked up in, and stored into, the module-level
        codeparsercache, keyed by the hash of the code string.
        """
        h = hash(str(node))

        # Reuse a previous result from the main cache or from the extras.
        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                entry = cache[h]
                self.references = entry["refs"]
                self.execs = entry["execs"]
                self.contains = entry["contains"]
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = {
            "refs": self.references,
            "execs": self.execs,
            "contains": self.contains,
        }
195 | |||
class ShellParser():
    """Collect the commands executed by a piece of shell code.

    ``allexecs`` accumulates every command name seen, ``funcdefs`` the names
    of shell functions defined in the code, and ``execs`` is the difference
    of the two once parse_shell() has run.
    """

    def __init__(self, name, log):
        """Set up empty result sets and a buffered logger forwarding to ``log``.

        ``name`` is used in the logger name and to prefix the warning message.
        """
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Results are looked up in, and stored into, the module-level
        codeparsercache, keyed by the hash of the code string.
        """

        h = hash(str(value))

        # Reuse a previous result from the main cache or from the extras.
        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = cache[h]["execs"]
                return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = {"execs": self.execs}

        return self.execs

    def _parse_shell(self, value):
        """Parse ``value`` and accumulate into allexecs/funcdefs, bypassing the cache.

        Nested fragments (command substitutions, eval arguments) go through
        here rather than parse_shell(): a cache hit there would replace
        self.execs without contributing to self.allexecs, and a miss would
        cache the commands collected so far under the fragment's hash.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Each handler returns a ``(more_tokens, words)`` pair: sub-trees to
        recurse into and words to hand to process_words(); either may be
        None. Raises NotImplementedError for an unknown token type.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            # Flatten condition, body and any chained elif/else parts into a
            # single stream of tokens.
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            # Look the handler up separately so that a KeyError raised while
            # the handler runs is not mistaken for an unknown token type.
            try:
                handler = token_handlers[name]
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            more_tokens, words = handler(value)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy, as words may be removed from the list below.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command word containing a substitution is dropped so
                    # the loop below does not treat it as a command name.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment; the command name, if any, is
                    # then expected in a following TOKEN word.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    # Everything after the first word is parsed as shell code.
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break