diff options
author | Tudor Florea <tudor.florea@enea.com> | 2015-10-09 22:59:03 +0200 |
---|---|---|
committer | Tudor Florea <tudor.florea@enea.com> | 2015-10-09 22:59:03 +0200 |
commit | 972dcfcdbfe75dcfeb777150c136576cf1a71e99 (patch) | |
tree | 97a61cd7e293d7ae9d56ef7ed0f81253365bb026 /bitbake/lib/bb/codeparser.py | |
download | poky-972dcfcdbfe75dcfeb777150c136576cf1a71e99.tar.gz |
initial commit for Enea Linux 5.0 arm
Signed-off-by: Tudor Florea <tudor.florea@enea.com>
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r-- | bitbake/lib/bb/codeparser.py | 406 |
1 file changed, 406 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..8b8f91a762 --- /dev/null +++ b/bitbake/lib/bb/codeparser.py | |||
@@ -0,0 +1,406 @@ | |||
1 | import ast | ||
2 | import codegen | ||
3 | import logging | ||
4 | import os.path | ||
5 | import bb.utils, bb.data | ||
6 | from itertools import chain | ||
7 | from pysh import pyshyacc, pyshlex, sherrors | ||
8 | from bb.cache import MultiProcessCache | ||
9 | |||
10 | |||
11 | logger = logging.getLogger('BitBake.CodeParser') | ||
12 | |||
13 | try: | ||
14 | import cPickle as pickle | ||
15 | except ImportError: | ||
16 | import pickle | ||
17 | logger.info('Importing cPickle failed. Falling back to a very slow implementation.') | ||
18 | |||
19 | |||
20 | def check_indent(codestr): | ||
21 | """If the code is indented, add a top level piece of code to 'remove' the indentation""" | ||
22 | |||
23 | i = 0 | ||
24 | while codestr[i] in ["\n", "\t", " "]: | ||
25 | i = i + 1 | ||
26 | |||
27 | if i == 0: | ||
28 | return codestr | ||
29 | |||
30 | if codestr[i-1] == "\t" or codestr[i-1] == " ": | ||
31 | return "if 1:\n" + codestr | ||
32 | |||
33 | return codestr | ||
34 | |||
35 | |||
36 | # Basically pickle, in python 2.7.3 at least, does badly with data duplication | ||
37 | # upon pickling and unpickling. Combine this with duplicate objects and things | ||
38 | # are a mess. | ||
39 | # | ||
40 | # When the sets are originally created, python calls intern() on the set keys | ||
41 | # which significantly improves memory usage. Sadly the pickle/unpickle process | ||
42 | # doesn't call intern() on the keys and results in the same strings being duplicated | ||
43 | # in memory. This also means pickle will save the same string multiple times in | ||
44 | # the cache file. | ||
45 | # | ||
46 | # By having shell and python cacheline objects with setstate/getstate, we force | ||
47 | # the object creation through our own routine where we can call intern (via internSet). | ||
48 | # | ||
49 | # We also use hashable frozensets and ensure we use references to these so that | ||
50 | # duplicates can be removed, both in memory and in the resulting pickled data. | ||
51 | # | ||
52 | # By playing these games, the size of the cache file shrinks dramatically | ||
53 | # meaning faster load times and the reloaded cache files also consume much less | ||
54 | # memory. Smaller cache files, faster load times and lower memory usage is good. | ||
55 | # | ||
56 | # A custom getstate/setstate using tuples is actually worth 15% cachesize by | ||
57 | # avoiding duplication of the attribute names! | ||
58 | |||
59 | class SetCache(object): | ||
60 | def __init__(self): | ||
61 | self.setcache = {} | ||
62 | |||
63 | def internSet(self, items): | ||
64 | |||
65 | new = [] | ||
66 | for i in items: | ||
67 | new.append(intern(i)) | ||
68 | s = frozenset(new) | ||
69 | if hash(s) in self.setcache: | ||
70 | return self.setcache[hash(s)] | ||
71 | self.setcache[hash(s)] = s | ||
72 | return s | ||
73 | |||
74 | codecache = SetCache() | ||
75 | |||
76 | class pythonCacheLine(object): | ||
77 | def __init__(self, refs, execs, contains): | ||
78 | self.refs = codecache.internSet(refs) | ||
79 | self.execs = codecache.internSet(execs) | ||
80 | self.contains = {} | ||
81 | for c in contains: | ||
82 | self.contains[c] = codecache.internSet(contains[c]) | ||
83 | |||
84 | def __getstate__(self): | ||
85 | return (self.refs, self.execs, self.contains) | ||
86 | |||
87 | def __setstate__(self, state): | ||
88 | (refs, execs, contains) = state | ||
89 | self.__init__(refs, execs, contains) | ||
90 | def __hash__(self): | ||
91 | l = (hash(self.refs), hash(self.execs)) | ||
92 | for c in sorted(self.contains.keys()): | ||
93 | l = l + (c, hash(self.contains[c])) | ||
94 | return hash(l) | ||
95 | |||
96 | class shellCacheLine(object): | ||
97 | def __init__(self, execs): | ||
98 | self.execs = codecache.internSet(execs) | ||
99 | |||
100 | def __getstate__(self): | ||
101 | return (self.execs) | ||
102 | |||
103 | def __setstate__(self, state): | ||
104 | (execs) = state | ||
105 | self.__init__(execs) | ||
106 | def __hash__(self): | ||
107 | return hash(self.execs) | ||
108 | |||
109 | class CodeParserCache(MultiProcessCache): | ||
110 | cache_file_name = "bb_codeparser.dat" | ||
111 | CACHE_VERSION = 7 | ||
112 | |||
113 | def __init__(self): | ||
114 | MultiProcessCache.__init__(self) | ||
115 | self.pythoncache = self.cachedata[0] | ||
116 | self.shellcache = self.cachedata[1] | ||
117 | self.pythoncacheextras = self.cachedata_extras[0] | ||
118 | self.shellcacheextras = self.cachedata_extras[1] | ||
119 | |||
120 | # To avoid duplication in the codeparser cache, keep | ||
121 | # a lookup of hashes of objects we already have | ||
122 | self.pythoncachelines = {} | ||
123 | self.shellcachelines = {} | ||
124 | |||
125 | def newPythonCacheLine(self, refs, execs, contains): | ||
126 | cacheline = pythonCacheLine(refs, execs, contains) | ||
127 | h = hash(cacheline) | ||
128 | if h in self.pythoncachelines: | ||
129 | return self.pythoncachelines[h] | ||
130 | self.pythoncachelines[h] = cacheline | ||
131 | return cacheline | ||
132 | |||
133 | def newShellCacheLine(self, execs): | ||
134 | cacheline = shellCacheLine(execs) | ||
135 | h = hash(cacheline) | ||
136 | if h in self.shellcachelines: | ||
137 | return self.shellcachelines[h] | ||
138 | self.shellcachelines[h] = cacheline | ||
139 | return cacheline | ||
140 | |||
141 | def init_cache(self, d): | ||
142 | MultiProcessCache.init_cache(self, d) | ||
143 | |||
144 | # cachedata gets re-assigned in the parent | ||
145 | self.pythoncache = self.cachedata[0] | ||
146 | self.shellcache = self.cachedata[1] | ||
147 | |||
148 | def create_cachedata(self): | ||
149 | data = [{}, {}] | ||
150 | return data | ||
151 | |||
152 | codeparsercache = CodeParserCache() | ||
153 | |||
154 | def parser_cache_init(d): | ||
155 | codeparsercache.init_cache(d) | ||
156 | |||
157 | def parser_cache_save(d): | ||
158 | codeparsercache.save_extras(d) | ||
159 | |||
160 | def parser_cache_savemerge(d): | ||
161 | codeparsercache.save_merge(d) | ||
162 | |||
163 | Logger = logging.getLoggerClass() | ||
164 | class BufferedLogger(Logger): | ||
165 | def __init__(self, name, level=0, target=None): | ||
166 | Logger.__init__(self, name) | ||
167 | self.setLevel(level) | ||
168 | self.buffer = [] | ||
169 | self.target = target | ||
170 | |||
171 | def handle(self, record): | ||
172 | self.buffer.append(record) | ||
173 | |||
174 | def flush(self): | ||
175 | for record in self.buffer: | ||
176 | self.target.handle(record) | ||
177 | self.buffer = [] | ||
178 | |||
179 | class PythonParser(): | ||
180 | getvars = (".getVar", ".appendVar", ".prependVar") | ||
181 | containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains", "bb.utils.contains_any") | ||
182 | execfuncs = ("bb.build.exec_func", "bb.build.exec_task") | ||
183 | |||
184 | def warn(self, func, arg): | ||
185 | """Warn about calls of bitbake APIs which pass a non-literal | ||
186 | argument for the variable name, as we're not able to track such | ||
187 | a reference. | ||
188 | """ | ||
189 | |||
190 | try: | ||
191 | funcstr = codegen.to_source(func) | ||
192 | argstr = codegen.to_source(arg) | ||
193 | except TypeError: | ||
194 | self.log.debug(2, 'Failed to convert function and argument to source form') | ||
195 | else: | ||
196 | self.log.debug(1, self.unhandled_message % (funcstr, argstr)) | ||
197 | |||
198 | def visit_Call(self, node): | ||
199 | name = self.called_node_name(node.func) | ||
200 | if name and name.endswith(self.getvars) or name in self.containsfuncs: | ||
201 | if isinstance(node.args[0], ast.Str): | ||
202 | varname = node.args[0].s | ||
203 | if name in self.containsfuncs and isinstance(node.args[1], ast.Str): | ||
204 | if varname not in self.contains: | ||
205 | self.contains[varname] = set() | ||
206 | self.contains[varname].add(node.args[1].s) | ||
207 | else: | ||
208 | self.references.add(node.args[0].s) | ||
209 | else: | ||
210 | self.warn(node.func, node.args[0]) | ||
211 | elif name in self.execfuncs: | ||
212 | if isinstance(node.args[0], ast.Str): | ||
213 | self.var_execs.add(node.args[0].s) | ||
214 | else: | ||
215 | self.warn(node.func, node.args[0]) | ||
216 | elif name and isinstance(node.func, (ast.Name, ast.Attribute)): | ||
217 | self.execs.add(name) | ||
218 | |||
219 | def called_node_name(self, node): | ||
220 | """Given a called node, return its original string form""" | ||
221 | components = [] | ||
222 | while node: | ||
223 | if isinstance(node, ast.Attribute): | ||
224 | components.append(node.attr) | ||
225 | node = node.value | ||
226 | elif isinstance(node, ast.Name): | ||
227 | components.append(node.id) | ||
228 | return '.'.join(reversed(components)) | ||
229 | else: | ||
230 | break | ||
231 | |||
232 | def __init__(self, name, log): | ||
233 | self.var_execs = set() | ||
234 | self.contains = {} | ||
235 | self.execs = set() | ||
236 | self.references = set() | ||
237 | self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log) | ||
238 | |||
239 | self.unhandled_message = "in call of %s, argument '%s' is not a string literal" | ||
240 | self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message) | ||
241 | |||
242 | def parse_python(self, node): | ||
243 | h = hash(str(node)) | ||
244 | |||
245 | if h in codeparsercache.pythoncache: | ||
246 | self.references = set(codeparsercache.pythoncache[h].refs) | ||
247 | self.execs = set(codeparsercache.pythoncache[h].execs) | ||
248 | self.contains = {} | ||
249 | for i in codeparsercache.pythoncache[h].contains: | ||
250 | self.contains[i] = set(codeparsercache.pythoncache[h].contains[i]) | ||
251 | return | ||
252 | |||
253 | if h in codeparsercache.pythoncacheextras: | ||
254 | self.references = set(codeparsercache.pythoncacheextras[h].refs) | ||
255 | self.execs = set(codeparsercache.pythoncacheextras[h].execs) | ||
256 | self.contains = {} | ||
257 | for i in codeparsercache.pythoncacheextras[h].contains: | ||
258 | self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i]) | ||
259 | return | ||
260 | |||
261 | code = compile(check_indent(str(node)), "<string>", "exec", | ||
262 | ast.PyCF_ONLY_AST) | ||
263 | |||
264 | for n in ast.walk(code): | ||
265 | if n.__class__.__name__ == "Call": | ||
266 | self.visit_Call(n) | ||
267 | |||
268 | self.execs.update(self.var_execs) | ||
269 | |||
270 | codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains) | ||
271 | |||
272 | class ShellParser(): | ||
273 | def __init__(self, name, log): | ||
274 | self.funcdefs = set() | ||
275 | self.allexecs = set() | ||
276 | self.execs = set() | ||
277 | self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log) | ||
278 | self.unhandled_template = "unable to handle non-literal command '%s'" | ||
279 | self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template) | ||
280 | |||
281 | def parse_shell(self, value): | ||
282 | """Parse the supplied shell code in a string, returning the external | ||
283 | commands it executes. | ||
284 | """ | ||
285 | |||
286 | h = hash(str(value)) | ||
287 | |||
288 | if h in codeparsercache.shellcache: | ||
289 | self.execs = set(codeparsercache.shellcache[h].execs) | ||
290 | return self.execs | ||
291 | |||
292 | if h in codeparsercache.shellcacheextras: | ||
293 | self.execs = set(codeparsercache.shellcacheextras[h].execs) | ||
294 | return self.execs | ||
295 | |||
296 | self._parse_shell(value) | ||
297 | self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) | ||
298 | |||
299 | codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs) | ||
300 | |||
301 | return self.execs | ||
302 | |||
303 | def _parse_shell(self, value): | ||
304 | try: | ||
305 | tokens, _ = pyshyacc.parse(value, eof=True, debug=False) | ||
306 | except pyshlex.NeedMore: | ||
307 | raise sherrors.ShellSyntaxError("Unexpected EOF") | ||
308 | |||
309 | for token in tokens: | ||
310 | self.process_tokens(token) | ||
311 | |||
312 | def process_tokens(self, tokens): | ||
313 | """Process a supplied portion of the syntax tree as returned by | ||
314 | pyshyacc.parse. | ||
315 | """ | ||
316 | |||
317 | def function_definition(value): | ||
318 | self.funcdefs.add(value.name) | ||
319 | return [value.body], None | ||
320 | |||
321 | def case_clause(value): | ||
322 | # Element 0 of each item in the case is the list of patterns, and | ||
323 | # Element 1 of each item in the case is the list of commands to be | ||
324 | # executed when that pattern matches. | ||
325 | words = chain(*[item[0] for item in value.items]) | ||
326 | cmds = chain(*[item[1] for item in value.items]) | ||
327 | return cmds, words | ||
328 | |||
329 | def if_clause(value): | ||
330 | main = chain(value.cond, value.if_cmds) | ||
331 | rest = value.else_cmds | ||
332 | if isinstance(rest, tuple) and rest[0] == "elif": | ||
333 | return chain(main, if_clause(rest[1])) | ||
334 | else: | ||
335 | return chain(main, rest) | ||
336 | |||
337 | def simple_command(value): | ||
338 | return None, chain(value.words, (assign[1] for assign in value.assigns)) | ||
339 | |||
340 | token_handlers = { | ||
341 | "and_or": lambda x: ((x.left, x.right), None), | ||
342 | "async": lambda x: ([x], None), | ||
343 | "brace_group": lambda x: (x.cmds, None), | ||
344 | "for_clause": lambda x: (x.cmds, x.items), | ||
345 | "function_definition": function_definition, | ||
346 | "if_clause": lambda x: (if_clause(x), None), | ||
347 | "pipeline": lambda x: (x.commands, None), | ||
348 | "redirect_list": lambda x: ([x.cmd], None), | ||
349 | "subshell": lambda x: (x.cmds, None), | ||
350 | "while_clause": lambda x: (chain(x.condition, x.cmds), None), | ||
351 | "until_clause": lambda x: (chain(x.condition, x.cmds), None), | ||
352 | "simple_command": simple_command, | ||
353 | "case_clause": case_clause, | ||
354 | } | ||
355 | |||
356 | for token in tokens: | ||
357 | name, value = token | ||
358 | try: | ||
359 | more_tokens, words = token_handlers[name](value) | ||
360 | except KeyError: | ||
361 | raise NotImplementedError("Unsupported token type " + name) | ||
362 | |||
363 | if more_tokens: | ||
364 | self.process_tokens(more_tokens) | ||
365 | |||
366 | if words: | ||
367 | self.process_words(words) | ||
368 | |||
369 | def process_words(self, words): | ||
370 | """Process a set of 'words' in pyshyacc parlance, which includes | ||
371 | extraction of executed commands from $() blocks, as well as grabbing | ||
372 | the command name argument. | ||
373 | """ | ||
374 | |||
375 | words = list(words) | ||
376 | for word in list(words): | ||
377 | wtree = pyshlex.make_wordtree(word[1]) | ||
378 | for part in wtree: | ||
379 | if not isinstance(part, list): | ||
380 | continue | ||
381 | |||
382 | if part[0] in ('`', '$('): | ||
383 | command = pyshlex.wordtree_as_string(part[1:-1]) | ||
384 | self._parse_shell(command) | ||
385 | |||
386 | if word[0] in ("cmd_name", "cmd_word"): | ||
387 | if word in words: | ||
388 | words.remove(word) | ||
389 | |||
390 | usetoken = False | ||
391 | for word in words: | ||
392 | if word[0] in ("cmd_name", "cmd_word") or \ | ||
393 | (usetoken and word[0] == "TOKEN"): | ||
394 | if "=" in word[1]: | ||
395 | usetoken = True | ||
396 | continue | ||
397 | |||
398 | cmd = word[1] | ||
399 | if cmd.startswith("$"): | ||
400 | self.log.debug(1, self.unhandled_template % cmd) | ||
401 | elif cmd == "eval": | ||
402 | command = " ".join(word for _, word in words[1:]) | ||
403 | self._parse_shell(command) | ||
404 | else: | ||
405 | self.allexecs.add(cmd) | ||
406 | break | ||