From 972dcfcdbfe75dcfeb777150c136576cf1a71e99 Mon Sep 17 00:00:00 2001 From: Tudor Florea Date: Fri, 9 Oct 2015 22:59:03 +0200 Subject: initial commit for Enea Linux 5.0 arm Signed-off-by: Tudor Florea --- bitbake/lib/bb/codeparser.py | 406 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 406 insertions(+) create mode 100644 bitbake/lib/bb/codeparser.py (limited to 'bitbake/lib/bb/codeparser.py') diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..8b8f91a762 --- /dev/null +++ b/bitbake/lib/bb/codeparser.py @@ -0,0 +1,406 @@ +import ast +import codegen +import logging +import os.path +import bb.utils, bb.data +from itertools import chain +from pysh import pyshyacc, pyshlex, sherrors +from bb.cache import MultiProcessCache + + +logger = logging.getLogger('BitBake.CodeParser') + +try: + import cPickle as pickle +except ImportError: + import pickle + logger.info('Importing cPickle failed. Falling back to a very slow implementation.') + + +def check_indent(codestr): + """If the code is indented, add a top level piece of code to 'remove' the indentation""" + + i = 0 + while codestr[i] in ["\n", "\t", " "]: + i = i + 1 + + if i == 0: + return codestr + + if codestr[i-1] == "\t" or codestr[i-1] == " ": + return "if 1:\n" + codestr + + return codestr + + +# Basically pickle, in python 2.7.3 at least, does badly with data duplication +# upon pickling and unpickling. Combine this with duplicate objects and things +# are a mess. +# +# When the sets are originally created, python calls intern() on the set keys +# which significantly improves memory usage. Sadly the pickle/unpickle process +# doesn't call intern() on the keys and results in the same strings being duplicated +# in memory. This also means pickle will save the same string multiple times in +# the cache file. +# +# By having shell and python cacheline objects with setstate/getstate, we force +# the object creation through our own routine where we can call intern (via internSet). +# +# We also use hashable frozensets and ensure we use references to these so that +# duplicates can be removed, both in memory and in the resulting pickled data. +# +# By playing these games, the size of the cache file shrinks dramatically +# meaning faster load times and the reloaded cache files also consume much less +# memory. Smaller cache files, faster load times and lower memory usage is good. +# +# A custom getstate/setstate using tuples is actually worth 15% cachesize by +# avoiding duplication of the attribute names! + +class SetCache(object): + def __init__(self): + self.setcache = {} + + def internSet(self, items): + + new = [] + for i in items: + new.append(intern(i)) + s = frozenset(new) + if hash(s) in self.setcache: + return self.setcache[hash(s)] + self.setcache[hash(s)] = s + return s + +codecache = SetCache() + +class pythonCacheLine(object): + def __init__(self, refs, execs, contains): + self.refs = codecache.internSet(refs) + self.execs = codecache.internSet(execs) + self.contains = {} + for c in contains: + self.contains[c] = codecache.internSet(contains[c]) + + def __getstate__(self): + return (self.refs, self.execs, self.contains) + + def __setstate__(self, state): + (refs, execs, contains) = state + self.__init__(refs, execs, contains) + def __hash__(self): + l = (hash(self.refs), hash(self.execs)) + for c in sorted(self.contains.keys()): + l = l + (c, hash(self.contains[c])) + return hash(l) + +class shellCacheLine(object): + def __init__(self, execs): + self.execs = codecache.internSet(execs) + + def __getstate__(self): + return (self.execs) + + def __setstate__(self, state): + (execs) = state + self.__init__(execs) + def __hash__(self): + return hash(self.execs) + +class CodeParserCache(MultiProcessCache): + cache_file_name = "bb_codeparser.dat" + CACHE_VERSION = 7 + + def __init__(self): + MultiProcessCache.__init__(self) + self.pythoncache = self.cachedata[0] + self.shellcache = self.cachedata[1] + self.pythoncacheextras = self.cachedata_extras[0] + self.shellcacheextras = self.cachedata_extras[1] + + # To avoid duplication in the codeparser cache, keep + # a lookup of hashes of objects we already have + self.pythoncachelines = {} + self.shellcachelines = {} + + def newPythonCacheLine(self, refs, execs, contains): + cacheline = pythonCacheLine(refs, execs, contains) + h = hash(cacheline) + if h in self.pythoncachelines: + return self.pythoncachelines[h] + self.pythoncachelines[h] = cacheline + return cacheline + + def newShellCacheLine(self, execs): + cacheline = shellCacheLine(execs) + h = hash(cacheline) + if h in self.shellcachelines: + return self.shellcachelines[h] + self.shellcachelines[h] = cacheline + return cacheline + + def init_cache(self, d): + MultiProcessCache.init_cache(self, d) + + # cachedata gets re-assigned in the parent + self.pythoncache = self.cachedata[0] + self.shellcache = self.cachedata[1] + + def create_cachedata(self): + data = [{}, {}] + return data + +codeparsercache = CodeParserCache() + +def parser_cache_init(d): + codeparsercache.init_cache(d) + +def parser_cache_save(d): + codeparsercache.save_extras(d) + +def parser_cache_savemerge(d): + codeparsercache.save_merge(d) + +Logger = logging.getLoggerClass() +class BufferedLogger(Logger): + def __init__(self, name, level=0, target=None): + Logger.__init__(self, name) + self.setLevel(level) + self.buffer = [] + self.target = target + + def handle(self, record): + self.buffer.append(record) + + def flush(self): + for record in self.buffer: + self.target.handle(record) + self.buffer = [] + +class PythonParser(): + getvars = (".getVar", ".appendVar", ".prependVar") + containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains", "bb.utils.contains_any") + execfuncs = ("bb.build.exec_func", "bb.build.exec_task") + + def warn(self, func, arg): + """Warn about calls of bitbake APIs which pass a non-literal + argument for the variable name, as we're not able to track such + a reference. + """ + + try: + funcstr = codegen.to_source(func) + argstr = codegen.to_source(arg) + except TypeError: + self.log.debug(2, 'Failed to convert function and argument to source form') + else: + self.log.debug(1, self.unhandled_message % (funcstr, argstr)) + + def visit_Call(self, node): + name = self.called_node_name(node.func) + if name and name.endswith(self.getvars) or name in self.containsfuncs: + if isinstance(node.args[0], ast.Str): + varname = node.args[0].s + if name in self.containsfuncs and isinstance(node.args[1], ast.Str): + if varname not in self.contains: + self.contains[varname] = set() + self.contains[varname].add(node.args[1].s) + else: + self.references.add(node.args[0].s) + else: + self.warn(node.func, node.args[0]) + elif name in self.execfuncs: + if isinstance(node.args[0], ast.Str): + self.var_execs.add(node.args[0].s) + else: + self.warn(node.func, node.args[0]) + elif name and isinstance(node.func, (ast.Name, ast.Attribute)): + self.execs.add(name) + + def called_node_name(self, node): + """Given a called node, return its original string form""" + components = [] + while node: + if isinstance(node, ast.Attribute): + components.append(node.attr) + node = node.value + elif isinstance(node, ast.Name): + components.append(node.id) + return '.'.join(reversed(components)) + else: + break + + def __init__(self, name, log): + self.var_execs = set() + self.contains = {} + self.execs = set() + self.references = set() + self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log) + + self.unhandled_message = "in call of %s, argument '%s' is not a string literal" + self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message) + + def parse_python(self, node): + h = hash(str(node)) + + if h in codeparsercache.pythoncache: + self.references = set(codeparsercache.pythoncache[h].refs) + self.execs = set(codeparsercache.pythoncache[h].execs) + self.contains = {} + for i in codeparsercache.pythoncache[h].contains: + self.contains[i] = set(codeparsercache.pythoncache[h].contains[i]) + return + + if h in codeparsercache.pythoncacheextras: + self.references = set(codeparsercache.pythoncacheextras[h].refs) + self.execs = set(codeparsercache.pythoncacheextras[h].execs) + self.contains = {} + for i in codeparsercache.pythoncacheextras[h].contains: + self.contains[i] = set(codeparsercache.pythoncacheextras[h].contains[i]) + return + + code = compile(check_indent(str(node)), "", "exec", + ast.PyCF_ONLY_AST) + + for n in ast.walk(code): + if n.__class__.__name__ == "Call": + self.visit_Call(n) + + self.execs.update(self.var_execs) + + codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains) + +class ShellParser(): + def __init__(self, name, log): + self.funcdefs = set() + self.allexecs = set() + self.execs = set() + self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log) + self.unhandled_template = "unable to handle non-literal command '%s'" + self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template) + + def parse_shell(self, value): + """Parse the supplied shell code in a string, returning the external + commands it executes. + """ + + h = hash(str(value)) + + if h in codeparsercache.shellcache: + self.execs = set(codeparsercache.shellcache[h].execs) + return self.execs + + if h in codeparsercache.shellcacheextras: + self.execs = set(codeparsercache.shellcacheextras[h].execs) + return self.execs + + self._parse_shell(value) + self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs) + + codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs) + + return self.execs + + def _parse_shell(self, value): + try: + tokens, _ = pyshyacc.parse(value, eof=True, debug=False) + except pyshlex.NeedMore: + raise sherrors.ShellSyntaxError("Unexpected EOF") + + for token in tokens: + self.process_tokens(token) + + def process_tokens(self, tokens): + """Process a supplied portion of the syntax tree as returned by + pyshyacc.parse. + """ + + def function_definition(value): + self.funcdefs.add(value.name) + return [value.body], None + + def case_clause(value): + # Element 0 of each item in the case is the list of patterns, and + # Element 1 of each item in the case is the list of commands to be + # executed when that pattern matches. + words = chain(*[item[0] for item in value.items]) + cmds = chain(*[item[1] for item in value.items]) + return cmds, words + + def if_clause(value): + main = chain(value.cond, value.if_cmds) + rest = value.else_cmds + if isinstance(rest, tuple) and rest[0] == "elif": + return chain(main, if_clause(rest[1])) + else: + return chain(main, rest) + + def simple_command(value): + return None, chain(value.words, (assign[1] for assign in value.assigns)) + + token_handlers = { + "and_or": lambda x: ((x.left, x.right), None), + "async": lambda x: ([x], None), + "brace_group": lambda x: (x.cmds, None), + "for_clause": lambda x: (x.cmds, x.items), + "function_definition": function_definition, + "if_clause": lambda x: (if_clause(x), None), + "pipeline": lambda x: (x.commands, None), + "redirect_list": lambda x: ([x.cmd], None), + "subshell": lambda x: (x.cmds, None), + "while_clause": lambda x: (chain(x.condition, x.cmds), None), + "until_clause": lambda x: (chain(x.condition, x.cmds), None), + "simple_command": simple_command, + "case_clause": case_clause, + } + + for token in tokens: + name, value = token + try: + more_tokens, words = token_handlers[name](value) + except KeyError: + raise NotImplementedError("Unsupported token type " + name) + + if more_tokens: + self.process_tokens(more_tokens) + + if words: + self.process_words(words) + + def process_words(self, words): + """Process a set of 'words' in pyshyacc parlance, which includes + extraction of executed commands from $() blocks, as well as grabbing + the command name argument. + """ + + words = list(words) + for word in list(words): + wtree = pyshlex.make_wordtree(word[1]) + for part in wtree: + if not isinstance(part, list): + continue + + if part[0] in ('`', '$('): + command = pyshlex.wordtree_as_string(part[1:-1]) + self._parse_shell(command) + + if word[0] in ("cmd_name", "cmd_word"): + if word in words: + words.remove(word) + + usetoken = False + for word in words: + if word[0] in ("cmd_name", "cmd_word") or \ + (usetoken and word[0] == "TOKEN"): + if "=" in word[1]: + usetoken = True + continue + + cmd = word[1] + if cmd.startswith("$"): + self.log.debug(1, self.unhandled_template % cmd) + elif cmd == "eval": + command = " ".join(word for _, word in words[1:]) + self._parse_shell(command) + else: + self.allexecs.add(cmd) + break -- cgit v1.2.3-54-g00ecf