1 files changed, 319 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
new file mode 100644
index 0000000000..e44e791585
--- /dev/null
+++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,319 @@
+import ast
+import codegen
+import logging
+import os.path
+import bb.utils, bb.data
+from itertools import chain
+from pysh import pyshyacc, pyshlex, sherrors
+from bb.cache import MultiProcessCache
+logger = logging.getLogger('BitBake.CodeParser')
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+    logger.info('Importing cPickle failed.  Falling back to a very slow implementation.')
def check_indent(codestr):
    """Make an indented code fragment compilable as top-level code.

    If the leading whitespace of ``codestr`` ends in a tab or a space (i.e.
    the first real line is indented), a dummy ``if 1:`` header is prepended
    so the fragment forms a valid block.

    Args:
        codestr: Python source text.

    Returns:
        ``"if 1:\\n" + codestr`` when the first non-blank line is indented,
        otherwise ``codestr`` unchanged.  Empty or whitespace-only input is
        returned unchanged.
    """
    body = codestr.lstrip("\n\t ")
    if not body:
        # Nothing but whitespace: there is no statement to wrap, and scanning
        # character by character would run off the end of the string.
        return codestr

    lead = len(codestr) - len(body)
    if lead and codestr[lead - 1] in ("\t", " "):
        return "if 1:\n" + codestr
    return codestr
class CodeParserCache(MultiProcessCache):
    """Persistent cache of code parser results.

    Slot 0 of ``cachedata`` / ``cachedata_extras`` holds entries for python
    fragments (``"refs"`` and ``"execs"`` sets), slot 1 holds entries for
    shell fragments (``"execs"`` only).  Both containers are read from the
    instance after the ``MultiProcessCache`` initialisers have run.
    """
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 3

    def __init__(self):
        MultiProcessCache.__init__(self)
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]
        self.pythoncacheextras, self.shellcacheextras = (
            self.cachedata_extras[0], self.cachedata_extras[1])

    def init_cache(self, d):
        """Initialise the cache through the base class and refresh the aliases."""
        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent, so re-bind the aliases
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]

    def compress_keys(self, data):
        """Replace every ``refs``/``execs`` set in ``data`` with its interned form.

        ``data`` is a ``[python_entries, shell_entries]`` pair; it is modified
        in place and nothing is returned.
        """
        # When the dicts are originally created, python calls intern() on the
        # set keys, which significantly improves memory usage. Sadly the
        # pickle/unpickle process doesn't call intern() on the keys, so the
        # same strings end up duplicated in memory and pickle saves the same
        # string multiple times in the cache file. Interning the data here
        # shrinks the cache file dramatically, which means faster load times
        # and much lower memory use for reloaded caches. That is worth any
        # performance hit from these loops and from the intern() storage.
        # Python 3.x may behave better in this area
        python_entries, shell_entries = data[0], data[1]
        for entry in python_entries.values():
            entry["refs"] = self.internSet(entry["refs"])
            entry["execs"] = self.internSet(entry["execs"])
        for entry in shell_entries.values():
            entry["execs"] = self.internSet(entry["execs"])

    def create_cachedata(self):
        """Return a fresh, empty ``[python, shell]`` pair of dictionaries."""
        return [{}, {}]
# Single cache instance used by PythonParser and ShellParser in this module.
codeparsercache = CodeParserCache()


def parser_cache_init(d):
    """Initialise the module-level parser cache; ``d`` is handed to ``init_cache``."""
    codeparsercache.init_cache(d)


def parser_cache_save(d):
    """Call ``save_extras(d)`` on the module-level parser cache."""
    codeparsercache.save_extras(d)


def parser_cache_savemerge(d):
    """Call ``save_merge(d)`` on the module-level parser cache."""
    codeparsercache.save_merge(d)
Logger = logging.getLoggerClass()


class BufferedLogger(Logger):
    """Logger that queues records and forwards them only when flushed.

    Args:
        name: logger name.
        level: logging level applied with ``setLevel``.
        target: object whose ``handle(record)`` receives the queued records
            when ``flush`` is called.
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.target = target
        self.buffer = []

    def handle(self, record):
        """Queue ``record`` instead of dispatching it to handlers."""
        self.buffer.append(record)

    def flush(self):
        """Pass every queued record to ``target.handle`` and empty the queue."""
        for pending in self.buffer:
            self.target.handle(pending)
        self.buffer = []
class PythonParser():
    """Extract variable references and called names from python code.

    After ``parse_python``:
      * ``references`` holds the string literals passed as first argument to
        the calls named in ``getvars``, ``containsfuncs`` and ``execfuncs``;
      * ``execs`` holds the dotted names of all other plain calls.
    """
    getvars = ("d.getVar", "bb.data.getVar", "data.getVar", "d.appendVar", "d.prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        """Create a parser for the code called ``name``.

        ``log`` is the target that buffered log records are flushed to.
        """
        self.var_references = set()
        self.var_execs = set()
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """
        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _string_literal(node):
        """Return the value of a string-literal AST node, or None.

        Older parsers produce ``Str`` nodes, newer ones produce ``Constant``
        nodes.  The node class is matched by name so that the deprecated
        ``ast.Str`` attribute never has to be looked up.
        """
        kind = type(node).__name__
        if kind == "Str":
            return node.s
        if kind == "Constant" and isinstance(node.value, str):
            return node.value
        return None

    def visit_Call(self, node):
        """Record what a single ``Call`` node references or executes."""
        name = self.called_node_name(node.func)
        if name in self.getvars or name in self.containsfuncs:
            tracked = self.var_references
        elif name in self.execfuncs:
            tracked = self.var_execs
        else:
            if name and isinstance(node.func, (ast.Name, ast.Attribute)):
                self.execs.add(name)
            return

        if not node.args:
            # A tracked API called without positional arguments names no
            # variable; indexing args[0] would raise IndexError.
            return

        first = node.args[0]
        literal = self._string_literal(first)
        if literal is None:
            self.warn(node.func, first)
        else:
            tracked.add(literal)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Returns the dotted name for ``Name``/``Attribute`` chains and None
        for anything else (e.g. the result of another call).
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def parse_python(self, node):
        """Populate ``references`` and ``execs`` for the python source ``node``.

        Results are looked up in, and stored to, the module-level
        ``codeparsercache`` keyed by ``hash(str(node))``.  On a cache hit the
        cached sets themselves are bound to this parser.
        """
        # NOTE(review): the key is the builtin hash of the source text, so two
        # fragments with colliding hashes would share one entry - confirm
        # this is acceptable.
        h = hash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                self.references = cache[h]["refs"]
                self.execs = cache[h]["execs"]
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.references.update(self.var_references)
        self.references.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = {
            "refs": self.references,
            "execs": self.execs,
        }
class ShellParser():
    """Extract the external commands executed by a piece of shell code.

    ``allexecs`` accumulates every command name seen, ``funcdefs`` the names
    of shell functions defined in the code, and ``execs`` is the difference
    of the two as computed by ``parse_shell``.
    """

    def __init__(self, name, log):
        """Create a parser for the code called ``name``.

        ``log`` is the target that buffered log records are flushed to.
        """
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Results are looked up in, and stored to, the module-level
        ``codeparsercache`` keyed by ``hash(str(value))``.

        Raises:
            sherrors.ShellSyntaxError: if the code ends unexpectedly.
        """
        h = hash(str(value))

        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = cache[h]["execs"]
                return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = {"execs": self.execs}

        return self.execs

    def _parse_shell(self, value):
        """Parse ``value`` and feed its tokens to ``process_tokens``.

        This bypasses the cache.  It is used for the top-level script on a
        cache miss and for every nested fragment (command substitutions and
        ``eval`` arguments).  Nested fragments must not go through
        ``parse_shell``: a cache hit there returns without adding the
        fragment's commands to ``allexecs``, and a miss would store an entry
        for the fragment that also contains the enclosing script's commands.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Each token is a ``(name, value)`` pair.  Its handler returns a pair
        ``(more_tokens, words)``: nested tokens to recurse into and words to
        hand to ``process_words``.

        Raises:
            NotImplementedError: for a token name without a handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            # Only the lookup is guarded, so a KeyError raised inside a
            # handler is not misreported as an unsupported token.
            try:
                handler = token_handlers[name]
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)
            more_tokens, words = handler(value)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """
        words = list(words)

        # First pass: parse the contents of every `...` / $(...) substitution.
        # A command word containing one is dropped from the list so that it
        # is not recorded as a literal command name below.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        # Second pass: find the first command word.  Words containing "=" are
        # skipped, and once one has been skipped a following TOKEN word may
        # serve as the command.
        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    # The words after the first one are joined and parsed as
                    # shell code in their own right.
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break

diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..e44e791585 --- /dev/null +++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,319 @@
	1	import ast
	2	import codegen
	3	import logging
	4	import os.path
	5	import bb.utils, bb.data
	6	from itertools import chain
	7	from pysh import pyshyacc, pyshlex, sherrors
	8	from bb.cache import MultiProcessCache
	9
	10
	11	logger = logging.getLogger('BitBake.CodeParser')
	12
	13	try:
	14	import cPickle as pickle
	15	except ImportError:
	16	import pickle
	17	logger.info('Importing cPickle failed. Falling back to a very slow implementation.')
	18
	19
	20	def check_indent(codestr):
	21	"""If the code is indented, add a top level piece of code to 'remove' the indentation"""
	22
	23	i = 0
	24	while codestr[i] in ["\n", "\t", " "]:
	25	i = i + 1
	26
	27	if i == 0:
	28	return codestr
	29
	30	if codestr[i-1] == "\t" or codestr[i-1] == " ":
	31	return "if 1:\n" + codestr
	32
	33	return codestr
	34
	35
	36	class CodeParserCache(MultiProcessCache):
	37	cache_file_name = "bb_codeparser.dat"
	38	CACHE_VERSION = 3
	39
	40	def __init__(self):
	41	MultiProcessCache.__init__(self)
	42	self.pythoncache = self.cachedata[0]
	43	self.shellcache = self.cachedata[1]
	44	self.pythoncacheextras = self.cachedata_extras[0]
	45	self.shellcacheextras = self.cachedata_extras[1]
	46
	47	def init_cache(self, d):
	48	MultiProcessCache.init_cache(self, d)
	49
	50	# cachedata gets re-assigned in the parent
	51	self.pythoncache = self.cachedata[0]
	52	self.shellcache = self.cachedata[1]
	53
	54	def compress_keys(self, data):
	55	# When the dicts are originally created, python calls intern() on the set keys
	56	# which significantly improves memory usage. Sadly the pickle/unpickle process
	57	# doesn't call intern() on the keys and results in the same strings being duplicated
	58	# in memory. This also means pickle will save the same string multiple times in
	59	# the cache file. By interning the data here, the cache file shrinks dramatically
	60	# meaning faster load times and the reloaded cache files also consume much less
	61	# memory. This is worth any performance hit from this loops and the use of the
	62	# intern() data storage.
	63	# Python 3.x may behave better in this area
	64	for h in data[0]:
	65	data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
	66	data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
	67	for h in data[1]:
	68	data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
	69	return
	70
	71	def create_cachedata(self):
	72	data = [{}, {}]
	73	return data
	74
	75	codeparsercache = CodeParserCache()
	76
	77	def parser_cache_init(d):
	78	codeparsercache.init_cache(d)
	79
	80	def parser_cache_save(d):
	81	codeparsercache.save_extras(d)
	82
	83	def parser_cache_savemerge(d):
	84	codeparsercache.save_merge(d)
	85
	86	Logger = logging.getLoggerClass()
	87	class BufferedLogger(Logger):
	88	def __init__(self, name, level=0, target=None):
	89	Logger.__init__(self, name)
	90	self.setLevel(level)
	91	self.buffer = []
	92	self.target = target
	93
	94	def handle(self, record):
	95	self.buffer.append(record)
	96
	97	def flush(self):
	98	for record in self.buffer:
	99	self.target.handle(record)
	100	self.buffer = []
	101
	102	class PythonParser():
	103	getvars = ("d.getVar", "bb.data.getVar", "data.getVar", "d.appendVar", "d.prependVar")
	104	containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains")
	105	execfuncs = ("bb.build.exec_func", "bb.build.exec_task")
	106
	107	def warn(self, func, arg):
	108	"""Warn about calls of bitbake APIs which pass a non-literal
	109	argument for the variable name, as we're not able to track such
	110	a reference.
	111	"""
	112
	113	try:
	114	funcstr = codegen.to_source(func)
	115	argstr = codegen.to_source(arg)
	116	except TypeError:
	117	self.log.debug(2, 'Failed to convert function and argument to source form')
	118	else:
	119	self.log.debug(1, self.unhandled_message % (funcstr, argstr))
	120
	121	def visit_Call(self, node):
	122	name = self.called_node_name(node.func)
	123	if name in self.getvars or name in self.containsfuncs:
	124	if isinstance(node.args[0], ast.Str):
	125	self.var_references.add(node.args[0].s)
	126	else:
	127	self.warn(node.func, node.args[0])
	128	elif name in self.execfuncs:
	129	if isinstance(node.args[0], ast.Str):
	130	self.var_execs.add(node.args[0].s)
	131	else:
	132	self.warn(node.func, node.args[0])
	133	elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
	134	self.execs.add(name)
	135
	136	def called_node_name(self, node):
	137	"""Given a called node, return its original string form"""
	138	components = []
	139	while node:
	140	if isinstance(node, ast.Attribute):
	141	components.append(node.attr)
	142	node = node.value
	143	elif isinstance(node, ast.Name):
	144	components.append(node.id)
	145	return '.'.join(reversed(components))
	146	else:
	147	break
	148
	149	def __init__(self, name, log):
	150	self.var_references = set()
	151	self.var_execs = set()
	152	self.execs = set()
	153	self.references = set()
	154	self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
	155
	156	self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
	157	self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)
	158
	159	def parse_python(self, node):
	160	h = hash(str(node))
	161
	162	if h in codeparsercache.pythoncache:
	163	self.references = codeparsercache.pythoncache[h]["refs"]
	164	self.execs = codeparsercache.pythoncache[h]["execs"]
	165	return
	166
	167	if h in codeparsercache.pythoncacheextras:
	168	self.references = codeparsercache.pythoncacheextras[h]["refs"]
	169	self.execs = codeparsercache.pythoncacheextras[h]["execs"]
	170	return
	171
	172
	173	code = compile(check_indent(str(node)), "<string>", "exec",
	174	ast.PyCF_ONLY_AST)
	175
	176	for n in ast.walk(code):
	177	if n.__class__.__name__ == "Call":
	178	self.visit_Call(n)
	179
	180	self.references.update(self.var_references)
	181	self.references.update(self.var_execs)
	182
	183	codeparsercache.pythoncacheextras[h] = {}
	184	codeparsercache.pythoncacheextras[h]["refs"] = self.references
	185	codeparsercache.pythoncacheextras[h]["execs"] = self.execs
	186
	187	class ShellParser():
	188	def __init__(self, name, log):
	189	self.funcdefs = set()
	190	self.allexecs = set()
	191	self.execs = set()
	192	self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
	193	self.unhandled_template = "unable to handle non-literal command '%s'"
	194	self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)
	195
	196	def parse_shell(self, value):
	197	"""Parse the supplied shell code in a string, returning the external
	198	commands it executes.
	199	"""
	200
	201	h = hash(str(value))
	202
	203	if h in codeparsercache.shellcache:
	204	self.execs = codeparsercache.shellcache[h]["execs"]
	205	return self.execs
	206
	207	if h in codeparsercache.shellcacheextras:
	208	self.execs = codeparsercache.shellcacheextras[h]["execs"]
	209	return self.execs
	210
	211	try:
	212	tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
	213	except pyshlex.NeedMore:
	214	raise sherrors.ShellSyntaxError("Unexpected EOF")
	215
	216	for token in tokens:
	217	self.process_tokens(token)
	218	self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
	219
	220	codeparsercache.shellcacheextras[h] = {}
	221	codeparsercache.shellcacheextras[h]["execs"] = self.execs
	222
	223	return self.execs
	224
	225	def process_tokens(self, tokens):
	226	"""Process a supplied portion of the syntax tree as returned by
	227	pyshyacc.parse.
	228	"""
	229
	230	def function_definition(value):
	231	self.funcdefs.add(value.name)
	232	return [value.body], None
	233
	234	def case_clause(value):
	235	# Element 0 of each item in the case is the list of patterns, and
	236	# Element 1 of each item in the case is the list of commands to be
	237	# executed when that pattern matches.
	238	words = chain(*[item[0] for item in value.items])
	239	cmds = chain(*[item[1] for item in value.items])
	240	return cmds, words
	241
	242	def if_clause(value):
	243	main = chain(value.cond, value.if_cmds)
	244	rest = value.else_cmds
	245	if isinstance(rest, tuple) and rest[0] == "elif":
	246	return chain(main, if_clause(rest[1]))
	247	else:
	248	return chain(main, rest)
	249
	250	def simple_command(value):
	251	return None, chain(value.words, (assign[1] for assign in value.assigns))
	252
	253	token_handlers = {
	254	"and_or": lambda x: ((x.left, x.right), None),
	255	"async": lambda x: ([x], None),
	256	"brace_group": lambda x: (x.cmds, None),
	257	"for_clause": lambda x: (x.cmds, x.items),
	258	"function_definition": function_definition,
	259	"if_clause": lambda x: (if_clause(x), None),
	260	"pipeline": lambda x: (x.commands, None),
	261	"redirect_list": lambda x: ([x.cmd], None),
	262	"subshell": lambda x: (x.cmds, None),
	263	"while_clause": lambda x: (chain(x.condition, x.cmds), None),
	264	"until_clause": lambda x: (chain(x.condition, x.cmds), None),
	265	"simple_command": simple_command,
	266	"case_clause": case_clause,
	267	}
	268
	269	for token in tokens:
	270	name, value = token
	271	try:
	272	more_tokens, words = token_handlers[name](value)
	273	except KeyError:
	274	raise NotImplementedError("Unsupported token type " + name)
	275
	276	if more_tokens:
	277	self.process_tokens(more_tokens)
	278
	279	if words:
	280	self.process_words(words)
	281
	282	def process_words(self, words):
	283	"""Process a set of 'words' in pyshyacc parlance, which includes
	284	extraction of executed commands from $() blocks, as well as grabbing
	285	the command name argument.
	286	"""
	287
	288	words = list(words)
	289	for word in list(words):
	290	wtree = pyshlex.make_wordtree(word[1])
	291	for part in wtree:
	292	if not isinstance(part, list):
	293	continue
	294
	295	if part[0] in ('`', '$('):
	296	command = pyshlex.wordtree_as_string(part[1:-1])
	297	self.parse_shell(command)
	298
	299	if word[0] in ("cmd_name", "cmd_word"):
	300	if word in words:
	301	words.remove(word)
	302
	303	usetoken = False
	304	for word in words:
	305	if word[0] in ("cmd_name", "cmd_word") or \
	306	(usetoken and word[0] == "TOKEN"):
	307	if "=" in word[1]:
	308	usetoken = True
	309	continue
	310
	311	cmd = word[1]
	312	if cmd.startswith("$"):
	313	self.log.debug(1, self.unhandled_template % cmd)
	314	elif cmd == "eval":
	315	command = " ".join(word for _, word in words[1:])
	316	self.parse_shell(command)
	317	else:
	318	self.allexecs.add(cmd)
	319	break