bitbake: Add codeparser for parsing shell and python functions

This commit is derived from Chris Larson's checksum work, turned into a standalone piece of code for parsing python and shell functions. The deindent code has been replaced with code that works around the indentation instead, for speed. The original ast.NodeVisitor based visitor was replaced with a faster ast.walk() loop that checks each node's class name. Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
author: Richard Purdie <rpurdie@linux.intel.com> 2010-08-02 10:20:20 +0100
committer: Richard Purdie <rpurdie@linux.intel.com> 2010-08-31 12:41:23 +0100
commit: 3492bff64a809b3a2a2376b83f41e099e16d22f6 (patch)
tree: 5434ee1339f0fb038584a00fb14739909e570fb3 /bitbake/lib
parent: 13fdd4ae5d5709332d84427ff8e60dc9ba62974f (diff)
download: poky-3492bff64a809b3a2a2376b83f41e099e16d22f6.tar.gz
2 files changed, 276 insertions, 3 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
new file mode 100644
index 0000000000..88a26c82a7
--- /dev/null
+++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,273 @@
+from pysh import pyshyacc, pyshlex
+from itertools import chain
+from bb import msg, utils
+import ast
+import codegen
+def check_indent(codestr):
+    """Wrap indented code so that it can be compiled as top level code.
+    If codestr starts with a space or a tab, an 'if 1:' line is put in front
+    of it so the indented text becomes the body of a trivial 'if' statement;
+    this avoids having to de-indent the code. Any other input, including the
+    empty string, is returned unchanged.
+    """
+    # Compare by value: 'is' tests object identity, which is not guaranteed
+    # for equal strings. Slicing (rather than indexing) tolerates "".
+    if codestr[:1] in (" ", "\t"):
+        return "if 1:\n" + codestr
+    return codestr
+# Maps the code passed to PythonParser.parse_python() to a PythonParser
+# holding the 'references' and 'execs' found in exactly that code.
+pythonparsecache = {}
+class PythonParser():
+    """Collect the variables and functions a piece of python code refers to.
+    After parse_python(), 'references' holds the names passed as string
+    literals to the getVar style APIs and to bb.build.exec_func/exec_task,
+    and 'execs' holds the names of the other functions the code calls.
+    """
+    class ValueVisitor():
+        """Visitor to traverse a python abstract syntax tree and obtain
+        the variables referenced via bitbake metadata APIs, and the external
+        functions called.
+        """
+        getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
+        expands = ("d.expand", "bb.data.expand", "data.expand")
+        execs = ("bb.build.exec_func", "bb.build.exec_task")
+        @classmethod
+        def _compare_name(cls, strparts, node):
+            """Given a sequence of strings representing a python name,
+            where the last component is the actual Name and the prior
+            elements are Attribute nodes, determine if the supplied node
+            matches.
+            """
+            if not strparts:
+                return True
+            current, rest = strparts[0], strparts[1:]
+            if isinstance(node, ast.Attribute):
+                if current == node.attr:
+                    return cls._compare_name(rest, node.value)
+            elif isinstance(node, ast.Name):
+                if current == node.id:
+                    return True
+            return False
+        @classmethod
+        def compare_name(cls, value, node):
+            """Convenience function for the _compare_name method, which
+            can accept a string (which is split by '.' for you), or an
+            iterable of strings, in which case it checks to see if any of
+            them match, similar to isinstance.
+            """
+            if isinstance(value, basestring):
+                # _compare_name() walks from the outermost attribute inwards,
+                # so hand it the dotted name back to front.
+                return cls._compare_name(tuple(reversed(value.split("."))),
+                                         node)
+            else:
+                return any(cls.compare_name(item, node) for item in value)
+        def __init__(self, value):
+            """Start with empty result sets; 'value' is only stored."""
+            self.var_references = set()
+            self.var_execs = set()
+            self.direct_func_calls = set()
+            self.var_expands = set()
+            self.value = value
+        @classmethod
+        def warn(cls, func, arg):
+            """Warn about calls of bitbake APIs which pass a non-literal
+            argument for the variable name, as we're not able to track such
+            a reference.
+            """
+            try:
+                funcstr = codegen.to_source(func)
+                argstr = codegen.to_source(arg)
+            except TypeError:
+                msg.debug(2, None, "Failed to convert function and argument to source form")
+            else:
+                msg.debug(1, None, "Warning: in call to '%s', argument '%s' is not a literal" %
+                                     (funcstr, argstr))
+        def visit_Call(self, node):
+            """Record what a single ast.Call node refers to.
+            Calls to the getvars/expands/execs APIs are classified by their
+            first positional argument: string literals are recorded, anything
+            else is reported through warn(). Every other call is recorded in
+            direct_func_calls under its (dotted) name.
+            """
+            # A tracked API may be called without positional arguments; there
+            # is then nothing to record and nothing to index.
+            arg = None
+            if node.args:
+                arg = node.args[0]
+            if self.compare_name(self.getvars, node.func):
+                if isinstance(arg, ast.Str):
+                    self.var_references.add(arg.s)
+                elif arg is not None:
+                    self.warn(node.func, arg)
+            elif self.compare_name(self.expands, node.func):
+                if isinstance(arg, ast.Str):
+                    # add(), not update(): update() would insert every
+                    # character of the string as a separate element. No
+                    # warning here, the argument *is* a literal.
+                    self.var_expands.add(arg.s)
+                elif isinstance(arg, ast.Call) and \
+                     self.compare_name(self.getvars, arg.func):
+                    # expand(getVar(...)): the inner getVar call is a Call
+                    # node of its own and is handled when it is visited.
+                    pass
+                elif arg is not None:
+                    self.warn(node.func, arg)
+            elif self.compare_name(self.execs, node.func):
+                if isinstance(arg, ast.Str):
+                    self.var_execs.add(arg.s)
+                elif arg is not None:
+                    self.warn(node.func, arg)
+            elif isinstance(node.func, ast.Name):
+                self.direct_func_calls.add(node.func.id)
+            elif isinstance(node.func, ast.Attribute):
+                # We must have a qualified name.  Therefore we need
+                # to walk the chain of 'Attribute' nodes to determine
+                # the qualification.
+                attr_node = node.func.value
+                identifier = node.func.attr
+                while isinstance(attr_node, ast.Attribute):
+                    identifier = attr_node.attr + "." + identifier
+                    attr_node = attr_node.value
+                if isinstance(attr_node, ast.Name):
+                    identifier = attr_node.id + "." + identifier
+                self.direct_func_calls.add(identifier)
+    def __init__(self):
+        #self.funcdefs = set()
+        self.execs = set()
+        #self.external_cmds = set()
+        self.references = set()
+    def parse_python(self, node):
+        """Parse python code and record what it references.
+        'node' is the code to analyse; it is passed through str() and
+        check_indent() before being compiled to an AST. The names found are
+        added to self.references and self.execs is set to the functions
+        called directly. Results are cached per 'node' in pythonparsecache
+        and a cache hit gives the same outcome as a fresh parse.
+        """
+        cached = pythonparsecache.get(node)
+        if cached is not None:
+            # Work on copies so that later changes to this parser can never
+            # leak into the cache (or into other parsers served from it).
+            self.references.update(cached.references)
+            self.execs = set(cached.execs)
+            return
+        code = compile(check_indent(str(node)), "<string>", "exec",
+                       ast.PyCF_ONLY_AST)
+        visitor = self.ValueVisitor(code)
+        # Only Call nodes matter; testing the class name while walking the
+        # tree replaced a NodeVisitor for speed.
+        for n in ast.walk(code):
+            if n.__class__.__name__ == "Call":
+                visitor.visit_Call(n)
+        found = visitor.var_references | visitor.var_execs
+        self.references.update(found)
+        self.execs = visitor.direct_func_calls
+        # Cache a detached snapshot holding only this code's results instead
+        # of the live parser, whose sets may still grow or be shared.
+        snapshot = PythonParser()
+        snapshot.references = set(found)
+        snapshot.execs = set(visitor.direct_func_calls)
+        pythonparsecache[node] = snapshot
+# Maps the shell code passed to ShellParser.parse_shell() to a ShellParser
+# whose 'execs' holds the commands found for that code.
+shellparsecache = {}
+class ShellSyntaxError(Exception):
+    """Raised by ShellParser.parse_shell() when the shell code ends early."""
+class ShellParser():
+    """Collect the external commands executed by a piece of shell code.
+    'funcdefs' holds the shell functions defined by the code, 'allexecs'
+    every command name seen and 'execs' the commands which are not functions
+    defined by the code itself.
+    """
+    def __init__(self):
+        self.funcdefs = set()
+        self.allexecs = set()
+        self.execs = set()
+    def parse_shell(self, value):
+        """Parse the supplied shell code in a string, returning the external
+        commands it executes.
+        The result is also stored in self.execs and cached per 'value' in
+        shellparsecache. Raises ShellSyntaxError if the code is incomplete.
+        """
+        cached = shellparsecache.get(value)
+        if cached is not None:
+            # Merge a copy of the cached commands; the cached sets themselves
+            # are never handed out.
+            self.allexecs.update(cached.execs)
+        else:
+            self._parse_fragment(value)
+        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
+        if cached is None:
+            # Cache a detached snapshot rather than the live parser, whose
+            # sets keep changing if the parser is used again.
+            snapshot = ShellParser()
+            snapshot.execs = set(self.execs)
+            shellparsecache[value] = snapshot
+        return self.execs
+    def _parse_fragment(self, value):
+        """Tokenize shell code and feed it to process_tokens().
+        Updates self.allexecs and self.funcdefs only. It is used for the top
+        level code as well as for nested $(), backtick and eval code, which
+        must add to the state of the enclosing parse instead of being cached
+        on its own.
+        """
+        try:
+            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
+        except pyshlex.NeedMore:
+            raise ShellSyntaxError("Unexpected EOF")
+        for token in tokens:
+            self.process_tokens(token)
+    def process_tokens(self, tokens):
+        """Process a supplied portion of the syntax tree as returned by
+        pyshyacc.parse.
+        Each handler below returns a (more_tokens, words) pair: more_tokens
+        is processed recursively, words is handed to process_words().
+        Raises NotImplementedError for a token type without a handler.
+        """
+        def function_definition(value):
+            self.funcdefs.add(value.name)
+            return [value.body], None
+        def case_clause(value):
+            # Element 0 of each item in the case is the list of patterns, and
+            # Element 1 of each item in the case is the list of commands to be
+            # executed when that pattern matches.
+            words = chain(*[item[0] for item in value.items])
+            cmds  = chain(*[item[1] for item in value.items])
+            return cmds, words
+        def if_clause(value):
+            # Chain the condition and body of this branch with whatever
+            # follows it: a nested ("elif", clause) tuple or the else commands.
+            main = chain(value.cond, value.if_cmds)
+            rest = value.else_cmds
+            if isinstance(rest, tuple) and rest[0] == "elif":
+                return chain(main, if_clause(rest[1]))
+            else:
+                return chain(main, rest)
+        def simple_command(value):
+            # The words of the command plus the value side of its assignments.
+            return None, chain(value.words, (assign[1] for assign in value.assigns))
+        token_handlers = {
+            "and_or": lambda x: ((x.left, x.right), None),
+            "async": lambda x: ([x], None),
+            "brace_group": lambda x: (x.cmds, None),
+            "for_clause": lambda x: (x.cmds, x.items),
+            "function_definition": function_definition,
+            "if_clause": lambda x: (if_clause(x), None),
+            "pipeline": lambda x: (x.commands, None),
+            "redirect_list": lambda x: ([x.cmd], None),
+            "subshell": lambda x: (x.cmds, None),
+            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
+            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
+            "simple_command": simple_command,
+            "case_clause": case_clause,
+        }
+        for token in tokens:
+            name, value = token
+            # Only the lookup is guarded, so that a KeyError raised inside a
+            # handler is not misreported as an unsupported token type.
+            try:
+                handler = token_handlers[name]
+            except KeyError:
+                raise NotImplementedError("Unsupported token type " + name)
+            more_tokens, words = handler(value)
+            if more_tokens:
+                self.process_tokens(more_tokens)
+            if words:
+                self.process_words(words)
+    def process_words(self, words):
+        """Process a set of 'words' in pyshyacc parlance, which includes
+        extraction of executed commands from $() blocks, as well as grabbing
+        the command name argument.
+        """
+        words = list(words)
+        # First pass: parse the code inside `...` and $(...) and drop command
+        # words made of such a substitution. Iterate over a copy because
+        # 'words' is modified inside the loop.
+        for word in list(words):
+            wtree = pyshlex.make_wordtree(word[1])
+            for part in wtree:
+                if not isinstance(part, list):
+                    continue
+                if part[0] in ('`', '$('):
+                    command = pyshlex.wordtree_as_string(part[1:-1])
+                    self._parse_fragment(command)
+                    if word[0] in ("cmd_name", "cmd_word"):
+                        if word in words:
+                            words.remove(word)
+        # Second pass: find the command name. Words containing '=' are
+        # skipped; once one was seen a following TOKEN may be the command.
+        usetoken = False
+        for word in words:
+            if word[0] in ("cmd_name", "cmd_word") or \
+               (usetoken and word[0] == "TOKEN"):
+                if "=" in word[1]:
+                    usetoken = True
+                    continue
+                cmd = word[1]
+                if cmd.startswith("$"):
+                    msg.debug(1, None, "Warning: execution of non-literal command '%s'" % cmd)
+                elif cmd == "eval":
+                    # The arguments of eval are shell code themselves.
+                    command = " ".join(word for _, word in words[1:])
+                    self._parse_fragment(command)
+                else:
+                    self.allexecs.add(cmd)
+                break
diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py
index 1ed04d50c3..b9d9476fd8 100644
--- a/bitbake/lib/bb/data_smart.py
+++ b/bitbake/lib/bb/data_smart.py
@@ -46,7 +46,7 @@ class VariableParse:
        self.value = val
        self.references = set()
-        self.funcrefs = set()
+        self.execs = set()
    def var_sub(self, match):
            key = match.group()[2:-1]
@@ -64,10 +64,10 @@ class VariableParse:
            code = match.group()[3:-1]
            codeobj = compile(code.strip(), self.varname or "<expansion>", "eval")
-            parser = bb.rptest.PythonParser()
+            parser = bb.codeparser.PythonParser()
            parser.parse_python(code)
            self.references |= parser.references
-            self.funcrefs |= parser.execs
+            self.execs |= parser.execs
            value = utils.better_eval(codeobj, {"d": self.d})
            return str(value)
author	Richard Purdie <rpurdie@linux.intel.com>	2010-08-02 10:20:20 +0100
committer	Richard Purdie <rpurdie@linux.intel.com>	2010-08-31 12:41:23 +0100
commit	3492bff64a809b3a2a2376b83f41e099e16d22f6 (patch)
tree	5434ee1339f0fb038584a00fb14739909e570fb3 /bitbake/lib
parent	13fdd4ae5d5709332d84427ff8e60dc9ba62974f (diff)
download	poky-3492bff64a809b3a2a2376b83f41e099e16d22f6.tar.gz

diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py new file mode 100644 index 0000000000..88a26c82a7 --- /dev/null +++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,273 @@
		1	from pysh import pyshyacc, pyshlex
		2	from itertools import chain
		3	from bb import msg, utils
		4	import ast
		5	import codegen
		6
		7	def check_indent(codestr):
		8	"""If the code is indented, add a top level piece of code to 'remove' the indentation"""
		9
		10	if codestr[0] is " " or codestr[0] is " ":
		11	return "if 1:\n" + codestr
		12
		13	return codestr
		14
		15	pythonparsecache = {}
		16
		17	class PythonParser():
		18	class ValueVisitor():
		19	"""Visitor to traverse a python abstract syntax tree and obtain
		20	the variables referenced via bitbake metadata APIs, and the external
		21	functions called.
		22	"""
		23
		24	getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
		25	expands = ("d.expand", "bb.data.expand", "data.expand")
		26	execs = ("bb.build.exec_func", "bb.build.exec_task")
		27
		28	@classmethod
		29	def _compare_name(cls, strparts, node):
		30	"""Given a sequence of strings representing a python name,
		31	where the last component is the actual Name and the prior
		32	elements are Attribute nodes, determine if the supplied node
		33	matches.
		34	"""
		35
		36	if not strparts:
		37	return True
		38
		39	current, rest = strparts[0], strparts[1:]
		40	if isinstance(node, ast.Attribute):
		41	if current == node.attr:
		42	return cls._compare_name(rest, node.value)
		43	elif isinstance(node, ast.Name):
		44	if current == node.id:
		45	return True
		46	return False
		47
		48	@classmethod
		49	def compare_name(cls, value, node):
		50	"""Convenience function for the _compare_node method, which
		51	can accept a string (which is split by '.' for you), or an
		52	iterable of strings, in which case it checks to see if any of
		53	them match, similar to isinstance.
		54	"""
		55
		56	if isinstance(value, basestring):
		57	return cls._compare_name(tuple(reversed(value.split("."))),
		58	node)
		59	else:
		60	return any(cls.compare_name(item, node) for item in value)
		61
		62	def __init__(self, value):
		63	self.var_references = set()
		64	self.var_execs = set()
		65	self.direct_func_calls = set()
		66	self.var_expands = set()
		67	self.value = value
		68
		69	@classmethod
		70	def warn(cls, func, arg):
		71	"""Warn about calls of bitbake APIs which pass a non-literal
		72	argument for the variable name, as we're not able to track such
		73	a reference.
		74	"""
		75
		76	try:
		77	funcstr = codegen.to_source(func)
		78	argstr = codegen.to_source(arg)
		79	except TypeError:
		80	msg.debug(2, None, "Failed to convert function and argument to source form")
		81	else:
		82	msg.debug(1, None, "Warning: in call to '%s', argument '%s' is not a literal" %
		83	(funcstr, argstr))
		84
		85	def visit_Call(self, node):
		86	if self.compare_name(self.getvars, node.func):
		87	if isinstance(node.args[0], ast.Str):
		88	self.var_references.add(node.args[0].s)
		89	else:
		90	self.warn(node.func, node.args[0])
		91	elif self.compare_name(self.expands, node.func):
		92	if isinstance(node.args[0], ast.Str):
		93	self.warn(node.func, node.args[0])
		94	self.var_expands.update(node.args[0].s)
		95	elif isinstance(node.args[0], ast.Call) and \
		96	self.compare_name(self.getvars, node.args[0].func):
		97	pass
		98	else:
		99	self.warn(node.func, node.args[0])
		100	elif self.compare_name(self.execs, node.func):
		101	if isinstance(node.args[0], ast.Str):
		102	self.var_execs.add(node.args[0].s)
		103	else:
		104	self.warn(node.func, node.args[0])
		105	elif isinstance(node.func, ast.Name):
		106	self.direct_func_calls.add(node.func.id)
		107	elif isinstance(node.func, ast.Attribute):
		108	# We must have a qualified name. Therefore we need
		109	# to walk the chain of 'Attribute' nodes to determine
		110	# the qualification.
		111	attr_node = node.func.value
		112	identifier = node.func.attr
		113	while isinstance(attr_node, ast.Attribute):
		114	identifier = attr_node.attr + "." + identifier
		115	attr_node = attr_node.value
		116	if isinstance(attr_node, ast.Name):
		117	identifier = attr_node.id + "." + identifier
		118	self.direct_func_calls.add(identifier)
		119
		120	def __init__(self):
		121	#self.funcdefs = set()
		122	self.execs = set()
		123	#self.external_cmds = set()
		124	self.references = set()
		125
		126	def parse_python(self, node):
		127
		128	if node in pythonparsecache:
		129	self.references = pythonparsecache[node].references
		130	self.execs = pythonparsecache[node].execs
		131	return
		132
		133	code = compile(check_indent(str(node)), "<string>", "exec",
		134	ast.PyCF_ONLY_AST)
		135
		136	visitor = self.ValueVisitor(code)
		137	for n in ast.walk(code):
		138	if n.__class__.__name__ == "Call":
		139	visitor.visit_Call(n)
		140
		141	self.references.update(visitor.var_references)
		142	self.references.update(visitor.var_execs)
		143	self.execs = visitor.direct_func_calls
		144
		145	pythonparsecache[node] = self
		146
		147
		148	shellparsecache = {}
		149
		150	class ShellParser():
		151	def __init__(self):
		152	self.funcdefs = set()
		153	self.allexecs = set()
		154	self.execs = set()
		155
		156	def parse_shell(self, value):
		157	"""Parse the supplied shell code in a string, returning the external
		158	commands it executes.
		159	"""
		160
		161	if value in pythonparsecache:
		162	self.execs = shellparsecache[value].execs
		163	return
		164
		165	try:
		166	tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
		167	except pyshlex.NeedMore:
		168	raise ShellSyntaxError("Unexpected EOF")
		169
		170	for token in tokens:
		171	self.process_tokens(token)
		172	self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
		173
		174	shellparsecache[value] = self
		175
		176	return self.execs
		177
		178	def process_tokens(self, tokens):
		179	"""Process a supplied portion of the syntax tree as returned by
		180	pyshyacc.parse.
		181	"""
		182
		183	def function_definition(value):
		184	self.funcdefs.add(value.name)
		185	return [value.body], None
		186
		187	def case_clause(value):
		188	# Element 0 of each item in the case is the list of patterns, and
		189	# Element 1 of each item in the case is the list of commands to be
		190	# executed when that pattern matches.
		191	words = chain(*[item[0] for item in value.items])
		192	cmds = chain(*[item[1] for item in value.items])
		193	return cmds, words
		194
		195	def if_clause(value):
		196	main = chain(value.cond, value.if_cmds)
		197	rest = value.else_cmds
		198	if isinstance(rest, tuple) and rest[0] == "elif":
		199	return chain(main, if_clause(rest[1]))
		200	else:
		201	return chain(main, rest)
		202
		203	def simple_command(value):
		204	return None, chain(value.words, (assign[1] for assign in value.assigns))
		205
		206	token_handlers = {
		207	"and_or": lambda x: ((x.left, x.right), None),
		208	"async": lambda x: ([x], None),
		209	"brace_group": lambda x: (x.cmds, None),
		210	"for_clause": lambda x: (x.cmds, x.items),
		211	"function_definition": function_definition,
		212	"if_clause": lambda x: (if_clause(x), None),
		213	"pipeline": lambda x: (x.commands, None),
		214	"redirect_list": lambda x: ([x.cmd], None),
		215	"subshell": lambda x: (x.cmds, None),
		216	"while_clause": lambda x: (chain(x.condition, x.cmds), None),
		217	"until_clause": lambda x: (chain(x.condition, x.cmds), None),
		218	"simple_command": simple_command,
		219	"case_clause": case_clause,
		220	}
		221
		222	for token in tokens:
		223	name, value = token
		224	try:
		225	more_tokens, words = token_handlers[name](value)
		226	except KeyError:
		227	raise NotImplementedError("Unsupported token type " + name)
		228
		229	if more_tokens:
		230	self.process_tokens(more_tokens)
		231
		232	if words:
		233	self.process_words(words)
		234
		235	def process_words(self, words):
		236	"""Process a set of 'words' in pyshyacc parlance, which includes
		237	extraction of executed commands from $() blocks, as well as grabbing
		238	the command name argument.
		239	"""
		240
		241	words = list(words)
		242	for word in list(words):
		243	wtree = pyshlex.make_wordtree(word[1])
		244	for part in wtree:
		245	if not isinstance(part, list):
		246	continue
		247
		248	if part[0] in ('`', '$('):
		249	command = pyshlex.wordtree_as_string(part[1:-1])
		250	self.parse_shell(command)
		251
		252	if word[0] in ("cmd_name", "cmd_word"):
		253	if word in words:
		254	words.remove(word)
		255
		256	usetoken = False
		257	for word in words:
		258	if word[0] in ("cmd_name", "cmd_word") or \
		259	(usetoken and word[0] == "TOKEN"):
		260	if "=" in word[1]:
		261	usetoken = True
		262	continue
		263
		264	cmd = word[1]
		265	if cmd.startswith("$"):
		266	msg.debug(1, None, "Warning: execution of non-literal command '%s'" % cmd)
		267	elif cmd == "eval":
		268	command = " ".join(word for _, word in words[1:])
		269	self.parse_shell(command)
		270	else:
		271	self.allexecs.add(cmd)
		272	break
		273


diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py index 1ed04d50c3..b9d9476fd8 100644 --- a/bitbake/lib/bb/data_smart.py +++ b/bitbake/lib/bb/data_smart.py
@@ -46,7 +46,7 @@ class VariableParse:
46	self.value = val	46	self.value = val
47		47
48	self.references = set()	48	self.references = set()
49	self.funcrefs = set()	49	self.execs = set()
50		50
51	def var_sub(self, match):	51	def var_sub(self, match):
52	key = match.group()[2:-1]	52	key = match.group()[2:-1]
@@ -64,10 +64,10 @@ class VariableParse:
64	code = match.group()[3:-1]	64	code = match.group()[3:-1]
65	codeobj = compile(code.strip(), self.varname or "<expansion>", "eval")	65	codeobj = compile(code.strip(), self.varname or "<expansion>", "eval")
66		66
67	parser = bb.rptest.PythonParser()	67	parser = bb.codeparser.PythonParser()
68	parser.parse_python(code)	68	parser.parse_python(code)
69	self.references \|= parser.references	69	self.references \|= parser.references
70	self.funcrefs \|= parser.execs	70	self.execs \|= parser.execs
71		71
72	value = utils.better_eval(codeobj, {"d": self.d})	72	value = utils.better_eval(codeobj, {"d": self.d})
73	return str(value)	73	return str(value)