summaryrefslogtreecommitdiffstats
path: root/bitbake
diff options
context:
space:
mode:
authorRichard Purdie <rpurdie@linux.intel.com>2010-08-02 10:20:20 +0100
committerRichard Purdie <rpurdie@linux.intel.com>2010-08-31 12:41:23 +0100
commit3492bff64a809b3a2a2376b83f41e099e16d22f6 (patch)
tree5434ee1339f0fb038584a00fb14739909e570fb3 /bitbake
parent13fdd4ae5d5709332d84427ff8e60dc9ba62974f (diff)
downloadpoky-3492bff64a809b3a2a2376b83f41e099e16d22f6.tar.gz
bitbake: Add codeparser for parsing shell and python functions
This commit is derived from Chris Larson's checksum work, turned into a standalone piece of code for parsing python and shell functions. The deindent code has been replaced with code to work around indentation for speed. The original NodeVisitor in the ast was replaced with a faster class walk call. Signed-off-by: Richard Purdie <rpurdie@linux.intel.com>
Diffstat (limited to 'bitbake')
-rw-r--r--bitbake/lib/bb/codeparser.py273
-rw-r--r--bitbake/lib/bb/data_smart.py6
2 files changed, 276 insertions, 3 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
new file mode 100644
index 0000000000..88a26c82a7
--- /dev/null
+++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,273 @@
1from pysh import pyshyacc, pyshlex
2from itertools import chain
3from bb import msg, utils
4import ast
5import codegen
6
def check_indent(codestr):
    """Make an indented code fragment compilable as top level code.

    If ``codestr`` starts with a space or a tab, an ``if 1:`` line is put
    in front of it so the indented text becomes the body of that statement.
    Any other input, including the empty string, is returned unchanged.
    """

    # Slice instead of indexing so that "" does not raise IndexError, and
    # compare by value: identity ("is") between strings is not reliable.
    if codestr[:1] in (" ", "\t"):
        return "if 1:\n" + codestr

    return codestr
14
# Results of PythonParser.parse_python(), keyed by the object that was
# passed in. Each value is a (references, execs) pair of frozensets, so a
# cached result cannot be altered through any parser instance.
pythonparsecache = {}


class PythonParser(object):
    """Collect variable references and called functions from python code.

    After parse_python() has run:
      references -- string literals passed as the first argument to the
                    getVar style and exec_func/exec_task style calls
      execs      -- names of all other functions called by the code
    """

    class ValueVisitor(object):
        """Visitor to traverse a python abstract syntax tree and obtain
        the variables referenced via bitbake metadata APIs, and the external
        functions called.
        """

        getvars = ("d.getVar", "bb.data.getVar", "data.getVar")
        expands = ("d.expand", "bb.data.expand", "data.expand")
        execs = ("bb.build.exec_func", "bb.build.exec_task")

        @classmethod
        def _compare_name(cls, strparts, node):
            """Given a sequence of strings representing a python name,
            where the last component is the actual Name and the prior
            elements are Attribute nodes, determine if the supplied node
            matches.

            ``strparts`` is in reversed order (innermost attribute first),
            which is the order in which the Attribute chain is walked.
            """

            if not strparts:
                return True

            current, rest = strparts[0], strparts[1:]
            if isinstance(node, ast.Attribute):
                if current == node.attr:
                    return cls._compare_name(rest, node.value)
            elif isinstance(node, ast.Name):
                if current == node.id:
                    return True
            return False

        @classmethod
        def compare_name(cls, value, node):
            """Convenience function for the _compare_name method, which
            can accept a string (which is split by '.' for you), or an
            iterable of strings, in which case it checks to see if any of
            them match, similar to isinstance.
            """

            # basestring: this module is written for Python 2.
            if isinstance(value, basestring):
                return cls._compare_name(tuple(reversed(value.split("."))),
                                         node)
            else:
                return any(cls.compare_name(item, node) for item in value)

        def __init__(self, value):
            self.var_references = set()
            self.var_execs = set()
            self.direct_func_calls = set()
            self.var_expands = set()
            self.value = value

        @classmethod
        def warn(cls, func, arg):
            """Warn about calls of bitbake APIs which pass a non-literal
            argument for the variable name, as we're not able to track such
            a reference.
            """

            try:
                funcstr = codegen.to_source(func)
                argstr = codegen.to_source(arg)
            except TypeError:
                msg.debug(2, None, "Failed to convert function and argument to source form")
            else:
                msg.debug(1, None, "Warning: in call to '%s', argument '%s' is not a literal" %
                          (funcstr, argstr))

        def _record_literal(self, func, arg, target):
            """Add a string literal argument to target, else warn about it."""
            if isinstance(arg, ast.Str):
                target.add(arg.s)
            elif arg is not None:
                self.warn(func, arg)

        def visit_Call(self, node):
            """Classify one Call node and record what it refers to.

            Only the first positional argument of the bitbake API calls is
            examined. Any call that is not one of those APIs is recorded in
            direct_func_calls under its (dotted) name.
            """
            func = node.func
            # A call without positional arguments has nothing to inspect;
            # indexing args[0] unconditionally would raise IndexError.
            arg = node.args[0] if node.args else None

            if self.compare_name(self.getvars, func):
                self._record_literal(func, arg, self.var_references)
            elif self.compare_name(self.expands, func):
                if isinstance(arg, ast.Str):
                    # add(), not update(): update() would insert each
                    # character of the string as a separate element.
                    self.var_expands.add(arg.s)
                elif isinstance(arg, ast.Call) and \
                     self.compare_name(self.getvars, arg.func):
                    # expand(getVar(...)): the inner getVar call is a Call
                    # node of its own and is handled when it is visited.
                    pass
                elif arg is not None:
                    self.warn(func, arg)
            elif self.compare_name(self.execs, func):
                self._record_literal(func, arg, self.var_execs)
            elif isinstance(func, ast.Name):
                self.direct_func_calls.add(func.id)
            elif isinstance(func, ast.Attribute):
                # We must have a qualified name.  Therefore we need
                # to walk the chain of 'Attribute' nodes to determine
                # the qualification.
                attr_node = func.value
                identifier = func.attr
                while isinstance(attr_node, ast.Attribute):
                    identifier = attr_node.attr + "." + identifier
                    attr_node = attr_node.value
                if isinstance(attr_node, ast.Name):
                    identifier = attr_node.id + "." + identifier
                self.direct_func_calls.add(identifier)

    def __init__(self):
        #self.funcdefs = set()
        self.execs = set()
        #self.external_cmds = set()
        self.references = set()

    def parse_python(self, node):
        """Parse python code and store the result on this parser.

        ``node`` is converted with str() and compiled to an AST; every Call
        node in it is passed to a ValueVisitor. On return ``references`` and
        ``execs`` describe this piece of code only (earlier contents of the
        two attributes are replaced). Results are remembered in
        pythonparsecache, keyed by ``node``. Returns None.
        """

        cached = pythonparsecache.get(node)
        if cached is not None:
            references, execs = cached
            # Hand out copies so callers can modify their sets freely.
            self.references = set(references)
            self.execs = set(execs)
            return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        visitor = self.ValueVisitor(code)
        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                visitor.visit_Call(n)

        references = visitor.var_references | visitor.var_execs
        execs = visitor.direct_func_calls

        # Store immutable snapshots rather than the parser itself, so later
        # changes to this instance cannot leak into the cache.
        pythonparsecache[node] = (frozenset(references), frozenset(execs))

        self.references = set(references)
        self.execs = set(execs)
146
147
# Results of ShellParser.parse_shell(), keyed by the shell source string.
# Each value is a frozenset of command names, so a cached result cannot be
# altered through any parser instance.
shellparsecache = {}


class ShellSyntaxError(Exception):
    """Raised by ShellParser when the shell code ends unexpectedly."""


class ShellParser(object):
    """Determine which external commands a piece of shell code executes.

    funcdefs -- names of the shell functions defined by the parsed code
    allexecs -- every command name found, including calls of those functions
    execs    -- allexecs without the names in funcdefs
    """

    def __init__(self):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        The returned set is also stored in ``self.execs`` and describes
        ``value`` only. Results are remembered in shellparsecache; on a
        cache hit funcdefs and allexecs are left untouched.
        Raises ShellSyntaxError if the code is incomplete.
        """

        cached = shellparsecache.get(value)
        if cached is not None:
            self.execs = set(cached)
            return self.execs

        # Start empty so that the cached entry depends on `value` alone and
        # not on whatever this instance parsed before.
        self.funcdefs = set()
        self.allexecs = set()
        self._parse_fragment(value)

        self.execs = self.allexecs - self.funcdefs
        shellparsecache[value] = frozenset(self.execs)

        return self.execs

    def _parse_fragment(self, value):
        """Parse shell code, adding what is found to funcdefs and allexecs.

        Used for the top level code and for nested code ($(), backticks,
        eval). It does not touch the cache or ``self.execs``, so nested
        fragments cannot overwrite the result of the enclosing parse.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        ``tokens`` is an iterable of (name, value) pairs. Raises
        NotImplementedError for a token name without a handler.
        """

        # Every handler returns a pair (more_tokens, words): nested tokens
        # to recurse into, and words to hand to process_words(). Either
        # element may be None.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            # Returns only the chained tokens; the "if_clause" entry below
            # wraps the result into the (more_tokens, words) pair.
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            # Only the lookup is guarded: a KeyError raised inside a handler
            # must not be reported as an unsupported token type.
            try:
                handler = token_handlers[name]
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            more_tokens, words = handler(value)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a snapshot, as `words` is modified inside the loop.
        for word in tuple(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_fragment(command)

                    # A command word containing a substitution is not a
                    # literal command name, so keep it out of the scan below.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment; the command may follow as a
                    # plain TOKEN, so accept those from now on.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    msg.debug(1, None, "Warning: execution of non-literal command '%s'" % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_fragment(command)
                else:
                    self.allexecs.add(cmd)
                break
273
diff --git a/bitbake/lib/bb/data_smart.py b/bitbake/lib/bb/data_smart.py
index 1ed04d50c3..b9d9476fd8 100644
--- a/bitbake/lib/bb/data_smart.py
+++ b/bitbake/lib/bb/data_smart.py
@@ -46,7 +46,7 @@ class VariableParse:
46 self.value = val 46 self.value = val
47 47
48 self.references = set() 48 self.references = set()
49 self.funcrefs = set() 49 self.execs = set()
50 50
51 def var_sub(self, match): 51 def var_sub(self, match):
52 key = match.group()[2:-1] 52 key = match.group()[2:-1]
@@ -64,10 +64,10 @@ class VariableParse:
64 code = match.group()[3:-1] 64 code = match.group()[3:-1]
65 codeobj = compile(code.strip(), self.varname or "<expansion>", "eval") 65 codeobj = compile(code.strip(), self.varname or "<expansion>", "eval")
66 66
67 parser = bb.rptest.PythonParser() 67 parser = bb.codeparser.PythonParser()
68 parser.parse_python(code) 68 parser.parse_python(code)
69 self.references |= parser.references 69 self.references |= parser.references
70 self.funcrefs |= parser.execs 70 self.execs |= parser.execs
71 71
72 value = utils.better_eval(codeobj, {"d": self.d}) 72 value = utils.better_eval(codeobj, {"d": self.d})
73 return str(value) 73 return str(value)