summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bb/codeparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r--bitbake/lib/bb/codeparser.py328
1 files changed, 328 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
new file mode 100644
index 0000000000..a50b9f268a
--- /dev/null
+++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,328 @@
1import ast
2import codegen
3import logging
4import os.path
5import bb.utils, bb.data
6from itertools import chain
7from pysh import pyshyacc, pyshlex, sherrors
8from bb.cache import MultiProcessCache
9
10
11logger = logging.getLogger('BitBake.CodeParser')
12
13try:
14 import cPickle as pickle
15except ImportError:
16 import pickle
17 logger.info('Importing cPickle failed. Falling back to a very slow implementation.')
18
19
def check_indent(codestr):
    """Wrap indented code so that it can be compiled as a top-level block.

    Leading newlines, tabs and spaces are skipped. If the last skipped
    character is a tab or a space, the first statement is indented, so the
    code is prefixed with ``if 1:`` to give that indentation an enclosing
    block. Otherwise the string is returned unchanged.

    An empty or whitespace-only string holds no statement to wrap and is
    returned as-is; the scan is bounded so such input no longer raises
    IndexError.
    """
    length = len(codestr)
    i = 0
    # Stop at the end of the string as well as at the first real character.
    while i < length and codestr[i] in "\n\t ":
        i += 1

    # Nothing was skipped, or nothing but whitespace exists: leave it alone.
    if i == 0 or i == length:
        return codestr

    if codestr[i - 1] in "\t ":
        return "if 1:\n" + codestr

    return codestr
34
35
class CodeParserCache(MultiProcessCache):
    """Cache of parse results for python and shell code fragments.

    Slot 0 of the cache data holds the python entries and slot 1 the shell
    entries; the same layout is used for the "extras" data.
    """
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 4

    def __init__(self):
        MultiProcessCache.__init__(self)
        # Convenience aliases for the two slots of each data list.
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]
        self.pythoncacheextras = self.cachedata_extras[0]
        self.shellcacheextras = self.cachedata_extras[1]

    def init_cache(self, d):
        """Initialise the cache via the parent class and refresh the aliases."""
        MultiProcessCache.init_cache(self, d)

        # The parent re-assigns cachedata, so the aliases must be rebound.
        self.pythoncache, self.shellcache = self.cachedata[0], self.cachedata[1]

    def compress_keys(self, data):
        """Replace every stored set in ``data`` with its interned form.

        When the dicts are first created, python calls intern() on the set
        keys, which significantly improves memory usage. The pickle/unpickle
        process does not, so the same strings end up duplicated in memory and
        saved multiple times in the cache file. Interning the data here
        shrinks the cache file dramatically, giving faster load times and
        lower memory use for reloaded caches. That is worth the cost of these
        loops and of the intern() storage.
        Python 3.x may behave better in this area.
        """
        intern_set = self.internSet
        for entry in data[0].values():
            entry["refs"] = intern_set(entry["refs"])
            entry["execs"] = intern_set(entry["execs"])
            contains = entry["contains"]
            for key in contains:
                contains[key] = intern_set(contains[key])
        for entry in data[1].values():
            entry["execs"] = intern_set(entry["execs"])

    def create_cachedata(self):
        """Return a fresh, empty [python, shell] pair of dictionaries."""
        return [{}, {}]


# Module-level instance shared by the parsers and helper functions below.
codeparsercache = CodeParserCache()
78
def parser_cache_init(d):
    """Initialise the module-level code parser cache by calling its init_cache(d)."""
    codeparsercache.init_cache(d)
81
def parser_cache_save(d):
    """Call save_extras(d) on the module-level code parser cache."""
    codeparsercache.save_extras(d)
84
def parser_cache_savemerge(d):
    """Call save_merge(d) on the module-level code parser cache."""
    codeparsercache.save_merge(d)
87
# Base the buffered logger on whatever logger class is currently installed.
Logger = logging.getLoggerClass()


class BufferedLogger(Logger):
    """Logger that queues records in memory until flush() is called.

    name   -- logger name passed to the base class
    level  -- level applied with setLevel()
    target -- object whose handle() receives the queued records on flush()
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.target = target
        self.buffer = []
        self.setLevel(level)

    def handle(self, record):
        """Queue the record rather than dispatching it."""
        self.buffer.append(record)

    def flush(self):
        """Hand each queued record to target.handle(), then start a new queue."""
        for queued in self.buffer:
            self.target.handle(queued)
        self.buffer = []
103
class PythonParser():
    """Extract variable references and executed functions from python code.

    After parse_python() the results are available as:
      references -- variable names passed as string literals to the getvars
                    functions (or to a contains function without a literal
                    second argument)
      contains   -- dict mapping variable name to the set of literal values
                    it is tested against by the contains functions
      execs      -- names of called functions, plus literal names passed to
                    the execfuncs functions
    """
    getvars = ("d.getVar", "bb.data.getVar", "data.getVar", "d.appendVar", "d.prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def _literal_string(self, node):
        """Return the text of a string-literal AST node, or None otherwise.

        Newer interpreters represent literals as ast.Constant while older
        ones use ast.Str; both are accepted so the parser works on either.
        """
        constant = getattr(ast, "Constant", None)
        if constant is not None and isinstance(node, constant):
            value = node.value
            return value if isinstance(value, str) else None
        legacy_str = getattr(ast, "Str", None)
        if legacy_str is not None and isinstance(node, legacy_str):
            return node.s
        return None

    def visit_Call(self, node):
        """Record what a single ast.Call node references or executes."""
        name = self.called_node_name(node.func)
        args = node.args
        if name in self.getvars or name in self.containsfuncs:
            if not args:
                # No positional argument to inspect (e.g. keyword-only call);
                # indexing args[0] here would raise IndexError.
                return
            varname = self._literal_string(args[0])
            if varname is None:
                self.warn(node.func, args[0])
                return
            checked = None
            # Only a contains function with a literal second argument is
            # recorded in self.contains; anything else is a plain reference.
            if name in self.containsfuncs and len(args) > 1:
                checked = self._literal_string(args[1])
            if checked is not None:
                self.contains.setdefault(varname, set()).add(checked)
            else:
                self.references.add(varname)
        elif name in self.execfuncs:
            if not args:
                return
            funcname = self._literal_string(args[0])
            if funcname is not None:
                self.var_execs.add(funcname)
            else:
                self.warn(node.func, args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Returns the dotted name for a chain of attribute accesses ending in
        a plain name, and None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node):
        """Parse the python code in ``node`` (converted with str()).

        Results are taken from the code parser cache when an entry for the
        code's hash exists; otherwise the code is compiled to an AST, every
        call is examined, and the results are stored in the extras cache.
        """
        h = hash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                cached = cache[h]
                self.references = cached["refs"]
                self.execs = cached["execs"]
                self.contains = cached["contains"]
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = {
            "refs": self.references,
            "execs": self.execs,
            "contains": self.contains,
        }
195
class ShellParser():
    """Work out which external commands a piece of shell code executes.

    funcdefs -- names of shell functions defined in the parsed code
    allexecs -- every command name seen, including nested fragments
    execs    -- allexecs minus funcdefs; the result of parse_shell()
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        The result is looked up in, and stored to, the code parser cache
        under the hash of the code. Only this top-level entry point uses the
        cache; nested fragments are handled by _parse_shell().
        """

        h = hash(str(value))

        if h in codeparsercache.shellcache:
            self.execs = codeparsercache.shellcache[h]["execs"]
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = codeparsercache.shellcacheextras[h]["execs"]
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = {}
        codeparsercache.shellcacheextras[h]["execs"] = self.execs

        return self.execs

    def _parse_shell(self, value):
        """Tokenise ``value`` and accumulate its commands into allexecs.

        Used for the top-level code and for nested fragments ($(), backticks,
        eval). It deliberately bypasses the cache: a cache hit on a nested
        fragment would skip updating allexecs, and a miss would store the
        outer script's accumulated commands under the fragment's hash.

        Raises sherrors.ShellSyntaxError if the parser needs more input.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Each (name, value) token is dispatched to a handler returning a
        (more_tokens, words) pair: more_tokens are processed recursively and
        words are passed to process_words().

        Raises NotImplementedError for a token type with no handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            # Returns one iterable of tokens: condition, "if" commands, then
            # the else commands (recursing for an "elif" branch).
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                more_tokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy because matching words are removed below.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    # Nested fragment: parse without touching the cache.
                    self._parse_shell(command)

                    # A command word that is itself a substitution is not a
                    # literal command name, so drop it from consideration.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                # A command word containing "=" is skipped and the following
                # TOKEN is considered instead (presumably a VAR=value prefix;
                # verify against pyshyacc's word classification).
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break