summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bb/codeparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r--bitbake/lib/bb/codeparser.py406
1 files changed, 406 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
new file mode 100644
index 0000000000..8b8f91a762
--- /dev/null
+++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,406 @@
1import ast
2import codegen
3import logging
4import os.path
5import bb.utils, bb.data
6from itertools import chain
7from pysh import pyshyacc, pyshlex, sherrors
8from bb.cache import MultiProcessCache
9
10
11logger = logging.getLogger('BitBake.CodeParser')
12
13try:
14 import cPickle as pickle
15except ImportError:
16 import pickle
17 logger.info('Importing cPickle failed. Falling back to a very slow implementation.')
18
19
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation

    Leading newlines, tabs and spaces are skipped.  If the character right
    before the first non-whitespace character is a tab or a space (the first
    line of code is indented), the text is prefixed with ``if 1:`` so that
    it can be compiled as top level code.  In every other case the string is
    returned unchanged.

    Empty and whitespace-only strings contain no code to re-indent and are
    returned unchanged rather than raising IndexError.
    """

    length = len(codestr)
    i = 0
    # Bound the scan by the string length so that input consisting only of
    # whitespace (or nothing at all) cannot index past the end.
    while i < length and codestr[i] in ("\n", "\t", " "):
        i += 1

    # No leading whitespace, or nothing but whitespace: leave as is.
    if i == 0 or i == length:
        return codestr

    if codestr[i - 1] in ("\t", " "):
        return "if 1:\n" + codestr

    return codestr
34
35
36# Basically pickle, in python 2.7.3 at least, does badly with data duplication
37# upon pickling and unpickling. Combine this with duplicate objects and things
38# are a mess.
39#
40# When the sets are originally created, python calls intern() on the set keys
41# which significantly improves memory usage. Sadly the pickle/unpickle process
42# doesn't call intern() on the keys and results in the same strings being duplicated
43# in memory. This also means pickle will save the same string multiple times in
44# the cache file.
45#
46# By having shell and python cacheline objects with setstate/getstate, we force
47# the object creation through our own routine where we can call intern (via internSet).
48#
49# We also use hashable frozensets and ensure we use references to these so that
50# duplicates can be removed, both in memory and in the resulting pickled data.
51#
52# By playing these games, the size of the cache file shrinks dramatically
53# meaning faster load times and the reloaded cache files also consume much less
54# memory. Smaller cache files, faster load times and lower memory usage is good.
55#
56# A custom getstate/setstate using tuples is actually worth 15% cachesize by
57# avoiding duplication of the attribute names!
58
class SetCache(object):
    """Pool of canonical frozensets whose members are interned strings.

    Requesting the same set contents twice yields the very same frozenset
    object, so equal sets can share a single instance.
    """

    def __init__(self):
        # Maps every frozenset handed out so far to its canonical instance.
        self.setcache = {}

    def internSet(self, items):
        """Return the canonical frozenset holding the interned ``items``.

        ``items`` is any iterable of strings acceptable to ``intern()``.
        """
        s = frozenset(intern(i) for i in items)
        # Key on the set itself rather than on hash(s): two different sets
        # may share a hash value, and a hash-keyed lookup would then hand
        # back the wrong set.  The dict resolves collisions by equality.
        return self.setcache.setdefault(s, s)


codecache = SetCache()
75
class pythonCacheLine(object):
    """Cache entry for a parsed python fragment.

    Holds three attributes: ``refs`` and ``execs`` (frozensets obtained from
    ``codecache.internSet``) and ``contains`` (a dict whose values are such
    frozensets).
    """

    def __init__(self, refs, execs, contains):
        self.refs = codecache.internSet(refs)
        self.execs = codecache.internSet(execs)
        self.contains = {}
        for key in contains:
            self.contains[key] = codecache.internSet(contains[key])

    def __getstate__(self):
        # A plain tuple is pickled instead of the instance __dict__.
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Rebuild through __init__ so that every set passes through
        # codecache.internSet() again when unpickled.
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Combine the hashes of both sets with the (key, hash) pairs of
        # 'contains', visiting the keys in sorted order.
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.append(key)
            parts.append(hash(self.contains[key]))
        return hash(tuple(parts))
95
class shellCacheLine(object):
    """Cache entry for a parsed shell fragment.

    Holds a single attribute, ``execs``, a frozenset obtained from
    ``codecache.internSet``.
    """

    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        # The pickled state is the frozenset itself, not a 1-tuple.
        state = self.execs
        return state

    def __setstate__(self, state):
        # Rebuild through __init__ so the set passes through
        # codecache.internSet() again when unpickled.
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)
108
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results built on MultiProcessCache.

    ``cachedata`` and ``cachedata_extras`` are both indexed with 0 for the
    python entries and 1 for the shell entries.
    """

    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 7

    def __init__(self):
        MultiProcessCache.__init__(self)

        data = self.cachedata
        extras = self.cachedata_extras
        self.pythoncache = data[0]
        self.shellcache = data[1]
        self.pythoncacheextras = extras[0]
        self.shellcacheextras = extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the arguments, reusing an earlier
        instance that has the same hash when one exists."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for ``execs``, reusing an earlier
        instance that has the same hash when one exists."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, d):
        MultiProcessCache.init_cache(self, d)

        # cachedata gets re-assigned in the parent
        data = self.cachedata
        self.pythoncache = data[0]
        self.shellcache = data[1]

    def create_cachedata(self):
        """Return a fresh, empty [python, shell] pair of dicts."""
        return [{}, {}]


codeparsercache = CodeParserCache()
153
def parser_cache_init(d):
    """Call ``init_cache(d)`` on the module level ``codeparsercache``."""
    cache = codeparsercache
    cache.init_cache(d)
156
def parser_cache_save(d):
    """Call ``save_extras(d)`` on the module level ``codeparsercache``."""
    cache = codeparsercache
    cache.save_extras(d)
159
def parser_cache_savemerge(d):
    """Call ``save_merge(d)`` on the module level ``codeparsercache``."""
    cache = codeparsercache
    cache.save_merge(d)
162
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which stores its records and forwards them only on flush().

    name   -- logger name handed to the base Logger class
    level  -- level set on this logger, deciding which records get buffered
    target -- logger that receives the buffered records when flush() runs
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        # Hold on to the record instead of dispatching it to handlers.
        self.buffer.append(record)

    def flush(self):
        """Forward the buffered records to the target and empty the buffer.

        Logger.handle() does not look at the logger's own level, so records
        are only passed on when the target is enabled for their level.
        Without this check, records the target would never have created
        itself (e.g. debug output) still reach its handlers.
        """
        target = self.target
        for record in self.buffer:
            if target.isEnabledFor(record.levelno):
                target.handle(record)
        self.buffer = []
178
class PythonParser():
    """Collects variable references and called names from python code.

    After parse_python() the results are available as:
      references -- string literals passed as first argument to calls whose
                    name ends with one of 'getvars'
      contains   -- dict mapping a variable name to the set of literal
                    second arguments seen in 'containsfuncs' calls
      execs      -- names of other called functions, plus the literal first
                    arguments of 'execfuncs' calls
    """

    getvars = (".getVar", ".appendVar", ".prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains", "bb.utils.contains_any")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record what a single ast.Call node references or executes.

        Calls of the tracked APIs which carry no positional argument hold
        nothing to record and are ignored instead of raising IndexError.
        """
        name = self.called_node_name(node.func)
        is_getvar = bool(name) and name.endswith(self.getvars)
        is_contains = name in self.containsfuncs

        if is_getvar or is_contains:
            if not node.args:
                return
            first = node.args[0]
            if not isinstance(first, ast.Str):
                self.warn(node.func, first)
                return
            varname = first.s
            # A contains call only counts as such when a literal second
            # argument is present; otherwise treat it as a plain reference.
            if is_contains and len(node.args) > 1 and isinstance(node.args[1], ast.Str):
                self.contains.setdefault(varname, set()).add(node.args[1].s)
            else:
                self.references.add(varname)
        elif name in self.execfuncs:
            if not node.args:
                return
            first = node.args[0]
            if isinstance(first, ast.Str):
                self.var_execs.add(first.s)
            else:
                self.warn(node.func, first)
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form

        Returns the dotted name for a chain of ast.Attribute nodes ending in
        an ast.Name, and None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def __init__(self, name, log):
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def _load_cacheline(self, cacheline):
        """Copy a cached result into fresh, mutable containers on self."""
        self.references = set(cacheline.refs)
        self.execs = set(cacheline.execs)
        self.contains = {}
        for i in cacheline.contains:
            self.contains[i] = set(cacheline.contains[i])

    def parse_python(self, node):
        """Parse the python code in ``str(node)`` and fill in the results.

        The result is looked up by hash(str(node)) in the main cache and
        then in the extras cache; on a miss the code is compiled to an AST,
        every Call node is visited and the outcome is stored in the extras
        cache.
        """
        h = hash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                self._load_cacheline(cache[h])
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if n.__class__.__name__ == "Call":
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
271
class ShellParser():
    """Extracts the commands executed by a piece of shell code.

    Attributes filled while parsing:
      funcdefs -- names of shell functions defined in the code
      allexecs -- every command name encountered
      execs    -- allexecs without the names found in funcdefs
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = hash(str(value))

        # Results are keyed by hash(str(value)); check the main cache, then
        # the extras cache, before doing any real parsing.
        if h in codeparsercache.shellcache:
            self.execs = set(codeparsercache.shellcache[h].execs)
            return self.execs

        if h in codeparsercache.shellcacheextras:
            self.execs = set(codeparsercache.shellcacheextras[h].execs)
            return self.execs

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run pyshyacc over ``value`` and process every resulting token.

        Raises sherrors.ShellSyntaxError when the lexer asks for more input.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type without a handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Each handler returns a (more_tokens, words) pair for its token.
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            # Only the dispatch lookup is guarded: a KeyError raised inside
            # a handler is a different problem and must not be reported as
            # an unsupported token type.
            try:
                handler = token_handlers[name]
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            more_tokens, words = handler(value)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy, since entries may be removed from 'words'.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command name made of a substitution cannot be
                    # resolved; drop it (once) from the list.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                # A word containing '=' is skipped; after it a following
                # TOKEN word is also accepted as the command.
                if "=" in word[1]:
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break