summary | refs | log | tree | commit | diff | stats
path: root/bitbake/lib/bb/codeparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r-- bitbake/lib/bb/codeparser.py 319
1 files changed, 319 insertions, 0 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
new file mode 100644
index 0000000000..e44e791585
--- /dev/null
+++ b/bitbake/lib/bb/codeparser.py
@@ -0,0 +1,319 @@
1import ast
2import codegen
3import logging
4import os.path
5import bb.utils, bb.data
6from itertools import chain
7from pysh import pyshyacc, pyshlex, sherrors
8from bb.cache import MultiProcessCache
9
10
11logger = logging.getLogger('BitBake.CodeParser')
12
13try:
14 import cPickle as pickle
15except ImportError:
16 import pickle
17 logger.info('Importing cPickle failed. Falling back to a very slow implementation.')
18
19
def check_indent(codestr):
    """Make an indented code fragment compilable.

    If the first line that contains code starts with a tab or a space, a
    top-level ``if 1:`` line is prepended so that the indentation becomes
    legal.  Fragments whose code starts at column 0 are returned unchanged,
    as are fragments that hold nothing but whitespace (including the empty
    string), since there is no code whose indentation needs fixing.
    """
    # Skip leading newlines, tabs and spaces.  Stop at the end of the string
    # so that empty or whitespace-only input cannot raise IndexError.
    i = 0
    length = len(codestr)
    while i < length and codestr[i] in ("\n", "\t", " "):
        i += 1

    if i == 0 or i == length:
        return codestr

    # The character just before the first code character says whether that
    # line is indented (tab/space) or merely preceded by blank lines.
    if codestr[i - 1] in ("\t", " "):
        return "if 1:\n" + codestr

    return codestr
34
35
class CodeParserCache(MultiProcessCache):
    """Cache of code parsing results, built on MultiProcessCache.

    The cache data is a two element list: element 0 holds the results for
    python code (a "refs" and an "execs" entry per hash) and element 1 holds
    the results for shell code (an "execs" entry per hash).
    """
    cache_file_name = "bb_codeparser.dat"
    CACHE_VERSION = 3

    def __init__(self):
        MultiProcessCache.__init__(self)
        main_data = self.cachedata
        extra_data = self.cachedata_extras
        self.pythoncache = main_data[0]
        self.shellcache = main_data[1]
        self.pythoncacheextras = extra_data[0]
        self.shellcacheextras = extra_data[1]

    def init_cache(self, d):
        """Initialise the cache through the parent class and re-bind the
        python/shell shortcuts to the data it set up."""
        MultiProcessCache.init_cache(self, d)

        # The parent re-assigns cachedata, so the shortcuts taken in
        # __init__ have to be refreshed.
        main_data = self.cachedata
        self.pythoncache = main_data[0]
        self.shellcache = main_data[1]

    def compress_keys(self, data):
        """Pass every cached "refs"/"execs" set in data through internSet().

        When the dicts are first created python interns the set keys, which
        greatly reduces memory usage.  Pickling and unpickling does not
        intern them again, so the same strings end up duplicated in memory
        and are written to the cache file several times.  Interning here
        makes the cache file much smaller, which means faster loads and a
        lower memory footprint for reloaded caches; that is worth the cost
        of these loops.  Python 3.x may behave better in this area.
        """
        python_entries = data[0]
        for key in python_entries:
            entry = python_entries[key]
            entry["refs"] = self.internSet(entry["refs"])
            entry["execs"] = self.internSet(entry["execs"])

        shell_entries = data[1]
        for key in shell_entries:
            entry = shell_entries[key]
            entry["execs"] = self.internSet(entry["execs"])

    def create_cachedata(self):
        """Return fresh, empty cache data: [python results, shell results]."""
        return [{}, {}]
74
# Module-level cache instance used by the parsers and the helpers below.
codeparsercache = CodeParserCache()


def parser_cache_init(d):
    """Initialise the module-level code parser cache with d."""
    codeparsercache.init_cache(d)


def parser_cache_save(d):
    """Call save_extras(d) on the module-level code parser cache."""
    codeparsercache.save_extras(d)


def parser_cache_savemerge(d):
    """Call save_merge(d) on the module-level code parser cache."""
    codeparsercache.save_merge(d)
85
Logger = logging.getLoggerClass()


class BufferedLogger(Logger):
    """Logger which stores records instead of emitting them.

    Records passed to handle() are kept in ``buffer`` until flush() hands
    them, in order, to the ``target`` object's handle() method.
    """

    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.buffer = []
        self.target = target
        self.setLevel(level)

    def handle(self, record):
        """Store the record for later delivery."""
        self.buffer.append(record)

    def flush(self):
        """Deliver all stored records to the target, then empty the buffer."""
        pending = self.buffer
        for entry in pending:
            self.target.handle(entry)
        self.buffer = []
101
class PythonParser():
    """Find the variables and functions a piece of python code refers to.

    After parse_python() has run the results are available as:

      references -- string literals passed as first argument to the calls
                    listed in getvars, containsfuncs and execfuncs
      execs      -- dotted names of all other functions that are called
    """
    getvars = ("d.getVar", "bb.data.getVar", "data.getVar", "d.appendVar", "d.prependVar")
    containsfuncs = ("bb.utils.contains", "base_contains", "oe.utils.contains")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug(2, 'Failed to convert function and argument to source form')
        else:
            self.log.debug(1, self.unhandled_message % (funcstr, argstr))

    @staticmethod
    def _string_literal(node):
        """Return the text of a string literal AST node, or None if the node
        is not a string literal.

        Older pythons represent string literals as ast.Str (text in ``.s``),
        newer ones as ast.Constant (text in ``.value``).  The node class is
        matched by name so the deprecated ast.Str attribute is never touched
        on interpreters where it warns or no longer exists.
        """
        kind = node.__class__.__name__
        if kind == "Str":
            return node.s
        if kind == "Constant" and isinstance(node.value, str):
            return node.value
        return None

    def visit_Call(self, node):
        """Record what a single ast.Call node refers to.

        Calls listed in getvars/containsfuncs add their first argument to
        var_references, calls listed in execfuncs add it to var_execs, and
        any other call with a resolvable dotted name is added to execs.
        """
        name = self.called_node_name(node.func)
        if name in self.getvars or name in self.containsfuncs:
            found = self.var_references
        elif name in self.execfuncs:
            found = self.var_execs
        else:
            if name and isinstance(node.func, (ast.Name, ast.Attribute)):
                self.execs.add(name)
            return

        if not node.args:
            # Nothing positional to inspect (for example a keyword-only
            # call); indexing args[0] would raise IndexError.
            self.log.debug(2, 'Call of %s has no positional argument to inspect' % name)
            return

        literal = self._string_literal(node.args[0])
        if literal is None:
            self.warn(node.func, node.args[0])
        else:
            found.add(literal)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Returns None when the callee is not a plain name or a chain of
        attribute accesses ending in a name.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break

    def __init__(self, name, log):
        self.var_references = set()
        self.var_execs = set()
        self.execs = set()
        self.references = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def parse_python(self, node):
        """Parse the python code in str(node), filling in references and
        execs.  Results are looked up in, and stored to, codeparsercache
        keyed on the hash of the code string.
        """
        h = hash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                # Hand out copies so that callers changing our sets cannot
                # alter the cached results.
                self.references = set(cache[h]["refs"])
                self.execs = set(cache[h]["execs"])
                return

        code = compile(check_indent(str(node)), "<string>", "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.references.update(self.var_references)
        self.references.update(self.var_execs)

        # Store copies for the same reason copies are handed out above.
        codeparsercache.pythoncacheextras[h] = {
            "refs": set(self.references),
            "execs": set(self.execs),
        }
186
class ShellParser():
    """Find the external commands executed by a piece of shell code.

    allexecs collects every command name seen, funcdefs the names of shell
    functions defined in the code, and execs is allexecs minus funcdefs.
    """

    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self.log = BufferedLogger('BitBake.Data.%s' % name, logging.DEBUG, log)
        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.

        Results are looked up in, and stored to, codeparsercache keyed on
        the hash of the code string.
        """

        h = hash(str(value))

        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                # Hand out a copy so that callers changing the returned set
                # cannot alter the cached result.
                self.execs = set(cache[h]["execs"])
                return self.execs

        self.execs = self._parse_shell(value)

        codeparsercache.shellcacheextras[h] = {"execs": set(self.execs)}

        return self.execs

    def _parse_shell(self, value):
        """Parse shell code without consulting or updating the cache.

        Nested code ($(...), backticks, eval) must be parsed through this
        method: the cache stores the commands of a complete top-level
        parse, so going through parse_shell() for a fragment would either
        skip updating allexecs (cache hit) or store the enclosing code's
        commands under the fragment's hash (cache miss).
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except pyshlex.NeedMore:
            raise sherrors.ShellSyntaxError("Unexpected EOF")

        for token in tokens:
            self.process_tokens(token)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
        return self.execs

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type without a handler.
        """

        # Every handler returns a pair (more_tokens, words): sub-trees to
        # recurse into and words to scan for command names.

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        for token in tokens:
            name, value = token
            try:
                more_tokens, words = token_handlers[name](value)
            except KeyError:
                raise NotImplementedError("Unsupported token type " + name)

            if more_tokens:
                self.process_tokens(more_tokens)

            if words:
                self.process_words(words)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        # Iterate over a copy since words may be removed along the way.
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command name built from a substitution is not a
                    # literal command, so drop it from the scan below.
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment; the command name may follow
                    # as a plain TOKEN.
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(1, self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(word for _, word in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break