summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bb/codeparser.py
diff options
context:
space:
mode:
Diffstat (limited to 'bitbake/lib/bb/codeparser.py')
-rw-r--r--bitbake/lib/bb/codeparser.py517
1 files changed, 0 insertions, 517 deletions
diff --git a/bitbake/lib/bb/codeparser.py b/bitbake/lib/bb/codeparser.py
deleted file mode 100644
index 2e8b7ced3c..0000000000
--- a/bitbake/lib/bb/codeparser.py
+++ /dev/null
@@ -1,517 +0,0 @@
1#
2# Copyright BitBake Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7"""
8BitBake code parser
9
10Parses actual code (i.e. python and shell) for functions and in-line
11expressions. Used mainly to determine dependencies on other functions
12and variables within the BitBake metadata. Also provides a cache for
13this information in order to speed up processing.
14
15(Not to be confused with the code that parses the metadata itself,
16see lib/bb/parse/ for that).
17
18NOTE: if you change how the parsers gather information you will almost
19certainly need to increment CodeParserCache.CACHE_VERSION below so that
20any existing codeparser cache gets invalidated. Additionally you'll need
21to increment __cache_version__ in cache.py in order to ensure that old
22recipe caches don't trigger "Taskhash mismatch" errors.
23
24"""
25
26import ast
27import sys
28import codegen
29import logging
30import inspect
31import bb.pysh as pysh
32import bb.utils, bb.data
33import hashlib
34from itertools import chain
35from bb.pysh import pyshyacc, pyshlex
36from bb.cache import MultiProcessCache
37
38logger = logging.getLogger('BitBake.CodeParser')
39
def bbhash(s):
    """Return the hexadecimal SHA-256 digest of the string *s* (UTF-8 encoded)."""
    digest = hashlib.sha256()
    digest.update(s.encode("utf-8"))
    return digest.hexdigest()
42
def check_indent(codestr):
    """If the code is indented, add a top level piece of code to 'remove' the indentation.

    The leading run of newlines, tabs and spaces is inspected. When that run
    ends in a tab or a space (i.e. the first real statement is indented) the
    code is wrapped in an ``if 1:`` block so that it compiles. Otherwise the
    string is returned unchanged.

    Empty or whitespace-only input is returned unchanged instead of running
    off the end of the string.
    """
    # Length of the leading whitespace run.
    i = len(codestr) - len(codestr.lstrip("\n\t "))

    # No leading whitespace at all, or nothing but whitespace: nothing to fix up.
    if i == 0 or i == len(codestr):
        return codestr

    if codestr[i - 1] in ("\t", " "):
        if codestr[0] == "\n":
            # Since we're adding a line, we need to remove one line of any empty padding
            # to ensure line numbers are correct
            codestr = codestr[1:]
        return "if 1:\n" + codestr

    return codestr
61
# Maps "<namespace>.<function>" to [references, execs, var_execs, contains]
# as gathered by add_module_functions().
modulecode_deps = {}

def add_module_functions(fn, functions, namespace):
    """Record the dependencies of python module functions in modulecode_deps.

    fn is the path of the module file, functions maps function names to
    function objects and namespace is the prefix the functions are known by.

    The cache key for each function is built from the file path, size and
    mtime plus the function name, so the source text only has to be fetched
    (via inspect) when the parser reports a cache miss with KeyError.
    """
    import os
    info = os.stat(fn)
    fixedhash = ":".join((fn, str(info.st_size), str(info.st_mtime)))
    for funcname in functions:
        name = "%s.%s" % (namespace, funcname)
        cachekey = fixedhash + funcname
        parser = PythonParser(name, logger)
        try:
            # Cheap path: no source text supplied, only the fixed hash
            parser.parse_python(None, filename=fn, lineno=1, fixedhash=cachekey)
        except KeyError:
            # Not cached, take the hit of obtaining the source
            lines, lineno = inspect.getsourcelines(functions[funcname])
            parser.parse_python("".join(lines), filename=fn, lineno=lineno, fixedhash=cachekey)
        # Expand internal module exec references
        execs = set(parser.execs)
        for called in parser.execs:
            if called in functions:
                execs.discard(called)
                execs.add(namespace + "." + called)
        modulecode_deps[name] = [
            parser.references.copy(),
            execs,
            parser.var_execs.copy(),
            parser.contains.copy(),
        ]
87
def update_module_dependencies(d):
    """Apply each module function's "vardepsexclude" flag to modulecode_deps.

    For every recorded function the names listed in its vardepsexclude flag
    (read from the datastore d) are removed from the references, execs and
    var_execs sets. The contains mapping is carried over untouched.
    """
    for mod in modulecode_deps:
        flagvalue = d.getVarFlag(mod, "vardepsexclude") or ""
        excludes = set(flagvalue.split())
        if not excludes:
            continue
        entry = modulecode_deps[mod]
        modulecode_deps[mod] = [deps - excludes for deps in entry[:3]] + [entry[3]]
93
94# A custom getstate/setstate using tuples is actually worth 15% cachesize by
95# avoiding duplication of the attribute names!
class SetCache(object):
    """Intern table for frozensets of strings.

    Equal sets requested through internSet() share one frozenset object, and
    the strings inside them are interned with sys.intern().
    """
    def __init__(self):
        # Maps each interned frozenset to itself. Keying on the set (rather
        # than on its hash value) means two different sets whose hashes
        # happen to collide are never mistaken for one another.
        self.setcache = {}

    def internSet(self, items):
        """Return the shared frozenset holding the (interned) strings in items."""
        s = frozenset(sys.intern(i) for i in items)
        return self.setcache.setdefault(s, s)

codecache = SetCache()
113
class pythonCacheLine(object):
    """Cache entry for parsed python code: refs, execs and contains.

    All sets are stored as interned frozensets obtained from codecache. The
    pickle state is a plain (refs, execs, contains) tuple.
    """
    def __init__(self, refs, execs, contains):
        intern_set = codecache.internSet
        self.refs = intern_set(refs)
        self.execs = intern_set(execs)
        self.contains = {key: intern_set(contains[key]) for key in contains}

    def __getstate__(self):
        return (self.refs, self.execs, self.contains)

    def __setstate__(self, state):
        # Re-run __init__ so the unpickled sets are interned again
        refs, execs, contains = state
        self.__init__(refs, execs, contains)

    def __hash__(self):
        # Hash over refs, execs and the contains entries in sorted key order
        parts = [hash(self.refs), hash(self.execs)]
        for key in sorted(self.contains):
            parts.extend((key, hash(self.contains[key])))
        return hash(tuple(parts))

    def __repr__(self):
        return "%s %s %s" % (self.refs, self.execs, self.contains)
135
136
class shellCacheLine(object):
    """Cache entry for parsed shell code: the set of executed commands.

    execs is stored as an interned frozenset obtained from codecache. The
    pickle state is that frozenset itself.
    """
    def __init__(self, execs):
        self.execs = codecache.internSet(execs)

    def __getstate__(self):
        return self.execs

    def __setstate__(self, state):
        # Re-run __init__ so the unpickled set is interned again
        self.__init__(state)

    def __hash__(self):
        return hash(self.execs)

    def __repr__(self):
        return "%s" % (self.execs,)
151
class CodeParserCache(MultiProcessCache):
    """Cache of python and shell parse results, keyed by code hash.

    Slot 0 of the cache data holds python entries and slot 1 holds shell
    entries, both for the main data and for the "extras".
    """
    cache_file_name = "bb_codeparser.dat"
    # NOTE: you must increment this if you change how the parsers gather information,
    # so that an existing cache gets invalidated. Additionally you'll need
    # to increment __cache_version__ in cache.py in order to ensure that old
    # recipe caches don't trigger "Taskhash mismatch" errors.
    CACHE_VERSION = 11

    def __init__(self):
        MultiProcessCache.__init__(self)
        main, extras = self.cachedata, self.cachedata_extras
        self.pythoncache = main[0]
        self.shellcache = main[1]
        self.pythoncacheextras = extras[0]
        self.shellcacheextras = extras[1]

        # To avoid duplication in the codeparser cache, keep
        # a lookup of hashes of objects we already have
        self.pythoncachelines = {}
        self.shellcachelines = {}

    def newPythonCacheLine(self, refs, execs, contains):
        """Return a pythonCacheLine for the data, reusing one with the same hash."""
        candidate = pythonCacheLine(refs, execs, contains)
        return self.pythoncachelines.setdefault(hash(candidate), candidate)

    def newShellCacheLine(self, execs):
        """Return a shellCacheLine for the data, reusing one with the same hash."""
        candidate = shellCacheLine(execs)
        return self.shellcachelines.setdefault(hash(candidate), candidate)

    def init_cache(self, cachedir):
        """Load the cache from cachedir unless python entries are already present."""
        # Check if we already have the caches
        if self.pythoncache:
            return

        MultiProcessCache.init_cache(self, cachedir)

        # cachedata gets re-assigned in the parent
        main = self.cachedata
        self.pythoncache = main[0]
        self.shellcache = main[1]

    def create_cachedata(self):
        """Return empty cache data: [python entries, shell entries]."""
        return [{}, {}]

codeparsercache = CodeParserCache()
204
def parser_cache_init(cachedir):
    """Initialise the shared codeparser cache from cachedir."""
    codeparsercache.init_cache(cachedir)
207
def parser_cache_save():
    """Ask the shared codeparser cache to save its extras."""
    codeparsercache.save_extras()
210
def parser_cache_savemerge():
    """Ask the shared codeparser cache to perform its save_merge step."""
    codeparsercache.save_merge()
213
Logger = logging.getLoggerClass()
class BufferedLogger(Logger):
    """Logger which holds on to its records until flush() is called.

    name and level are applied to this logger. target is the logger the
    buffered records are handed to on flush(). When target is None (the
    default) flush() simply discards the buffered records.
    """
    def __init__(self, name, level=0, target=None):
        Logger.__init__(self, name)
        self.setLevel(level)
        self.buffer = []
        self.target = target

    def handle(self, record):
        """Store the record instead of passing it to any handlers."""
        self.buffer.append(record)

    def flush(self):
        """Hand buffered records the target is enabled for to it, then empty the buffer."""
        target = self.target
        # Without a target there is nowhere to forward to; previously this
        # raised AttributeError on None.
        if target is not None:
            for record in self.buffer:
                if target.isEnabledFor(record.levelno):
                    target.handle(record)
        self.buffer = []
230
class DummyLogger:
    """Placeholder log object whose flush() does nothing."""
    def flush(self):
        pass
234
class PythonParser():
    """Collect variable references and executed functions from python code.

    After parse_python() the results are available as:
      references - names of variables read through the datastore API
      execs      - names of called functions (plus var_execs after a parse)
      var_execs  - names passed to bb.build.exec_func/exec_task
      contains   - variable name -> set of values tested via the contains helpers
    """
    getvars = (".getVar", ".appendVar", ".prependVar", "oe.utils.conditional")
    getvarflags = (".getVarFlag", ".appendVarFlag", ".prependVarFlag")
    containsfuncs = ("bb.utils.contains", "base_contains")
    containsanyfuncs = ("bb.utils.contains_any", "bb.utils.filter")
    execfuncs = ("bb.build.exec_func", "bb.build.exec_task")

    def __init__(self, name, log):
        self.name = name
        self.var_execs = set()
        self.contains = {}
        self.execs = set()
        self.references = set()
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_message = "in call of %s, argument '%s' is not a string literal"
        self.unhandled_message = "while parsing %s, %s" % (name, self.unhandled_message)

    def warn(self, func, arg):
        """Warn about calls of bitbake APIs which pass a non-literal
        argument for the variable name, as we're not able to track such
        a reference.
        """

        try:
            funcstr = codegen.to_source(func)
            argstr = codegen.to_source(arg)
        except TypeError:
            self.log.debug2('Failed to convert function and argument to source form')
        else:
            self.log.debug(self.unhandled_message % (funcstr, argstr))

    def visit_Call(self, node):
        """Record what a single ast.Call node references or executes.

        Calls which lack the positional argument a branch needs (for example
        d.getVar() with no arguments) are skipped rather than raising
        IndexError.
        """
        name = self.called_node_name(node.func)
        args = node.args
        if name and (name.endswith(self.getvars) or name.endswith(self.getvarflags)
                     or name in self.containsfuncs or name in self.containsanyfuncs):
            if not args:
                # No positional variable name to look at, nothing to track
                return
            first = args[0]
            if not (isinstance(first, ast.Constant) and isinstance(first.value, str)):
                self.warn(node.func, first)
                return

            varname = first.value
            second = args[1] if len(args) > 1 else None
            second_is_const = isinstance(second, ast.Constant)
            # contains values end up in sets of interned strings, so only
            # string literals can be recorded there
            second_is_str = second_is_const and isinstance(second.value, str)

            if name in self.containsfuncs and second_is_str:
                self.contains.setdefault(varname, set()).add(second.value)
            elif name in self.containsanyfuncs and second_is_str:
                self.contains.setdefault(varname, set()).update(second.value.split())
            elif name.endswith(self.getvarflags):
                if second_is_const:
                    self.references.add('%s[%s]' % (varname, second.value))
                elif second is not None:
                    self.warn(node.func, second)
            else:
                self.references.add(varname)
        elif name and name.endswith(".expand"):
            if args and isinstance(args[0], ast.Constant):
                value = args[0].value
                d = bb.data.init()
                parser = d.expandWithRefs(value, self.name)
                self.references |= parser.references
                self.execs |= parser.execs
                for varname in parser.contains:
                    self.contains.setdefault(varname, set()).update(parser.contains[varname])
        elif name in self.execfuncs:
            if args:
                if isinstance(args[0], ast.Constant):
                    self.var_execs.add(args[0].value)
                else:
                    self.warn(node.func, args[0])
        elif name and isinstance(node.func, (ast.Name, ast.Attribute)):
            self.execs.add(name)

    def called_node_name(self, node):
        """Given a called node, return its original string form.

        Returns the dotted name for a chain of attribute accesses ending in a
        plain name (e.g. "bb.utils.contains"), or None for anything else.
        """
        components = []
        while node:
            if isinstance(node, ast.Attribute):
                components.append(node.attr)
                node = node.value
            elif isinstance(node, ast.Name):
                components.append(node.id)
                return '.'.join(reversed(components))
            else:
                break
        return None

    def _load_cacheline(self, cacheline):
        """Copy a cached entry's refs, execs and contains into this parser."""
        # NOTE(review): var_execs is not part of a cache line, so it is left
        # untouched on a cache hit.
        self.references = set(cacheline.refs)
        self.execs = set(cacheline.execs)
        self.contains = {}
        for var in cacheline.contains:
            self.contains[var] = set(cacheline.contains[var])

    # For the python module code it is expensive to have the function text so it
    # uses a different fixedhash to cache against. We can take the hit on obtaining the
    # text if it isn't in the cache.
    def parse_python(self, node, lineno=0, filename="<string>", fixedhash=None):
        """Parse python source text, using the codeparser cache where possible.

        node is the source text (it may be None when fixedhash is given),
        lineno is the number of blank lines to pad with so that reported line
        numbers match, filename is passed to compile(), and fixedhash, when
        set, is used as the cache key instead of a hash of the text.

        Raises KeyError when fixedhash is given without source text and the
        entry is not in the cache.
        """
        if not fixedhash and (not node or not node.strip()):
            return

        h = fixedhash if fixedhash else bbhash(str(node))

        for cache in (codeparsercache.pythoncache, codeparsercache.pythoncacheextras):
            if h in cache:
                self._load_cacheline(cache[h])
                return

        if fixedhash and not node:
            raise KeyError

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.PythonParser', logging.DEBUG, self._log)

        # We can't add to the linenumbers for compile, we can pad to the correct number of blank lines though
        node = "\n" * int(lineno) + node
        code = compile(check_indent(str(node)), filename, "exec",
                       ast.PyCF_ONLY_AST)

        for n in ast.walk(code):
            if isinstance(n, ast.Call):
                self.visit_Call(n)

        self.execs.update(self.var_execs)

        codeparsercache.pythoncacheextras[h] = codeparsercache.newPythonCacheLine(self.references, self.execs, self.contains)
369
class ShellParser():
    """Collect the external commands executed by a piece of shell code.

    funcdefs holds the names of shell functions defined in the code, allexecs
    every command name seen, and execs (set by parse_shell) the commands which
    are not locally defined functions.
    """
    def __init__(self, name, log):
        self.funcdefs = set()
        self.allexecs = set()
        self.execs = set()
        self._name = name
        self._log = log
        # Defer init as expensive
        self.log = DummyLogger()

        self.unhandled_template = "unable to handle non-literal command '%s'"
        self.unhandled_template = "while parsing %s, %s" % (name, self.unhandled_template)

    def parse_shell(self, value):
        """Parse the supplied shell code in a string, returning the external
        commands it executes.
        """

        h = bbhash(str(value))

        for cache in (codeparsercache.shellcache, codeparsercache.shellcacheextras):
            if h in cache:
                self.execs = set(cache[h].execs)
                return self.execs

        # Need to parse so take the hit on the real log buffer
        self.log = BufferedLogger('BitBake.Data.%s' % self._name, logging.DEBUG, self._log)

        self._parse_shell(value)
        self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)

        codeparsercache.shellcacheextras[h] = codeparsercache.newShellCacheLine(self.execs)

        return self.execs

    def _parse_shell(self, value):
        """Run pyshyacc over value and process the resulting tokens.

        A parse failure is reported together with the last five lines of the
        code and then re-raised.
        """
        try:
            tokens, _ = pyshyacc.parse(value, eof=True, debug=False)
        except Exception:
            bb.error('Error during parse shell code, the last 5 lines are:\n%s' % '\n'.join(value.split('\n')[-5:]))
            raise

        self.process_tokens(tokens)

    def process_tokens(self, tokens):
        """Process a supplied portion of the syntax tree as returned by
        pyshyacc.parse.

        Raises NotImplementedError for a token type without a handler.
        """

        def function_definition(value):
            self.funcdefs.add(value.name)
            return [value.body], None

        def case_clause(value):
            # Element 0 of each item in the case is the list of patterns, and
            # Element 1 of each item in the case is the list of commands to be
            # executed when that pattern matches.
            words = chain(*[item[0] for item in value.items])
            cmds = chain(*[item[1] for item in value.items])
            return cmds, words

        def if_clause(value):
            main = chain(value.cond, value.if_cmds)
            rest = value.else_cmds
            if isinstance(rest, tuple) and rest[0] == "elif":
                return chain(main, if_clause(rest[1]))
            else:
                return chain(main, rest)

        def simple_command(value):
            return None, chain(value.words, (assign[1] for assign in value.assigns))

        # Each handler returns (further tokens to process, words to process)
        token_handlers = {
            "and_or": lambda x: ((x.left, x.right), None),
            "async": lambda x: ([x], None),
            "brace_group": lambda x: (x.cmds, None),
            "for_clause": lambda x: (x.cmds, x.items),
            "function_definition": function_definition,
            "if_clause": lambda x: (if_clause(x), None),
            "pipeline": lambda x: (x.commands, None),
            "redirect_list": lambda x: ([x.cmd], None),
            "subshell": lambda x: (x.cmds, None),
            "while_clause": lambda x: (chain(x.condition, x.cmds), None),
            "until_clause": lambda x: (chain(x.condition, x.cmds), None),
            "simple_command": simple_command,
            "case_clause": case_clause,
        }

        def process_token_list(tokens):
            for token in tokens:
                if isinstance(token, list):
                    process_token_list(token)
                    continue
                name, value = token
                # Only the lookup decides whether the token type is supported;
                # a KeyError raised inside a handler is a different problem
                # and must not be reported as an unsupported token.
                handler = token_handlers.get(name)
                if handler is None:
                    raise NotImplementedError("Unsupported token type " + name)
                more_tokens, words = handler(value)

                if more_tokens:
                    self.process_tokens(more_tokens)

                if words:
                    self.process_words(words)

        process_token_list(tokens)

    def process_words(self, words):
        """Process a set of 'words' in pyshyacc parlance, which includes
        extraction of executed commands from $() blocks, as well as grabbing
        the command name argument.
        """

        words = list(words)
        for word in list(words):
            wtree = pyshlex.make_wordtree(word[1])
            for part in wtree:
                if not isinstance(part, list):
                    continue

                if part[0] in ('`', '$('):
                    # Command substitution: parse the embedded command
                    command = pyshlex.wordtree_as_string(part[1:-1])
                    self._parse_shell(command)

                    # A command word built from a substitution is dropped so
                    # the loop below doesn't treat it as a command name
                    if word[0] in ("cmd_name", "cmd_word"):
                        if word in words:
                            words.remove(word)

        usetoken = False
        for word in words:
            if word[0] in ("cmd_name", "cmd_word") or \
               (usetoken and word[0] == "TOKEN"):
                if "=" in word[1]:
                    # Looks like an assignment, the command comes later
                    usetoken = True
                    continue

                cmd = word[1]
                if cmd.startswith("$"):
                    self.log.debug(self.unhandled_template % cmd)
                elif cmd == "eval":
                    command = " ".join(text for _, text in words[1:])
                    self._parse_shell(command)
                else:
                    self.allexecs.add(cmd)
                break