Diffstat (limited to 'bitbake/lib/bb/siggen.py')
-rw-r--r--  bitbake/lib/bb/siggen.py | 752
1 file changed, 483 insertions(+), 269 deletions(-)
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 0d88c6ec68..03dfda6f3c 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -1,4 +1,6 @@
1# 1#
2# Copyright BitBake Contributors
3#
2# SPDX-License-Identifier: GPL-2.0-only 4# SPDX-License-Identifier: GPL-2.0-only
3# 5#
4 6
@@ -11,6 +13,10 @@ import pickle
11import bb.data 13import bb.data
12import difflib 14import difflib
13import simplediff 15import simplediff
16import json
17import types
18from contextlib import contextmanager
19import bb.compress.zstd
14from bb.checksum import FileChecksumCache 20from bb.checksum import FileChecksumCache
15from bb import runqueue 21from bb import runqueue
16import hashserv 22import hashserv
@@ -19,6 +25,35 @@ import hashserv.client
19logger = logging.getLogger('BitBake.SigGen') 25logger = logging.getLogger('BitBake.SigGen')
20hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv') 26hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
21 27
28#find_siginfo and find_siginfo_version are set by the metadata siggen
29# The minimum version of the find_siginfo function we need
30find_siginfo_minversion = 2
31
32HASHSERV_ENVVARS = [
33 "SSL_CERT_DIR",
34 "SSL_CERT_FILE",
35 "NO_PROXY",
36 "HTTPS_PROXY",
37 "HTTP_PROXY"
38]
39
40def check_siggen_version(siggen):
41 if not hasattr(siggen, "find_siginfo_version"):
42 bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (no version found)")
43 if siggen.find_siginfo_version < siggen.find_siginfo_minversion:
44 bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (%s vs %s)" % (siggen.find_siginfo_version, siggen.find_siginfo_minversion))
45
46class SetEncoder(json.JSONEncoder):
47 def default(self, obj):
48 if isinstance(obj, set) or isinstance(obj, frozenset):
49 return dict(_set_object=list(sorted(obj)))
50 return json.JSONEncoder.default(self, obj)
51
52def SetDecoder(dct):
53 if '_set_object' in dct:
54 return frozenset(dct['_set_object'])
55 return dct
56
22def init(d): 57def init(d):
23 siggens = [obj for obj in globals().values() 58 siggens = [obj for obj in globals().values()
24 if type(obj) is type and issubclass(obj, SignatureGenerator)] 59 if type(obj) is type and issubclass(obj, SignatureGenerator)]
@@ -27,7 +62,6 @@ def init(d):
27 for sg in siggens: 62 for sg in siggens:
28 if desired == sg.name: 63 if desired == sg.name:
29 return sg(d) 64 return sg(d)
30 break
31 else: 65 else:
32 logger.error("Invalid signature generator '%s', using default 'noop'\n" 66 logger.error("Invalid signature generator '%s', using default 'noop'\n"
33 "Available generators: %s", desired, 67 "Available generators: %s", desired,
@@ -39,11 +73,6 @@ class SignatureGenerator(object):
39 """ 73 """
40 name = "noop" 74 name = "noop"
41 75
42 # If the derived class supports multiconfig datacaches, set this to True
43 # The default is False for backward compatibility with derived signature
44 # generators that do not understand multiconfig caches
45 supports_multiconfig_datacaches = False
46
47 def __init__(self, data): 76 def __init__(self, data):
48 self.basehash = {} 77 self.basehash = {}
49 self.taskhash = {} 78 self.taskhash = {}
@@ -61,9 +90,39 @@ class SignatureGenerator(object):
61 def postparsing_clean_cache(self): 90 def postparsing_clean_cache(self):
62 return 91 return
63 92
93 def setup_datacache(self, datacaches):
94 self.datacaches = datacaches
95
96 def setup_datacache_from_datastore(self, mcfn, d):
97 # In task context we have no cache so setup internal data structures
98 # from the fully parsed data store provided
99
100 mc = d.getVar("__BBMULTICONFIG", False) or ""
101 tasks = d.getVar('__BBTASKS', False)
102
103 self.datacaches = {}
104 self.datacaches[mc] = types.SimpleNamespace()
105 setattr(self.datacaches[mc], "stamp", {})
106 self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
107 setattr(self.datacaches[mc], "stamp_extrainfo", {})
108 self.datacaches[mc].stamp_extrainfo[mcfn] = {}
109 for t in tasks:
110 flag = d.getVarFlag(t, "stamp-extra-info")
111 if flag:
112 self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag
113
114 def get_cached_unihash(self, tid):
115 return None
116
64 def get_unihash(self, tid): 117 def get_unihash(self, tid):
118 unihash = self.get_cached_unihash(tid)
119 if unihash:
120 return unihash
65 return self.taskhash[tid] 121 return self.taskhash[tid]
66 122
123 def get_unihashes(self, tids):
124 return {tid: self.get_unihash(tid) for tid in tids}
125
67 def prep_taskhash(self, tid, deps, dataCaches): 126 def prep_taskhash(self, tid, deps, dataCaches):
68 return 127 return
69 128
@@ -75,17 +134,51 @@ class SignatureGenerator(object):
75 """Write/update the file checksum cache onto disk""" 134 """Write/update the file checksum cache onto disk"""
76 return 135 return
77 136
137 def stampfile_base(self, mcfn):
138 mc = bb.runqueue.mc_from_tid(mcfn)
139 return self.datacaches[mc].stamp[mcfn]
140
141 def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
142 mc = bb.runqueue.mc_from_tid(mcfn)
143 stamp = self.datacaches[mc].stamp[mcfn]
144 if not stamp:
145 return
146
147 stamp_extrainfo = ""
148 if extrainfo:
149 taskflagname = taskname
150 if taskname.endswith("_setscene"):
151 taskflagname = taskname.replace("_setscene", "")
152 stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""
153
154 return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)
155
78 def stampfile(self, stampbase, file_name, taskname, extrainfo): 156 def stampfile(self, stampbase, file_name, taskname, extrainfo):
79 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 157 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
80 158
159 def stampcleanmask_mcfn(self, taskname, mcfn):
160 mc = bb.runqueue.mc_from_tid(mcfn)
161 stamp = self.datacaches[mc].stamp[mcfn]
162 if not stamp:
163 return []
164
165 taskflagname = taskname
166 if taskname.endswith("_setscene"):
167 taskflagname = taskname.replace("_setscene", "")
168 stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""
169
170 return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)
171
81 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo): 172 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
82 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 173 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
83 174
84 def dump_sigtask(self, fn, task, stampbase, runtime): 175 def dump_sigtask(self, mcfn, task, stampbase, runtime):
85 return 176 return
86 177
87 def invalidate_task(self, task, d, fn): 178 def invalidate_task(self, task, mcfn):
88 bb.build.del_stamp(task, d, fn) 179 mc = bb.runqueue.mc_from_tid(mcfn)
180 stamp = self.datacaches[mc].stamp[mcfn]
181 bb.utils.remove(stamp)
89 182
90 def dump_sigs(self, dataCache, options): 183 def dump_sigs(self, dataCache, options):
91 return 184 return
@@ -108,40 +201,19 @@ class SignatureGenerator(object):
108 def save_unitaskhashes(self): 201 def save_unitaskhashes(self):
109 return 202 return
110 203
111 def set_setscene_tasks(self, setscene_tasks): 204 def copy_unitaskhashes(self, targetdir):
112 return 205 return
113 206
114 @classmethod 207 def set_setscene_tasks(self, setscene_tasks):
115 def get_data_caches(cls, dataCaches, mc): 208 return
116 """
117 This function returns the datacaches that should be passed to signature
118 generator functions. If the signature generator supports multiconfig
119 caches, the entire dictionary of data caches is sent, otherwise a
120 special proxy is sent that support both index access to all
121 multiconfigs, and also direct access for the default multiconfig.
122
123 The proxy class allows code in this class itself to always use
124 multiconfig aware code (to ease maintenance), but derived classes that
125 are unaware of multiconfig data caches can still access the default
126 multiconfig as expected.
127
128 Do not override this function in derived classes; it will be removed in
129 the future when support for multiconfig data caches is mandatory
130 """
131 class DataCacheProxy(object):
132 def __init__(self):
133 pass
134
135 def __getitem__(self, key):
136 return dataCaches[key]
137
138 def __getattr__(self, name):
139 return getattr(dataCaches[mc], name)
140 209
141 if cls.supports_multiconfig_datacaches: 210 def exit(self):
142 return dataCaches 211 return
143 212
144 return DataCacheProxy() 213def build_pnid(mc, pn, taskname):
214 if mc:
215 return "mc:" + mc + ":" + pn + ":" + taskname
216 return pn + ":" + taskname
145 217
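build_pnid constructs the human-readable task identifier that the reworked siginfo data uses for runtaskdeps/runtaskhashes entries, in place of the old file-path based names. A quick illustration with hypothetical recipe values:

    from bb.siggen import build_pnid

    build_pnid("", "zlib", "do_compile")        # -> "zlib:do_compile"
    build_pnid("target", "zlib", "do_compile")  # -> "mc:target:zlib:do_compile"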
146class SignatureGeneratorBasic(SignatureGenerator): 218class SignatureGeneratorBasic(SignatureGenerator):
147 """ 219 """
@@ -152,15 +224,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
152 self.basehash = {} 224 self.basehash = {}
153 self.taskhash = {} 225 self.taskhash = {}
154 self.unihash = {} 226 self.unihash = {}
155 self.taskdeps = {}
156 self.runtaskdeps = {} 227 self.runtaskdeps = {}
157 self.file_checksum_values = {} 228 self.file_checksum_values = {}
158 self.taints = {} 229 self.taints = {}
159 self.gendeps = {}
160 self.lookupcache = {}
161 self.setscenetasks = set() 230 self.setscenetasks = set()
162 self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split()) 231 self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
163 self.taskwhitelist = None 232 self.taskhash_ignore_tasks = None
164 self.init_rundepcheck(data) 233 self.init_rundepcheck(data)
165 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE") 234 checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
166 if checksum_cache_file: 235 if checksum_cache_file:
@@ -175,21 +244,21 @@ class SignatureGeneratorBasic(SignatureGenerator):
175 self.tidtopn = {} 244 self.tidtopn = {}
176 245
177 def init_rundepcheck(self, data): 246 def init_rundepcheck(self, data):
178 self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None 247 self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
179 if self.taskwhitelist: 248 if self.taskhash_ignore_tasks:
180 self.twl = re.compile(self.taskwhitelist) 249 self.twl = re.compile(self.taskhash_ignore_tasks)
181 else: 250 else:
182 self.twl = None 251 self.twl = None
183 252
184 def _build_data(self, fn, d): 253 def _build_data(self, mcfn, d):
185 254
186 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1') 255 ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
187 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist) 256 tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)
188 257
189 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn) 258 taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)
190 259
191 for task in tasklist: 260 for task in tasklist:
192 tid = fn + ":" + task 261 tid = mcfn + ":" + task
193 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]: 262 if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
194 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid])) 263 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
195 bb.error("The following commands may help:") 264 bb.error("The following commands may help:")
@@ -200,11 +269,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
200 bb.error("%s -Sprintdiff\n" % cmd) 269 bb.error("%s -Sprintdiff\n" % cmd)
201 self.basehash[tid] = basehash[tid] 270 self.basehash[tid] = basehash[tid]
202 271
203 self.taskdeps[fn] = taskdeps 272 return taskdeps, gendeps, lookupcache
204 self.gendeps[fn] = gendeps
205 self.lookupcache[fn] = lookupcache
206
207 return taskdeps
208 273
209 def set_setscene_tasks(self, setscene_tasks): 274 def set_setscene_tasks(self, setscene_tasks):
210 self.setscenetasks = set(setscene_tasks) 275 self.setscenetasks = set(setscene_tasks)
@@ -212,35 +277,47 @@ class SignatureGeneratorBasic(SignatureGenerator):
212 def finalise(self, fn, d, variant): 277 def finalise(self, fn, d, variant):
213 278
214 mc = d.getVar("__BBMULTICONFIG", False) or "" 279 mc = d.getVar("__BBMULTICONFIG", False) or ""
280 mcfn = fn
215 if variant or mc: 281 if variant or mc:
216 fn = bb.cache.realfn2virtual(fn, variant, mc) 282 mcfn = bb.cache.realfn2virtual(fn, variant, mc)
217 283
218 try: 284 try:
219 taskdeps = self._build_data(fn, d) 285 taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
220 except bb.parse.SkipRecipe: 286 except bb.parse.SkipRecipe:
221 raise 287 raise
222 except: 288 except:
223 bb.warn("Error during finalise of %s" % fn) 289 bb.warn("Error during finalise of %s" % mcfn)
224 raise 290 raise
225 291
292 basehashes = {}
293 for task in taskdeps:
294 basehashes[task] = self.basehash[mcfn + ":" + task]
295
296 d.setVar("__siggen_basehashes", basehashes)
297 d.setVar("__siggen_gendeps", gendeps)
298 d.setVar("__siggen_varvals", lookupcache)
299 d.setVar("__siggen_taskdeps", taskdeps)
300
226 #Slow but can be useful for debugging mismatched basehashes 301 #Slow but can be useful for debugging mismatched basehashes
227 #for task in self.taskdeps[fn]: 302 #self.setup_datacache_from_datastore(mcfn, d)
228 # self.dump_sigtask(fn, task, d.getVar("STAMP"), False) 303 #for task in taskdeps:
304 # self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)
229 305
230 for task in taskdeps: 306 def setup_datacache_from_datastore(self, mcfn, d):
231 d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task]) 307 super().setup_datacache_from_datastore(mcfn, d)
232 308
233 def postparsing_clean_cache(self): 309 mc = bb.runqueue.mc_from_tid(mcfn)
234 # 310 for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
235 # After parsing we can remove some things from memory to reduce our memory footprint 311 if not hasattr(self.datacaches[mc], attr):
236 # 312 setattr(self.datacaches[mc], attr, {})
237 self.gendeps = {} 313 self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
238 self.lookupcache = {} 314 self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
239 self.taskdeps = {} 315 self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")
240 316
241 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches): 317 def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
242 # Return True if we should keep the dependency, False to drop it 318 # Return True if we should keep the dependency, False to drop it
243 # We only manipulate the dependencies for packages not in the whitelist 319 # We only manipulate the dependencies for packages not in the ignore
320 # list
244 if self.twl and not self.twl.search(recipename): 321 if self.twl and not self.twl.search(recipename):
245 # then process the actual dependencies 322 # then process the actual dependencies
246 if self.twl.search(depname): 323 if self.twl.search(depname):
@@ -258,38 +335,37 @@ class SignatureGeneratorBasic(SignatureGenerator):
258 335
259 def prep_taskhash(self, tid, deps, dataCaches): 336 def prep_taskhash(self, tid, deps, dataCaches):
260 337
261 (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid) 338 (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)
262 339
263 self.basehash[tid] = dataCaches[mc].basetaskhash[tid] 340 self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
264 self.runtaskdeps[tid] = [] 341 self.runtaskdeps[tid] = []
265 self.file_checksum_values[tid] = [] 342 self.file_checksum_values[tid] = []
266 recipename = dataCaches[mc].pkg_fn[fn] 343 recipename = dataCaches[mc].pkg_fn[mcfn]
267 344
268 self.tidtopn[tid] = recipename 345 self.tidtopn[tid] = recipename
346 # save hashfn for deps into siginfo?
347 for dep in deps:
348 (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
349 dep_pn = dataCaches[depmc].pkg_fn[depmcfn]
269 350
270 for dep in sorted(deps, key=clean_basepath): 351 if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
271 (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
272 depname = dataCaches[depmc].pkg_fn[depmcfn]
273 if not self.supports_multiconfig_datacaches and mc != depmc:
274 # If the signature generator doesn't understand multiconfig
275 # data caches, any dependency not in the same multiconfig must
276 # be skipped for backward compatibility
277 continue
278 if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
279 continue 352 continue
353
280 if dep not in self.taskhash: 354 if dep not in self.taskhash:
281 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) 355 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
282 self.runtaskdeps[tid].append(dep)
283 356
284 if task in dataCaches[mc].file_checksums[fn]: 357 dep_pnid = build_pnid(depmc, dep_pn, deptask)
358 self.runtaskdeps[tid].append((dep_pnid, dep))
359
360 if task in dataCaches[mc].file_checksums[mcfn]:
285 if self.checksum_cache: 361 if self.checksum_cache:
286 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 362 checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
287 else: 363 else:
288 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude) 364 checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
289 for (f,cs) in checksums: 365 for (f,cs) in checksums:
290 self.file_checksum_values[tid].append((f,cs)) 366 self.file_checksum_values[tid].append((f,cs))
291 367
292 taskdep = dataCaches[mc].task_deps[fn] 368 taskdep = dataCaches[mc].task_deps[mcfn]
293 if 'nostamp' in taskdep and task in taskdep['nostamp']: 369 if 'nostamp' in taskdep and task in taskdep['nostamp']:
294 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run 370 # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
295 if tid in self.taints and self.taints[tid].startswith("nostamp:"): 371 if tid in self.taints and self.taints[tid].startswith("nostamp:"):
@@ -300,32 +376,34 @@ class SignatureGeneratorBasic(SignatureGenerator):
300 taint = str(uuid.uuid4()) 376 taint = str(uuid.uuid4())
301 self.taints[tid] = "nostamp:" + taint 377 self.taints[tid] = "nostamp:" + taint
302 378
303 taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn]) 379 taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
304 if taint: 380 if taint:
305 self.taints[tid] = taint 381 self.taints[tid] = taint
306 logger.warning("%s is tainted from a forced run" % tid) 382 logger.warning("%s is tainted from a forced run" % tid)
307 383
308 return 384 return set(dep for _, dep in self.runtaskdeps[tid])
309 385
310 def get_taskhash(self, tid, deps, dataCaches): 386 def get_taskhash(self, tid, deps, dataCaches):
311 387
312 data = self.basehash[tid] 388 data = self.basehash[tid]
313 for dep in self.runtaskdeps[tid]: 389 for dep in sorted(self.runtaskdeps[tid]):
314 data = data + self.get_unihash(dep) 390 data += self.get_unihash(dep[1])
315 391
316 for (f, cs) in self.file_checksum_values[tid]: 392 for (f, cs) in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
317 if cs: 393 if cs:
318 data = data + cs 394 if "/./" in f:
395 data += "./" + f.split("/./")[1]
396 data += cs
319 397
320 if tid in self.taints: 398 if tid in self.taints:
321 if self.taints[tid].startswith("nostamp:"): 399 if self.taints[tid].startswith("nostamp:"):
322 data = data + self.taints[tid][8:] 400 data += self.taints[tid][8:]
323 else: 401 else:
324 data = data + self.taints[tid] 402 data += self.taints[tid]
325 403
326 h = hashlib.sha256(data.encode("utf-8")).hexdigest() 404 h = hashlib.sha256(data.encode("utf-8")).hexdigest()
327 self.taskhash[tid] = h 405 self.taskhash[tid] = h
328 #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task]) 406 #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
329 return h 407 return h
330 408
331 def writeout_file_checksum_cache(self): 409 def writeout_file_checksum_cache(self):
@@ -340,9 +418,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
340 def save_unitaskhashes(self): 418 def save_unitaskhashes(self):
341 self.unihash_cache.save(self.unitaskhashes) 419 self.unihash_cache.save(self.unitaskhashes)
342 420
343 def dump_sigtask(self, fn, task, stampbase, runtime): 421 def copy_unitaskhashes(self, targetdir):
422 self.unihash_cache.copyfile(targetdir)
344 423
345 tid = fn + ":" + task 424 def dump_sigtask(self, mcfn, task, stampbase, runtime):
425 tid = mcfn + ":" + task
426 mc = bb.runqueue.mc_from_tid(mcfn)
346 referencestamp = stampbase 427 referencestamp = stampbase
347 if isinstance(runtime, str) and runtime.startswith("customfile"): 428 if isinstance(runtime, str) and runtime.startswith("customfile"):
348 sigfile = stampbase 429 sigfile = stampbase
@@ -357,29 +438,34 @@ class SignatureGeneratorBasic(SignatureGenerator):
357 438
358 data = {} 439 data = {}
359 data['task'] = task 440 data['task'] = task
360 data['basewhitelist'] = self.basewhitelist 441 data['basehash_ignore_vars'] = self.basehash_ignore_vars
361 data['taskwhitelist'] = self.taskwhitelist 442 data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
362 data['taskdeps'] = self.taskdeps[fn][task] 443 data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
363 data['basehash'] = self.basehash[tid] 444 data['basehash'] = self.basehash[tid]
364 data['gendeps'] = {} 445 data['gendeps'] = {}
365 data['varvals'] = {} 446 data['varvals'] = {}
366 data['varvals'][task] = self.lookupcache[fn][task] 447 data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
367 for dep in self.taskdeps[fn][task]: 448 for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
368 if dep in self.basewhitelist: 449 if dep in self.basehash_ignore_vars:
369 continue 450 continue
370 data['gendeps'][dep] = self.gendeps[fn][dep] 451 data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
371 data['varvals'][dep] = self.lookupcache[fn][dep] 452 data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]
372 453
373 if runtime and tid in self.taskhash: 454 if runtime and tid in self.taskhash:
374 data['runtaskdeps'] = self.runtaskdeps[tid] 455 data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
375 data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]] 456 data['file_checksum_values'] = []
457 for f,cs in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
458 if "/./" in f:
459 data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
460 else:
461 data['file_checksum_values'].append((os.path.basename(f), cs))
376 data['runtaskhashes'] = {} 462 data['runtaskhashes'] = {}
377 for dep in data['runtaskdeps']: 463 for dep in self.runtaskdeps[tid]:
378 data['runtaskhashes'][dep] = self.get_unihash(dep) 464 data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
379 data['taskhash'] = self.taskhash[tid] 465 data['taskhash'] = self.taskhash[tid]
380 data['unihash'] = self.get_unihash(tid) 466 data['unihash'] = self.get_unihash(tid)
381 467
382 taint = self.read_taint(fn, task, referencestamp) 468 taint = self.read_taint(mcfn, task, referencestamp)
383 if taint: 469 if taint:
384 data['taint'] = taint 470 data['taint'] = taint
385 471
@@ -396,13 +482,13 @@ class SignatureGeneratorBasic(SignatureGenerator):
396 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid)) 482 bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
397 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash) 483 sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
398 484
399 fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.") 485 fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
400 try: 486 try:
401 with os.fdopen(fd, "wb") as stream: 487 with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
402 p = pickle.dump(data, stream, -1) 488 json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
403 stream.flush() 489 f.flush()
404 os.chmod(tmpfile, 0o664) 490 os.chmod(tmpfile, 0o664)
405 os.rename(tmpfile, sigfile) 491 bb.utils.rename(tmpfile, sigfile)
406 except (OSError, IOError) as err: 492 except (OSError, IOError) as err:
407 try: 493 try:
408 os.unlink(tmpfile) 494 os.unlink(tmpfile)
@@ -410,18 +496,6 @@ class SignatureGeneratorBasic(SignatureGenerator):
410 pass 496 pass
411 raise err 497 raise err
412 498
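With this change dump_sigtask writes siginfo files as zstd-compressed JSON rather than pickle. A minimal sketch of reading one back, mirroring what compare_sigfiles and dump_sigfile below do; the file path here is purely hypothetical:

    import json
    import bb.compress.zstd
    from bb.siggen import SetDecoder

    sigfile = "/path/to/stamps/zlib/do_compile.sigdata.deadbeef"  # hypothetical path

    with bb.compress.zstd.open(sigfile, "rt", encoding="utf-8", num_threads=1) as f:
        sigdata = json.load(f, object_hook=SetDecoder)

    print(sigdata["task"], sigdata.get("taskhash"), sigdata.get("unihash"))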
413 def dump_sigfn(self, fn, dataCaches, options):
414 if fn in self.taskdeps:
415 for task in self.taskdeps[fn]:
416 tid = fn + ":" + task
417 mc = bb.runqueue.mc_from_tid(tid)
418 if tid not in self.taskhash:
419 continue
420 if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
421 bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
422 bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
423 self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
424
425class SignatureGeneratorBasicHash(SignatureGeneratorBasic): 499class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
426 name = "basichash" 500 name = "basichash"
427 501
@@ -432,11 +506,11 @@ class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
432 # If task is not in basehash, then error 506 # If task is not in basehash, then error
433 return self.basehash[tid] 507 return self.basehash[tid]
434 508
435 def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False): 509 def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
436 if taskname != "do_setscene" and taskname.endswith("_setscene"): 510 if taskname.endswith("_setscene"):
437 tid = fn + ":" + taskname[:-9] 511 tid = mcfn + ":" + taskname[:-9]
438 else: 512 else:
439 tid = fn + ":" + taskname 513 tid = mcfn + ":" + taskname
440 if clean: 514 if clean:
441 h = "*" 515 h = "*"
442 else: 516 else:
@@ -444,29 +518,106 @@ class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
444 518
445 return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.') 519 return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
446 520
447 def stampcleanmask(self, stampbase, fn, taskname, extrainfo): 521 def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
448 return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True) 522 return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)
523
524 def invalidate_task(self, task, mcfn):
525 bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))
526
527 mc = bb.runqueue.mc_from_tid(mcfn)
528 stamp = self.datacaches[mc].stamp[mcfn]
529
530 taintfn = stamp + '.' + task + '.taint'
449 531
450 def invalidate_task(self, task, d, fn): 532 import uuid
451 bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task)) 533 bb.utils.mkdirhier(os.path.dirname(taintfn))
452 bb.build.write_taint(task, d, fn) 534 # The specific content of the taint file is not really important,
535 # we just need it to be random, so a random UUID is used
536 with open(taintfn, 'w') as taintf:
537 taintf.write(str(uuid.uuid4()))
453 538
454class SignatureGeneratorUniHashMixIn(object): 539class SignatureGeneratorUniHashMixIn(object):
455 def __init__(self, data): 540 def __init__(self, data):
456 self.extramethod = {} 541 self.extramethod = {}
542 # NOTE: The cache only tracks hashes that exist. Hashes that don't
543 # exist are always queries from the server since it is possible for
544 # hashes to appear over time, but much less likely for them to
545 # disappear
546 self.unihash_exists_cache = set()
547 self.username = None
548 self.password = None
549 self.env = {}
550
551 origenv = data.getVar("BB_ORIGENV")
552 for e in HASHSERV_ENVVARS:
553 value = data.getVar(e)
554 if not value and origenv:
555 value = origenv.getVar(e)
556 if value:
557 self.env[e] = value
457 super().__init__(data) 558 super().__init__(data)
458 559
459 def get_taskdata(self): 560 def get_taskdata(self):
460 return (self.server, self.method, self.extramethod) + super().get_taskdata() 561 return (self.server, self.method, self.extramethod, self.max_parallel, self.username, self.password, self.env) + super().get_taskdata()
461 562
462 def set_taskdata(self, data): 563 def set_taskdata(self, data):
463 self.server, self.method, self.extramethod = data[:3] 564 self.server, self.method, self.extramethod, self.max_parallel, self.username, self.password, self.env = data[:7]
464 super().set_taskdata(data[3:]) 565 super().set_taskdata(data[7:])
566
567 def get_hashserv_creds(self):
568 if self.username and self.password:
569 return {
570 "username": self.username,
571 "password": self.password,
572 }
573
574 return {}
575
576 @contextmanager
577 def _client_env(self):
578 orig_env = os.environ.copy()
579 try:
580 for k, v in self.env.items():
581 os.environ[k] = v
582
583 yield
584 finally:
585 for k, v in self.env.items():
586 if k in orig_env:
587 os.environ[k] = orig_env[k]
588 else:
589 del os.environ[k]
465 590
591 @contextmanager
466 def client(self): 592 def client(self):
467 if getattr(self, '_client', None) is None: 593 with self._client_env():
468 self._client = hashserv.create_client(self.server) 594 if getattr(self, '_client', None) is None:
469 return self._client 595 self._client = hashserv.create_client(self.server, **self.get_hashserv_creds())
596 yield self._client
597
598 @contextmanager
599 def client_pool(self):
600 with self._client_env():
601 if getattr(self, '_client_pool', None) is None:
602 self._client_pool = hashserv.client.ClientPool(self.server, self.max_parallel, **self.get_hashserv_creds())
603 yield self._client_pool
604
605 def reset(self, data):
606 self.__close_clients()
607 return super().reset(data)
608
609 def exit(self):
610 self.__close_clients()
611 return super().exit()
612
613 def __close_clients(self):
614 with self._client_env():
615 if getattr(self, '_client', None) is not None:
616 self._client.close()
617 self._client = None
618 if getattr(self, '_client_pool', None) is not None:
619 self._client_pool.close()
620 self._client_pool = None
470 621
471 def get_stampfile_hash(self, tid): 622 def get_stampfile_hash(self, tid):
472 if tid in self.taskhash: 623 if tid in self.taskhash:
@@ -499,7 +650,7 @@ class SignatureGeneratorUniHashMixIn(object):
499 return None 650 return None
500 return unihash 651 return unihash
501 652
502 def get_unihash(self, tid): 653 def get_cached_unihash(self, tid):
503 taskhash = self.taskhash[tid] 654 taskhash = self.taskhash[tid]
504 655
505 # If its not a setscene task we can return 656 # If its not a setscene task we can return
@@ -514,40 +665,105 @@ class SignatureGeneratorUniHashMixIn(object):
514 self.unihash[tid] = unihash 665 self.unihash[tid] = unihash
515 return unihash 666 return unihash
516 667
517 # In the absence of being able to discover a unique hash from the 668 return None
518 # server, make it be equivalent to the taskhash. The unique "hash" only
519 # really needs to be a unique string (not even necessarily a hash), but
520 # making it match the taskhash has a few advantages:
521 #
522 # 1) All of the sstate code that assumes hashes can be the same
523 # 2) It provides maximal compatibility with builders that don't use
524 # an equivalency server
525 # 3) The value is easy for multiple independent builders to derive the
526 # same unique hash from the same input. This means that if the
527 # independent builders find the same taskhash, but it isn't reported
528 # to the server, there is a better chance that they will agree on
529 # the unique hash.
530 unihash = taskhash
531 669
532 try: 670 def _get_method(self, tid):
533 method = self.method 671 method = self.method
534 if tid in self.extramethod: 672 if tid in self.extramethod:
535 method = method + self.extramethod[tid] 673 method = method + self.extramethod[tid]
536 data = self.client().get_unihash(method, self.taskhash[tid]) 674
537 if data: 675 return method
538 unihash = data 676
677 def unihashes_exist(self, query):
678 if len(query) == 0:
679 return {}
680
681 uncached_query = {}
682 result = {}
683 for key, unihash in query.items():
684 if unihash in self.unihash_exists_cache:
685 result[key] = True
686 else:
687 uncached_query[key] = unihash
688
689 if self.max_parallel <= 1 or len(uncached_query) <= 1:
690 # No parallelism required. Make the query serially with the single client
691 with self.client() as client:
692 uncached_result = {
693 key: client.unihash_exists(value) for key, value in uncached_query.items()
694 }
695 else:
696 with self.client_pool() as client_pool:
697 uncached_result = client_pool.unihashes_exist(uncached_query)
698
699 for key, exists in uncached_result.items():
700 if exists:
701 self.unihash_exists_cache.add(query[key])
702 result[key] = exists
703
704 return result
705
706 def get_unihash(self, tid):
707 return self.get_unihashes([tid])[tid]
708
709 def get_unihashes(self, tids):
710 """
711 For a iterable of tids, returns a dictionary that maps each tid to a
712 unihash
713 """
714 result = {}
715 queries = {}
716 query_result = {}
717
718 for tid in tids:
719 unihash = self.get_cached_unihash(tid)
720 if unihash:
721 result[tid] = unihash
722 else:
723 queries[tid] = (self._get_method(tid), self.taskhash[tid])
724
725 if len(queries) == 0:
726 return result
727
728 if self.max_parallel <= 1 or len(queries) <= 1:
729 # No parallelism required. Make the query serially with the single client
730 with self.client() as client:
731 for tid, args in queries.items():
732 query_result[tid] = client.get_unihash(*args)
733 else:
734 with self.client_pool() as client_pool:
735 query_result = client_pool.get_unihashes(queries)
736
737 for tid, unihash in query_result.items():
738 # In the absence of being able to discover a unique hash from the
739 # server, make it be equivalent to the taskhash. The unique "hash" only
740 # really needs to be a unique string (not even necessarily a hash), but
741 # making it match the taskhash has a few advantages:
742 #
743 # 1) All of the sstate code that assumes hashes can be the same
744 # 2) It provides maximal compatibility with builders that don't use
745 # an equivalency server
746 # 3) The value is easy for multiple independent builders to derive the
747 # same unique hash from the same input. This means that if the
748 # independent builders find the same taskhash, but it isn't reported
749 # to the server, there is a better chance that they will agree on
750 # the unique hash.
751 taskhash = self.taskhash[tid]
752 if unihash:
539 # A unique hash equal to the taskhash is not very interesting, 753 # A unique hash equal to the taskhash is not very interesting,
540 # so it is reported it at debug level 2. If they differ, that 754 # so it is reported it at debug level 2. If they differ, that
541 # is much more interesting, so it is reported at debug level 1 755 # is much more interesting, so it is reported at debug level 1
542 hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 756 hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
543 else: 757 else:
544 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 758 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
545 except hashserv.client.HashConnectionError as e: 759 unihash = taskhash
546 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
547 760
548 self.set_unihash(tid, unihash) 761
549 self.unihash[tid] = unihash 762 self.set_unihash(tid, unihash)
550 return unihash 763 self.unihash[tid] = unihash
764 result[tid] = unihash
765
766 return result
551 767
552 def report_unihash(self, path, task, d): 768 def report_unihash(self, path, task, d):
553 import importlib 769 import importlib
@@ -556,14 +772,14 @@ class SignatureGeneratorUniHashMixIn(object):
556 unihash = d.getVar('BB_UNIHASH') 772 unihash = d.getVar('BB_UNIHASH')
557 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1' 773 report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
558 tempdir = d.getVar('T') 774 tempdir = d.getVar('T')
559 fn = d.getVar('BB_FILENAME') 775 mcfn = d.getVar('BB_FILENAME')
560 tid = fn + ':do_' + task 776 tid = mcfn + ':do_' + task
561 key = tid + ':' + taskhash 777 key = tid + ':' + taskhash
562 778
563 if self.setscenetasks and tid not in self.setscenetasks: 779 if self.setscenetasks and tid not in self.setscenetasks:
564 return 780 return
565 781
566 # This can happen if locked sigs are in action. Detect and just abort 782 # This can happen if locked sigs are in action. Detect and just exit
567 if taskhash != self.taskhash[tid]: 783 if taskhash != self.taskhash[tid]:
568 return 784 return
569 785
@@ -611,17 +827,19 @@ class SignatureGeneratorUniHashMixIn(object):
611 if tid in self.extramethod: 827 if tid in self.extramethod:
612 method = method + self.extramethod[tid] 828 method = method + self.extramethod[tid]
613 829
614 data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data) 830 with self.client() as client:
831 data = client.report_unihash(taskhash, method, outhash, unihash, extra_data)
832
615 new_unihash = data['unihash'] 833 new_unihash = data['unihash']
616 834
617 if new_unihash != unihash: 835 if new_unihash != unihash:
618 hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) 836 hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
619 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) 837 bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
620 self.set_unihash(tid, new_unihash) 838 self.set_unihash(tid, new_unihash)
621 d.setVar('BB_UNIHASH', new_unihash) 839 d.setVar('BB_UNIHASH', new_unihash)
622 else: 840 else:
623 hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) 841 hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
624 except hashserv.client.HashConnectionError as e: 842 except ConnectionError as e:
625 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 843 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
626 finally: 844 finally:
627 if sigfile: 845 if sigfile:
@@ -642,7 +860,9 @@ class SignatureGeneratorUniHashMixIn(object):
642 if tid in self.extramethod: 860 if tid in self.extramethod:
643 method = method + self.extramethod[tid] 861 method = method + self.extramethod[tid]
644 862
645 data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data) 863 with self.client() as client:
864 data = client.report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
865
646 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data))) 866 hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
647 867
648 if data is None: 868 if data is None:
@@ -661,7 +881,7 @@ class SignatureGeneratorUniHashMixIn(object):
661 # TODO: What to do here? 881 # TODO: What to do here?
662 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash)) 882 hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
663 883
664 except hashserv.client.HashConnectionError as e: 884 except ConnectionError as e:
665 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 885 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
666 886
667 return False 887 return False
@@ -675,20 +895,20 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG
675 super().init_rundepcheck(data) 895 super().init_rundepcheck(data)
676 self.server = data.getVar('BB_HASHSERVE') 896 self.server = data.getVar('BB_HASHSERVE')
677 self.method = "sstate_output_hash" 897 self.method = "sstate_output_hash"
898 self.max_parallel = 1
678 899
679# 900def clean_checksum_file_path(file_checksum_tuple):
680# Dummy class used for bitbake-selftest 901 f, cs = file_checksum_tuple
681# 902 if "/./" in f:
682class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash): 903 return "./" + f.split("/./")[1]
683 name = "TestMulticonfigDepends" 904 return f
684 supports_multiconfig_datacaches = True
685 905
686def dump_this_task(outfile, d): 906def dump_this_task(outfile, d):
687 import bb.parse 907 import bb.parse
688 fn = d.getVar("BB_FILENAME") 908 mcfn = d.getVar("BB_FILENAME")
689 task = "do_" + d.getVar("BB_CURRENTTASK") 909 task = "do_" + d.getVar("BB_CURRENTTASK")
690 referencestamp = bb.build.stamp_internal(task, d, None, True) 910 referencestamp = bb.parse.siggen.stampfile_base(mcfn)
691 bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp) 911 bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)
692 912
693def init_colors(enable_color): 913def init_colors(enable_color):
694 """Initialise colour dict for passing to compare_sigfiles()""" 914 """Initialise colour dict for passing to compare_sigfiles()"""
@@ -741,38 +961,15 @@ def list_inline_diff(oldlist, newlist, colors=None):
741 ret.append(item) 961 ret.append(item)
742 return '[%s]' % (', '.join(ret)) 962 return '[%s]' % (', '.join(ret))
743 963
744def clean_basepath(basepath): 964# Handled renamed fields
745 basepath, dir, recipe_task = basepath.rsplit("/", 2) 965def handle_renames(data):
746 cleaned = dir + '/' + recipe_task 966 if 'basewhitelist' in data:
747 967 data['basehash_ignore_vars'] = data['basewhitelist']
748 if basepath[0] == '/': 968 del data['basewhitelist']
749 return cleaned 969 if 'taskwhitelist' in data:
750 970 data['taskhash_ignore_tasks'] = data['taskwhitelist']
751 if basepath.startswith("mc:") and basepath.count(':') >= 2: 971 del data['taskwhitelist']
752 mc, mc_name, basepath = basepath.split(":", 2)
753 mc_suffix = ':mc:' + mc_name
754 else:
755 mc_suffix = ''
756
757 # mc stuff now removed from basepath. Whatever was next, if present will be the first
758 # suffix. ':/', recipe path start, marks the end of this. Something like
759 # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
760 if basepath[0] != '/':
761 cleaned += ':' + basepath.split(':/', 1)[0]
762
763 return cleaned + mc_suffix
764 972
765def clean_basepaths(a):
766 b = {}
767 for x in a:
768 b[clean_basepath(x)] = a[x]
769 return b
770
771def clean_basepaths_list(a):
772 b = []
773 for x in a:
774 b.append(clean_basepath(x))
775 return b
776 973
777def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False): 974def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
778 output = [] 975 output = []
@@ -794,20 +991,29 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
794 formatparams.update(values) 991 formatparams.update(values)
795 return formatstr.format(**formatparams) 992 return formatstr.format(**formatparams)
796 993
797 with open(a, 'rb') as f: 994 try:
798 p1 = pickle.Unpickler(f) 995 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
799 a_data = p1.load() 996 a_data = json.load(f, object_hook=SetDecoder)
800 with open(b, 'rb') as f: 997 except (TypeError, OSError) as err:
801 p2 = pickle.Unpickler(f) 998 bb.error("Failed to open sigdata file '%s': %s" % (a, str(err)))
802 b_data = p2.load() 999 raise err
803 1000 try:
804 def dict_diff(a, b, whitelist=set()): 1001 with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
1002 b_data = json.load(f, object_hook=SetDecoder)
1003 except (TypeError, OSError) as err:
1004 bb.error("Failed to open sigdata file '%s': %s" % (b, str(err)))
1005 raise err
1006
1007 for data in [a_data, b_data]:
1008 handle_renames(data)
1009
1010 def dict_diff(a, b, ignored_vars=set()):
805 sa = set(a.keys()) 1011 sa = set(a.keys())
806 sb = set(b.keys()) 1012 sb = set(b.keys())
807 common = sa & sb 1013 common = sa & sb
808 changed = set() 1014 changed = set()
809 for i in common: 1015 for i in common:
810 if a[i] != b[i] and i not in whitelist: 1016 if a[i] != b[i] and i not in ignored_vars:
811 changed.add(i) 1017 changed.add(i)
812 added = sb - sa 1018 added = sb - sa
813 removed = sa - sb 1019 removed = sa - sb
@@ -815,11 +1021,11 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
815 1021
816 def file_checksums_diff(a, b): 1022 def file_checksums_diff(a, b):
817 from collections import Counter 1023 from collections import Counter
818 # Handle old siginfo format 1024
819 if isinstance(a, dict): 1025 # Convert lists back to tuples
820 a = [(os.path.basename(f), cs) for f, cs in a.items()] 1026 a = [(f[0], f[1]) for f in a]
821 if isinstance(b, dict): 1027 b = [(f[0], f[1]) for f in b]
822 b = [(os.path.basename(f), cs) for f, cs in b.items()] 1028
823 # Compare lists, ensuring we can handle duplicate filenames if they exist 1029 # Compare lists, ensuring we can handle duplicate filenames if they exist
824 removedcount = Counter(a) 1030 removedcount = Counter(a)
825 removedcount.subtract(b) 1031 removedcount.subtract(b)
@@ -846,15 +1052,15 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
846 removed = [x[0] for x in removed] 1052 removed = [x[0] for x in removed]
847 return changed, added, removed 1053 return changed, added, removed
848 1054
849 if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']: 1055 if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
850 output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist'])) 1056 output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
851 if a_data['basewhitelist'] and b_data['basewhitelist']: 1057 if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
852 output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist'])) 1058 output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))
853 1059
854 if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']: 1060 if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
855 output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist'])) 1061 output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
856 if a_data['taskwhitelist'] and b_data['taskwhitelist']: 1062 if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
857 output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist'])) 1063 output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))
858 1064
859 if a_data['taskdeps'] != b_data['taskdeps']: 1065 if a_data['taskdeps'] != b_data['taskdeps']:
860 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps']))) 1066 output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
@@ -862,23 +1068,23 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
862 if a_data['basehash'] != b_data['basehash'] and not collapsed: 1068 if a_data['basehash'] != b_data['basehash'] and not collapsed:
863 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash'])) 1069 output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
864 1070
865 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist']) 1071 changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
866 if changed: 1072 if changed:
867 for dep in changed: 1073 for dep in sorted(changed):
868 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep])) 1074 output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
869 if a_data['gendeps'][dep] and b_data['gendeps'][dep]: 1075 if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
870 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep])) 1076 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
871 if added: 1077 if added:
872 for dep in added: 1078 for dep in sorted(added):
873 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep)) 1079 output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
874 if removed: 1080 if removed:
875 for dep in removed: 1081 for dep in sorted(removed):
876 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep)) 1082 output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
877 1083
878 1084
879 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals']) 1085 changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
880 if changed: 1086 if changed:
881 for dep in changed: 1087 for dep in sorted(changed):
882 oldval = a_data['varvals'][dep] 1088 oldval = a_data['varvals'][dep]
883 newval = b_data['varvals'][dep] 1089 newval = b_data['varvals'][dep]
884 if newval and oldval and ('\n' in oldval or '\n' in newval): 1090 if newval and oldval and ('\n' in oldval or '\n' in newval):
@@ -902,9 +1108,9 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
902 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval)) 1108 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
903 1109
904 if not 'file_checksum_values' in a_data: 1110 if not 'file_checksum_values' in a_data:
905 a_data['file_checksum_values'] = {} 1111 a_data['file_checksum_values'] = []
906 if not 'file_checksum_values' in b_data: 1112 if not 'file_checksum_values' in b_data:
907 b_data['file_checksum_values'] = {} 1113 b_data['file_checksum_values'] = []
908 1114
909 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values']) 1115 changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
910 if changed: 1116 if changed:
@@ -931,11 +1137,11 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
931 a = a_data['runtaskdeps'][idx] 1137 a = a_data['runtaskdeps'][idx]
932 b = b_data['runtaskdeps'][idx] 1138 b = b_data['runtaskdeps'][idx]
933 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed: 1139 if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
934 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b])) 1140 changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))
935 1141
936 if changed: 1142 if changed:
937 clean_a = clean_basepaths_list(a_data['runtaskdeps']) 1143 clean_a = a_data['runtaskdeps']
938 clean_b = clean_basepaths_list(b_data['runtaskdeps']) 1144 clean_b = b_data['runtaskdeps']
939 if clean_a != clean_b: 1145 if clean_a != clean_b:
940 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors)) 1146 output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
941 else: 1147 else:
@@ -948,7 +1154,7 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
948 b = b_data['runtaskhashes'] 1154 b = b_data['runtaskhashes']
949 changed, added, removed = dict_diff(a, b) 1155 changed, added, removed = dict_diff(a, b)
950 if added: 1156 if added:
951 for dep in added: 1157 for dep in sorted(added):
952 bdep_found = False 1158 bdep_found = False
953 if removed: 1159 if removed:
954 for bdep in removed: 1160 for bdep in removed:
@@ -956,9 +1162,9 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
956 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep)) 1162 #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
957 bdep_found = True 1163 bdep_found = True
958 if not bdep_found: 1164 if not bdep_found:
959 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep])) 1165 output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
960 if removed: 1166 if removed:
961 for dep in removed: 1167 for dep in sorted(removed):
962 adep_found = False 1168 adep_found = False
963 if added: 1169 if added:
964 for adep in added: 1170 for adep in added:
@@ -966,11 +1172,11 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
966 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep)) 1172 #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
967 adep_found = True 1173 adep_found = True
968 if not adep_found: 1174 if not adep_found:
969 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep])) 1175 output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
970 if changed: 1176 if changed:
971 for dep in changed: 1177 for dep in sorted(changed):
972 if not collapsed: 1178 if not collapsed:
973 output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep])) 1179 output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
974 if callable(recursecb): 1180 if callable(recursecb):
975 recout = recursecb(dep, a[dep], b[dep]) 1181 recout = recursecb(dep, a[dep], b[dep])
976 if recout: 1182 if recout:
@@ -980,6 +1186,7 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
980 # If a dependent hash changed, might as well print the line above and then defer to the changes in 1186 # If a dependent hash changed, might as well print the line above and then defer to the changes in
981 # that hash since in all likelyhood, they're the same changes this task also saw. 1187 # that hash since in all likelyhood, they're the same changes this task also saw.
982 output = [output[-1]] + recout 1188 output = [output[-1]] + recout
1189 break
983 1190
984 a_taint = a_data.get('taint', None) 1191 a_taint = a_data.get('taint', None)
985 b_taint = b_data.get('taint', None) 1192 b_taint = b_data.get('taint', None)
@@ -1001,7 +1208,7 @@ def calc_basehash(sigdata):
1001 basedata = '' 1208 basedata = ''
1002 1209
1003 alldeps = sigdata['taskdeps'] 1210 alldeps = sigdata['taskdeps']
1004 for dep in alldeps: 1211 for dep in sorted(alldeps):
1005 basedata = basedata + dep 1212 basedata = basedata + dep
1006 val = sigdata['varvals'][dep] 1213 val = sigdata['varvals'][dep]
1007 if val is not None: 1214 if val is not None:
@@ -1017,6 +1224,8 @@ def calc_taskhash(sigdata):
1017 1224
1018 for c in sigdata['file_checksum_values']: 1225 for c in sigdata['file_checksum_values']:
1019 if c[1]: 1226 if c[1]:
1227 if "./" in c[0]:
1228 data = data + c[0]
1020 data = data + c[1] 1229 data = data + c[1]
1021 1230
1022 if 'taint' in sigdata: 1231 if 'taint' in sigdata:
@@ -1031,32 +1240,37 @@ def calc_taskhash(sigdata):
1031def dump_sigfile(a): 1240def dump_sigfile(a):
1032 output = [] 1241 output = []
1033 1242
1034 with open(a, 'rb') as f: 1243 try:
1035 p1 = pickle.Unpickler(f) 1244 with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
1036 a_data = p1.load() 1245 a_data = json.load(f, object_hook=SetDecoder)
1246 except (TypeError, OSError) as err:
1247 bb.error("Failed to open sigdata file '%s': %s" % (a, str(err)))
1248 raise err
1249
1250 handle_renames(a_data)
1037 1251
1038 output.append("basewhitelist: %s" % (a_data['basewhitelist'])) 1252 output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))
1039 1253
1040 output.append("taskwhitelist: %s" % (a_data['taskwhitelist'])) 1254 output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))
1041 1255
1042 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps']))) 1256 output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
1043 1257
1044 output.append("basehash: %s" % (a_data['basehash'])) 1258 output.append("basehash: %s" % (a_data['basehash']))
1045 1259
1046 for dep in a_data['gendeps']: 1260 for dep in sorted(a_data['gendeps']):
1047 output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep])) 1261 output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))
1048 1262
1049 for dep in a_data['varvals']: 1263 for dep in sorted(a_data['varvals']):
1050 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep])) 1264 output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
1051 1265
1052 if 'runtaskdeps' in a_data: 1266 if 'runtaskdeps' in a_data:
1053 output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps'])) 1267 output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))
1054 1268
1055 if 'file_checksum_values' in a_data: 1269 if 'file_checksum_values' in a_data:
1056 output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values'])) 1270 output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))
1057 1271
1058 if 'runtaskhashes' in a_data: 1272 if 'runtaskhashes' in a_data:
1059 for dep in a_data['runtaskhashes']: 1273 for dep in sorted(a_data['runtaskhashes']):
1060 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep])) 1274 output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
1061 1275
1062 if 'taint' in a_data: 1276 if 'taint' in a_data: