Diffstat (limited to 'bitbake/lib/bb/siggen.py')
-rw-r--r--   bitbake/lib/bb/siggen.py   731
1 file changed, 461 insertions(+), 270 deletions(-)
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 0d88c6ec68..a6163b55ea 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -1,4 +1,6 @@
 #
+# Copyright BitBake Contributors
+#
 # SPDX-License-Identifier: GPL-2.0-only
 #
 
@@ -11,6 +13,10 @@ import pickle
 import bb.data
 import difflib
 import simplediff
+import json
+import types
+from contextlib import contextmanager
+import bb.compress.zstd
 from bb.checksum import FileChecksumCache
 from bb import runqueue
 import hashserv
@@ -19,6 +25,35 @@ import hashserv.client
 logger = logging.getLogger('BitBake.SigGen')
 hashequiv_logger = logging.getLogger('BitBake.SigGen.HashEquiv')
 
+#find_siginfo and find_siginfo_version are set by the metadata siggen
+# The minimum version of the find_siginfo function we need
+find_siginfo_minversion = 2
+
+HASHSERV_ENVVARS = [
+    "SSL_CERT_DIR",
+    "SSL_CERT_FILE",
+    "NO_PROXY",
+    "HTTPS_PROXY",
+    "HTTP_PROXY"
+]
+
+def check_siggen_version(siggen):
+    if not hasattr(siggen, "find_siginfo_version"):
+        bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (no version found)")
+    if siggen.find_siginfo_version < siggen.find_siginfo_minversion:
+        bb.fatal("Siggen from metadata (OE-Core?) is too old, please update it (%s vs %s)" % (siggen.find_siginfo_version, siggen.find_siginfo_minversion))
+
+class SetEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, set) or isinstance(obj, frozenset):
+            return dict(_set_object=list(sorted(obj)))
+        return json.JSONEncoder.default(self, obj)
+
+def SetDecoder(dct):
+    if '_set_object' in dct:
+        return frozenset(dct['_set_object'])
+    return dct
+
 def init(d):
     siggens = [obj for obj in globals().values()
                if type(obj) is type and issubclass(obj, SignatureGenerator)]
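
The SetEncoder/SetDecoder pair added above lets sets survive a JSON round trip by boxing them as a sorted list under a "_set_object" key; a minimal standalone sketch of the round trip, using only the two helpers from this hunk:

    import json

    class SetEncoder(json.JSONEncoder):
        def default(self, obj):
            # Box sets/frozensets so json.dumps can serialize them
            if isinstance(obj, (set, frozenset)):
                return dict(_set_object=list(sorted(obj)))
            return json.JSONEncoder.default(self, obj)

    def SetDecoder(dct):
        # Unbox marker dicts back into frozensets on load
        if '_set_object' in dct:
            return frozenset(dct['_set_object'])
        return dct

    encoded = json.dumps({"vardeps": {"CFLAGS", "CC"}}, cls=SetEncoder)
    # -> '{"vardeps": {"_set_object": ["CC", "CFLAGS"]}}'
    decoded = json.loads(encoded, object_hook=SetDecoder)
    # -> {'vardeps': frozenset({'CC', 'CFLAGS'})}
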
@@ -27,7 +62,6 @@ def init(d):
     for sg in siggens:
         if desired == sg.name:
             return sg(d)
-            break
     else:
         logger.error("Invalid signature generator '%s', using default 'noop'\n"
                      "Available generators: %s", desired,
@@ -39,11 +73,6 @@ class SignatureGenerator(object):
39 """ 73 """
40 name = "noop" 74 name = "noop"
41 75
42 # If the derived class supports multiconfig datacaches, set this to True
43 # The default is False for backward compatibility with derived signature
44 # generators that do not understand multiconfig caches
45 supports_multiconfig_datacaches = False
46
47 def __init__(self, data): 76 def __init__(self, data):
48 self.basehash = {} 77 self.basehash = {}
49 self.taskhash = {} 78 self.taskhash = {}
@@ -61,9 +90,39 @@ class SignatureGenerator(object):
     def postparsing_clean_cache(self):
         return
 
+    def setup_datacache(self, datacaches):
+        self.datacaches = datacaches
+
+    def setup_datacache_from_datastore(self, mcfn, d):
+        # In task context we have no cache so setup internal data structures
+        # from the fully parsed data store provided
+
+        mc = d.getVar("__BBMULTICONFIG", False) or ""
+        tasks = d.getVar('__BBTASKS', False)
+
+        self.datacaches = {}
+        self.datacaches[mc] = types.SimpleNamespace()
+        setattr(self.datacaches[mc], "stamp", {})
+        self.datacaches[mc].stamp[mcfn] = d.getVar('STAMP')
+        setattr(self.datacaches[mc], "stamp_extrainfo", {})
+        self.datacaches[mc].stamp_extrainfo[mcfn] = {}
+        for t in tasks:
+            flag = d.getVarFlag(t, "stamp-extra-info")
+            if flag:
+                self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag
+
+    def get_cached_unihash(self, tid):
+        return None
+
     def get_unihash(self, tid):
+        unihash = self.get_cached_unihash(tid)
+        if unihash:
+            return unihash
         return self.taskhash[tid]
 
+    def get_unihashes(self, tids):
+        return {tid: self.get_unihash(tid) for tid in tids}
+
     def prep_taskhash(self, tid, deps, dataCaches):
         return
 
@@ -75,17 +134,51 @@ class SignatureGenerator(object):
75 """Write/update the file checksum cache onto disk""" 134 """Write/update the file checksum cache onto disk"""
76 return 135 return
77 136
137 def stampfile_base(self, mcfn):
138 mc = bb.runqueue.mc_from_tid(mcfn)
139 return self.datacaches[mc].stamp[mcfn]
140
141 def stampfile_mcfn(self, taskname, mcfn, extrainfo=True):
142 mc = bb.runqueue.mc_from_tid(mcfn)
143 stamp = self.datacaches[mc].stamp[mcfn]
144 if not stamp:
145 return
146
147 stamp_extrainfo = ""
148 if extrainfo:
149 taskflagname = taskname
150 if taskname.endswith("_setscene"):
151 taskflagname = taskname.replace("_setscene", "")
152 stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""
153
154 return self.stampfile(stamp, mcfn, taskname, stamp_extrainfo)
155
78 def stampfile(self, stampbase, file_name, taskname, extrainfo): 156 def stampfile(self, stampbase, file_name, taskname, extrainfo):
79 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 157 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
80 158
159 def stampcleanmask_mcfn(self, taskname, mcfn):
160 mc = bb.runqueue.mc_from_tid(mcfn)
161 stamp = self.datacaches[mc].stamp[mcfn]
162 if not stamp:
163 return []
164
165 taskflagname = taskname
166 if taskname.endswith("_setscene"):
167 taskflagname = taskname.replace("_setscene", "")
168 stamp_extrainfo = self.datacaches[mc].stamp_extrainfo[mcfn].get(taskflagname) or ""
169
170 return self.stampcleanmask(stamp, mcfn, taskname, stamp_extrainfo)
171
81 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo): 172 def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
82 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.') 173 return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
83 174
84 def dump_sigtask(self, fn, task, stampbase, runtime): 175 def dump_sigtask(self, mcfn, task, stampbase, runtime):
85 return 176 return
86 177
87 def invalidate_task(self, task, d, fn): 178 def invalidate_task(self, task, mcfn):
88 bb.build.del_stamp(task, d, fn) 179 mc = bb.runqueue.mc_from_tid(mcfn)
180 stamp = self.datacaches[mc].stamp[mcfn]
181 bb.utils.remove(stamp)
89 182
90 def dump_sigs(self, dataCache, options): 183 def dump_sigs(self, dataCache, options):
91 return 184 return
@@ -111,37 +204,13 @@ class SignatureGenerator(object):
     def set_setscene_tasks(self, setscene_tasks):
         return
 
-    @classmethod
-    def get_data_caches(cls, dataCaches, mc):
-        """
-        This function returns the datacaches that should be passed to signature
-        generator functions. If the signature generator supports multiconfig
-        caches, the entire dictionary of data caches is sent, otherwise a
-        special proxy is sent that support both index access to all
-        multiconfigs, and also direct access for the default multiconfig.
-
-        The proxy class allows code in this class itself to always use
-        multiconfig aware code (to ease maintenance), but derived classes that
-        are unaware of multiconfig data caches can still access the default
-        multiconfig as expected.
-
-        Do not override this function in derived classes; it will be removed in
-        the future when support for multiconfig data caches is mandatory
-        """
-        class DataCacheProxy(object):
-            def __init__(self):
-                pass
-
-            def __getitem__(self, key):
-                return dataCaches[key]
-
-            def __getattr__(self, name):
-                return getattr(dataCaches[mc], name)
-
-        if cls.supports_multiconfig_datacaches:
-            return dataCaches
+    def exit(self):
+        return
 
-        return DataCacheProxy()
+def build_pnid(mc, pn, taskname):
+    if mc:
+        return "mc:" + mc + ":" + pn + ":" + taskname
+    return pn + ":" + taskname
 
 class SignatureGeneratorBasic(SignatureGenerator):
     """
@@ -152,15 +221,12 @@ class SignatureGeneratorBasic(SignatureGenerator):
         self.basehash = {}
         self.taskhash = {}
         self.unihash = {}
-        self.taskdeps = {}
         self.runtaskdeps = {}
         self.file_checksum_values = {}
         self.taints = {}
-        self.gendeps = {}
-        self.lookupcache = {}
         self.setscenetasks = set()
-        self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
-        self.taskwhitelist = None
+        self.basehash_ignore_vars = set((data.getVar("BB_BASEHASH_IGNORE_VARS") or "").split())
+        self.taskhash_ignore_tasks = None
         self.init_rundepcheck(data)
         checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
         if checksum_cache_file:
@@ -175,21 +241,21 @@ class SignatureGeneratorBasic(SignatureGenerator):
         self.tidtopn = {}
 
     def init_rundepcheck(self, data):
-        self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
-        if self.taskwhitelist:
-            self.twl = re.compile(self.taskwhitelist)
+        self.taskhash_ignore_tasks = data.getVar("BB_TASKHASH_IGNORE_TASKS") or None
+        if self.taskhash_ignore_tasks:
+            self.twl = re.compile(self.taskhash_ignore_tasks)
         else:
             self.twl = None
 
-    def _build_data(self, fn, d):
+    def _build_data(self, mcfn, d):
 
         ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
-        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basewhitelist)
+        tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d, self.basehash_ignore_vars)
 
-        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basewhitelist, fn)
+        taskdeps, basehash = bb.data.generate_dependency_hash(tasklist, gendeps, lookupcache, self.basehash_ignore_vars, mcfn)
 
         for task in tasklist:
-            tid = fn + ":" + task
+            tid = mcfn + ":" + task
             if not ignore_mismatch and tid in self.basehash and self.basehash[tid] != basehash[tid]:
                 bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (tid, self.basehash[tid], basehash[tid]))
                 bb.error("The following commands may help:")
@@ -200,11 +266,7 @@ class SignatureGeneratorBasic(SignatureGenerator):
                 bb.error("%s -Sprintdiff\n" % cmd)
             self.basehash[tid] = basehash[tid]
 
-        self.taskdeps[fn] = taskdeps
-        self.gendeps[fn] = gendeps
-        self.lookupcache[fn] = lookupcache
-
-        return taskdeps
+        return taskdeps, gendeps, lookupcache
 
     def set_setscene_tasks(self, setscene_tasks):
         self.setscenetasks = set(setscene_tasks)
@@ -212,35 +274,47 @@ class SignatureGeneratorBasic(SignatureGenerator):
     def finalise(self, fn, d, variant):
 
         mc = d.getVar("__BBMULTICONFIG", False) or ""
+        mcfn = fn
         if variant or mc:
-            fn = bb.cache.realfn2virtual(fn, variant, mc)
+            mcfn = bb.cache.realfn2virtual(fn, variant, mc)
 
         try:
-            taskdeps = self._build_data(fn, d)
+            taskdeps, gendeps, lookupcache = self._build_data(mcfn, d)
         except bb.parse.SkipRecipe:
             raise
         except:
-            bb.warn("Error during finalise of %s" % fn)
+            bb.warn("Error during finalise of %s" % mcfn)
             raise
 
+        basehashes = {}
+        for task in taskdeps:
+            basehashes[task] = self.basehash[mcfn + ":" + task]
+
+        d.setVar("__siggen_basehashes", basehashes)
+        d.setVar("__siggen_gendeps", gendeps)
+        d.setVar("__siggen_varvals", lookupcache)
+        d.setVar("__siggen_taskdeps", taskdeps)
+
         #Slow but can be useful for debugging mismatched basehashes
-        #for task in self.taskdeps[fn]:
-        #    self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
+        #self.setup_datacache_from_datastore(mcfn, d)
+        #for task in taskdeps:
+        #    self.dump_sigtask(mcfn, task, d.getVar("STAMP"), False)
 
-        for task in taskdeps:
-            d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + ":" + task])
+    def setup_datacache_from_datastore(self, mcfn, d):
+        super().setup_datacache_from_datastore(mcfn, d)
 
-    def postparsing_clean_cache(self):
-        #
-        # After parsing we can remove some things from memory to reduce our memory footprint
-        #
-        self.gendeps = {}
-        self.lookupcache = {}
-        self.taskdeps = {}
+        mc = bb.runqueue.mc_from_tid(mcfn)
+        for attr in ["siggen_varvals", "siggen_taskdeps", "siggen_gendeps"]:
+            if not hasattr(self.datacaches[mc], attr):
+                setattr(self.datacaches[mc], attr, {})
+        self.datacaches[mc].siggen_varvals[mcfn] = d.getVar("__siggen_varvals")
+        self.datacaches[mc].siggen_taskdeps[mcfn] = d.getVar("__siggen_taskdeps")
+        self.datacaches[mc].siggen_gendeps[mcfn] = d.getVar("__siggen_gendeps")
 
     def rundep_check(self, fn, recipename, task, dep, depname, dataCaches):
         # Return True if we should keep the dependency, False to drop it
-        # We only manipulate the dependencies for packages not in the whitelist
+        # We only manipulate the dependencies for packages not in the ignore
+        # list
         if self.twl and not self.twl.search(recipename):
             # then process the actual dependencies
             if self.twl.search(depname):
@@ -258,38 +332,37 @@ class SignatureGeneratorBasic(SignatureGenerator):
 
     def prep_taskhash(self, tid, deps, dataCaches):
 
-        (mc, _, task, fn) = bb.runqueue.split_tid_mcfn(tid)
+        (mc, _, task, mcfn) = bb.runqueue.split_tid_mcfn(tid)
 
         self.basehash[tid] = dataCaches[mc].basetaskhash[tid]
         self.runtaskdeps[tid] = []
         self.file_checksum_values[tid] = []
-        recipename = dataCaches[mc].pkg_fn[fn]
+        recipename = dataCaches[mc].pkg_fn[mcfn]
 
         self.tidtopn[tid] = recipename
+        # save hashfn for deps into siginfo?
+        for dep in deps:
+            (depmc, _, deptask, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
+            dep_pn = dataCaches[depmc].pkg_fn[depmcfn]
 
-        for dep in sorted(deps, key=clean_basepath):
-            (depmc, _, _, depmcfn) = bb.runqueue.split_tid_mcfn(dep)
-            depname = dataCaches[depmc].pkg_fn[depmcfn]
-            if not self.supports_multiconfig_datacaches and mc != depmc:
-                # If the signature generator doesn't understand multiconfig
-                # data caches, any dependency not in the same multiconfig must
-                # be skipped for backward compatibility
-                continue
-            if not self.rundep_check(fn, recipename, task, dep, depname, dataCaches):
+            if not self.rundep_check(mcfn, recipename, task, dep, dep_pn, dataCaches):
                 continue
+
             if dep not in self.taskhash:
                 bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep)
-            self.runtaskdeps[tid].append(dep)
 
-        if task in dataCaches[mc].file_checksums[fn]:
+            dep_pnid = build_pnid(depmc, dep_pn, deptask)
+            self.runtaskdeps[tid].append((dep_pnid, dep))
+
+        if task in dataCaches[mc].file_checksums[mcfn]:
             if self.checksum_cache:
-                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
+                checksums = self.checksum_cache.get_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
             else:
-                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[fn][task], recipename, self.localdirsexclude)
+                checksums = bb.fetch2.get_file_checksums(dataCaches[mc].file_checksums[mcfn][task], recipename, self.localdirsexclude)
             for (f,cs) in checksums:
                 self.file_checksum_values[tid].append((f,cs))
 
-        taskdep = dataCaches[mc].task_deps[fn]
+        taskdep = dataCaches[mc].task_deps[mcfn]
         if 'nostamp' in taskdep and task in taskdep['nostamp']:
             # Nostamp tasks need an implicit taint so that they force any dependent tasks to run
             if tid in self.taints and self.taints[tid].startswith("nostamp:"):
@@ -300,32 +373,34 @@ class SignatureGeneratorBasic(SignatureGenerator):
             taint = str(uuid.uuid4())
             self.taints[tid] = "nostamp:" + taint
 
-        taint = self.read_taint(fn, task, dataCaches[mc].stamp[fn])
+        taint = self.read_taint(mcfn, task, dataCaches[mc].stamp[mcfn])
         if taint:
             self.taints[tid] = taint
             logger.warning("%s is tainted from a forced run" % tid)
 
-        return
+        return set(dep for _, dep in self.runtaskdeps[tid])
 
     def get_taskhash(self, tid, deps, dataCaches):
 
         data = self.basehash[tid]
-        for dep in self.runtaskdeps[tid]:
-            data = data + self.get_unihash(dep)
+        for dep in sorted(self.runtaskdeps[tid]):
+            data += self.get_unihash(dep[1])
 
-        for (f, cs) in self.file_checksum_values[tid]:
+        for (f, cs) in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
             if cs:
-                data = data + cs
+                if "/./" in f:
+                    data += "./" + f.split("/./")[1]
+                data += cs
 
         if tid in self.taints:
             if self.taints[tid].startswith("nostamp:"):
-                data = data + self.taints[tid][8:]
+                data += self.taints[tid][8:]
             else:
-                data = data + self.taints[tid]
+                data += self.taints[tid]
 
         h = hashlib.sha256(data.encode("utf-8")).hexdigest()
         self.taskhash[tid] = h
-        #d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
+        #d.setVar("BB_TASKHASH:task-%s" % task, taskhash[task])
         return h
 
     def writeout_file_checksum_cache(self):
@@ -340,9 +415,9 @@ class SignatureGeneratorBasic(SignatureGenerator):
     def save_unitaskhashes(self):
         self.unihash_cache.save(self.unitaskhashes)
 
-    def dump_sigtask(self, fn, task, stampbase, runtime):
-
-        tid = fn + ":" + task
+    def dump_sigtask(self, mcfn, task, stampbase, runtime):
+        tid = mcfn + ":" + task
+        mc = bb.runqueue.mc_from_tid(mcfn)
         referencestamp = stampbase
         if isinstance(runtime, str) and runtime.startswith("customfile"):
             sigfile = stampbase
@@ -357,29 +432,34 @@ class SignatureGeneratorBasic(SignatureGenerator):
 
         data = {}
         data['task'] = task
-        data['basewhitelist'] = self.basewhitelist
-        data['taskwhitelist'] = self.taskwhitelist
-        data['taskdeps'] = self.taskdeps[fn][task]
+        data['basehash_ignore_vars'] = self.basehash_ignore_vars
+        data['taskhash_ignore_tasks'] = self.taskhash_ignore_tasks
+        data['taskdeps'] = self.datacaches[mc].siggen_taskdeps[mcfn][task]
         data['basehash'] = self.basehash[tid]
         data['gendeps'] = {}
         data['varvals'] = {}
-        data['varvals'][task] = self.lookupcache[fn][task]
-        for dep in self.taskdeps[fn][task]:
-            if dep in self.basewhitelist:
+        data['varvals'][task] = self.datacaches[mc].siggen_varvals[mcfn][task]
+        for dep in self.datacaches[mc].siggen_taskdeps[mcfn][task]:
+            if dep in self.basehash_ignore_vars:
                 continue
-            data['gendeps'][dep] = self.gendeps[fn][dep]
-            data['varvals'][dep] = self.lookupcache[fn][dep]
+            data['gendeps'][dep] = self.datacaches[mc].siggen_gendeps[mcfn][dep]
+            data['varvals'][dep] = self.datacaches[mc].siggen_varvals[mcfn][dep]
 
         if runtime and tid in self.taskhash:
-            data['runtaskdeps'] = self.runtaskdeps[tid]
-            data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[tid]]
+            data['runtaskdeps'] = [dep[0] for dep in sorted(self.runtaskdeps[tid])]
+            data['file_checksum_values'] = []
+            for f,cs in sorted(self.file_checksum_values[tid], key=clean_checksum_file_path):
+                if "/./" in f:
+                    data['file_checksum_values'].append(("./" + f.split("/./")[1], cs))
+                else:
+                    data['file_checksum_values'].append((os.path.basename(f), cs))
             data['runtaskhashes'] = {}
-            for dep in data['runtaskdeps']:
-                data['runtaskhashes'][dep] = self.get_unihash(dep)
+            for dep in self.runtaskdeps[tid]:
+                data['runtaskhashes'][dep[0]] = self.get_unihash(dep[1])
             data['taskhash'] = self.taskhash[tid]
             data['unihash'] = self.get_unihash(tid)
 
-        taint = self.read_taint(fn, task, referencestamp)
+        taint = self.read_taint(mcfn, task, referencestamp)
         if taint:
             data['taint'] = taint
 
@@ -396,13 +476,13 @@ class SignatureGeneratorBasic(SignatureGenerator):
             bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[tid], tid))
             sigfile = sigfile.replace(self.taskhash[tid], computed_taskhash)
 
-        fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
+        fd, tmpfile = bb.utils.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
         try:
-            with os.fdopen(fd, "wb") as stream:
-                p = pickle.dump(data, stream, -1)
-                stream.flush()
+            with bb.compress.zstd.open(fd, "wt", encoding="utf-8", num_threads=1) as f:
+                json.dump(data, f, sort_keys=True, separators=(",", ":"), cls=SetEncoder)
+                f.flush()
             os.chmod(tmpfile, 0o664)
-            os.rename(tmpfile, sigfile)
+            bb.utils.rename(tmpfile, sigfile)
         except (OSError, IOError) as err:
             try:
                 os.unlink(tmpfile)
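
With this hunk, siginfo files switch from pickle to zstd-compressed JSON, written atomically via a temp file plus rename. A sketch of reading one back outside BitBake, assuming the third-party 'zstandard' module in place of the bb.compress.zstd wrapper used above:

    import io
    import json
    import zstandard

    def load_siginfo(path):
        # Decompress the zstd stream, then parse the JSON payload.
        # (BitBake itself also passes object_hook=SetDecoder to revive sets.)
        with open(path, "rb") as f:
            with zstandard.ZstdDecompressor().stream_reader(f) as reader:
                return json.load(io.TextIOWrapper(reader, encoding="utf-8"))
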
@@ -410,18 +490,6 @@ class SignatureGeneratorBasic(SignatureGenerator):
                 pass
             raise err
 
-    def dump_sigfn(self, fn, dataCaches, options):
-        if fn in self.taskdeps:
-            for task in self.taskdeps[fn]:
-                tid = fn + ":" + task
-                mc = bb.runqueue.mc_from_tid(tid)
-                if tid not in self.taskhash:
-                    continue
-                if dataCaches[mc].basetaskhash[tid] != self.basehash[tid]:
-                    bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % tid)
-                    bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[tid], self.basehash[tid]))
-                self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
-
 class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
     name = "basichash"
 
@@ -432,11 +500,11 @@ class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
         # If task is not in basehash, then error
         return self.basehash[tid]
 
-    def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
-        if taskname != "do_setscene" and taskname.endswith("_setscene"):
-            tid = fn + ":" + taskname[:-9]
+    def stampfile(self, stampbase, mcfn, taskname, extrainfo, clean=False):
+        if taskname.endswith("_setscene"):
+            tid = mcfn + ":" + taskname[:-9]
         else:
-            tid = fn + ":" + taskname
+            tid = mcfn + ":" + taskname
         if clean:
             h = "*"
         else:
@@ -444,29 +512,99 @@ class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
 
         return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
 
-    def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
-        return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)
+    def stampcleanmask(self, stampbase, mcfn, taskname, extrainfo):
+        return self.stampfile(stampbase, mcfn, taskname, extrainfo, clean=True)
+
+    def invalidate_task(self, task, mcfn):
+        bb.note("Tainting hash to force rebuild of task %s, %s" % (mcfn, task))
 
-    def invalidate_task(self, task, d, fn):
-        bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
-        bb.build.write_taint(task, d, fn)
+        mc = bb.runqueue.mc_from_tid(mcfn)
+        stamp = self.datacaches[mc].stamp[mcfn]
+
+        taintfn = stamp + '.' + task + '.taint'
+
+        import uuid
+        bb.utils.mkdirhier(os.path.dirname(taintfn))
+        # The specific content of the taint file is not really important,
+        # we just need it to be random, so a random UUID is used
+        with open(taintfn, 'w') as taintf:
+            taintf.write(str(uuid.uuid4()))
 
 class SignatureGeneratorUniHashMixIn(object):
     def __init__(self, data):
         self.extramethod = {}
+        # NOTE: The cache only tracks hashes that exist. Hashes that don't
+        # exist are always queried from the server since it is possible for
+        # hashes to appear over time, but much less likely for them to
+        # disappear
+        self.unihash_exists_cache = set()
+        self.username = None
+        self.password = None
+        self.env = {}
+
+        origenv = data.getVar("BB_ORIGENV")
+        for e in HASHSERV_ENVVARS:
+            value = data.getVar(e)
+            if not value and origenv:
+                value = origenv.getVar(e)
+            if value:
+                self.env[e] = value
         super().__init__(data)
 
     def get_taskdata(self):
-        return (self.server, self.method, self.extramethod) + super().get_taskdata()
+        return (self.server, self.method, self.extramethod, self.username, self.password, self.env) + super().get_taskdata()
 
     def set_taskdata(self, data):
-        self.server, self.method, self.extramethod = data[:3]
-        super().set_taskdata(data[3:])
+        self.server, self.method, self.extramethod, self.username, self.password, self.env = data[:6]
+        super().set_taskdata(data[6:])
+
+    def get_hashserv_creds(self):
+        if self.username and self.password:
+            return {
+                "username": self.username,
+                "password": self.password,
+            }
+
+        return {}
+
+    @contextmanager
+    def _client_env(self):
+        orig_env = os.environ.copy()
+        try:
+            for k, v in self.env.items():
+                os.environ[k] = v
+
+            yield
+        finally:
+            for k, v in self.env.items():
+                if k in orig_env:
+                    os.environ[k] = orig_env[k]
+                else:
+                    del os.environ[k]
 
+    @contextmanager
     def client(self):
-        if getattr(self, '_client', None) is None:
-            self._client = hashserv.create_client(self.server)
-        return self._client
+        with self._client_env():
+            if getattr(self, '_client', None) is None:
+                self._client = hashserv.create_client(self.server, **self.get_hashserv_creds())
+            yield self._client
+
+    def reset(self, data):
+        self.__close_clients()
+        return super().reset(data)
+
+    def exit(self):
+        self.__close_clients()
+        return super().exit()
+
+    def __close_clients(self):
+        with self._client_env():
+            if getattr(self, '_client', None) is not None:
+                self._client.close()
+                self._client = None
+            if getattr(self, '_client_pool', None) is not None:
+                self._client_pool.close()
+                self._client_pool = None
 
     def get_stampfile_hash(self, tid):
         if tid in self.taskhash:
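
The _client_env() context manager above applies the captured proxy/SSL variables around every hashserv client call and restores the previous environment afterwards, including deleting keys that did not exist before. The same pattern in isolation (names and URL hypothetical):

    import os
    from contextlib import contextmanager

    @contextmanager
    def scoped_env(overrides):
        saved = os.environ.copy()
        try:
            os.environ.update(overrides)
            yield
        finally:
            for k in overrides:
                if k in saved:
                    os.environ[k] = saved[k]   # restore the old value
                else:
                    del os.environ[k]          # the key did not exist before

    with scoped_env({"HTTPS_PROXY": "http://proxy.example:8080"}):
        pass  # clients created here see the override
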
@@ -499,7 +637,7 @@ class SignatureGeneratorUniHashMixIn(object):
             return None
         return unihash
 
-    def get_unihash(self, tid):
+    def get_cached_unihash(self, tid):
         taskhash = self.taskhash[tid]
 
         # If its not a setscene task we can return
@@ -514,40 +652,96 @@ class SignatureGeneratorUniHashMixIn(object):
             self.unihash[tid] = unihash
             return unihash
 
-        # In the absence of being able to discover a unique hash from the
-        # server, make it be equivalent to the taskhash. The unique "hash" only
-        # really needs to be a unique string (not even necessarily a hash), but
-        # making it match the taskhash has a few advantages:
-        #
-        # 1) All of the sstate code that assumes hashes can be the same
-        # 2) It provides maximal compatibility with builders that don't use
-        #    an equivalency server
-        # 3) The value is easy for multiple independent builders to derive the
-        #    same unique hash from the same input. This means that if the
-        #    independent builders find the same taskhash, but it isn't reported
-        #    to the server, there is a better chance that they will agree on
-        #    the unique hash.
-        unihash = taskhash
+        return None
 
-        try:
-            method = self.method
-            if tid in self.extramethod:
-                method = method + self.extramethod[tid]
-            data = self.client().get_unihash(method, self.taskhash[tid])
-            if data:
-                unihash = data
-                # A unique hash equal to the taskhash is not very interesting,
-                # so it is reported it at debug level 2. If they differ, that
-                # is much more interesting, so it is reported at debug level 1
-                hashequiv_logger.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
-            else:
-                hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
-        except hashserv.client.HashConnectionError as e:
-            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
+    def _get_method(self, tid):
+        method = self.method
+        if tid in self.extramethod:
+            method = method + self.extramethod[tid]
+
+        return method
+
+    def unihashes_exist(self, query):
+        if len(query) == 0:
+            return {}
+
+        query_keys = []
+        result = {}
+        for key, unihash in query.items():
+            if unihash in self.unihash_exists_cache:
+                result[key] = True
+            else:
+                query_keys.append(key)
+
+        if query_keys:
+            with self.client() as client:
+                query_result = client.unihash_exists_batch(query[k] for k in query_keys)
+
+            for idx, key in enumerate(query_keys):
+                exists = query_result[idx]
+                if exists:
+                    self.unihash_exists_cache.add(query[key])
+                result[key] = exists
+
+        return result
+
+    def get_unihash(self, tid):
+        return self.get_unihashes([tid])[tid]
+
+    def get_unihashes(self, tids):
+        """
+        For a iterable of tids, returns a dictionary that maps each tid to a
+        unihash
+        """
+        result = {}
+        query_tids = []
+
+        for tid in tids:
+            unihash = self.get_cached_unihash(tid)
+            if unihash:
+                result[tid] = unihash
+            else:
+                query_tids.append(tid)
+
+        if query_tids:
+            unihashes = []
+            try:
+                with self.client() as client:
+                    unihashes = client.get_unihash_batch((self._get_method(tid), self.taskhash[tid]) for tid in query_tids)
+            except (ConnectionError, FileNotFoundError) as e:
+                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
+
+            for idx, tid in enumerate(query_tids):
+                # In the absence of being able to discover a unique hash from the
+                # server, make it be equivalent to the taskhash. The unique "hash" only
+                # really needs to be a unique string (not even necessarily a hash), but
+                # making it match the taskhash has a few advantages:
+                #
+                # 1) All of the sstate code that assumes hashes can be the same
+                # 2) It provides maximal compatibility with builders that don't use
+                #    an equivalency server
+                # 3) The value is easy for multiple independent builders to derive the
+                #    same unique hash from the same input. This means that if the
+                #    independent builders find the same taskhash, but it isn't reported
+                #    to the server, there is a better chance that they will agree on
+                #    the unique hash.
+                taskhash = self.taskhash[tid]
+
+                if unihashes and unihashes[idx]:
+                    unihash = unihashes[idx]
+                    # A unique hash equal to the taskhash is not very interesting,
+                    # so it is reported it at debug level 2. If they differ, that
+                    # is much more interesting, so it is reported at debug level 1
+                    hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
+                else:
+                    hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
+                    unihash = taskhash
 
-        self.set_unihash(tid, unihash)
-        self.unihash[tid] = unihash
-        return unihash
+                self.set_unihash(tid, unihash)
+                self.unihash[tid] = unihash
+                result[tid] = unihash
+
+        return result
 
     def report_unihash(self, path, task, d):
         import importlib
@@ -556,14 +750,14 @@ class SignatureGeneratorUniHashMixIn(object):
         unihash = d.getVar('BB_UNIHASH')
         report_taskdata = d.getVar('SSTATE_HASHEQUIV_REPORT_TASKDATA') == '1'
         tempdir = d.getVar('T')
-        fn = d.getVar('BB_FILENAME')
-        tid = fn + ':do_' + task
+        mcfn = d.getVar('BB_FILENAME')
+        tid = mcfn + ':do_' + task
         key = tid + ':' + taskhash
 
         if self.setscenetasks and tid not in self.setscenetasks:
             return
 
-        # This can happen if locked sigs are in action. Detect and just abort
+        # This can happen if locked sigs are in action. Detect and just exit
         if taskhash != self.taskhash[tid]:
             return
 
@@ -611,17 +805,19 @@ class SignatureGeneratorUniHashMixIn(object):
                 if tid in self.extramethod:
                     method = method + self.extramethod[tid]
 
-                data = self.client().report_unihash(taskhash, method, outhash, unihash, extra_data)
+                with self.client() as client:
+                    data = client.report_unihash(taskhash, method, outhash, unihash, extra_data)
+
                 new_unihash = data['unihash']
 
                 if new_unihash != unihash:
                     hashequiv_logger.debug('Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
-                    bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
+                    bb.event.fire(bb.runqueue.taskUniHashUpdate(mcfn + ':do_' + task, new_unihash), d)
                     self.set_unihash(tid, new_unihash)
                     d.setVar('BB_UNIHASH', new_unihash)
                 else:
                     hashequiv_logger.debug('Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
-            except hashserv.client.HashConnectionError as e:
+            except (ConnectionError, FileNotFoundError) as e:
                 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
             finally:
                 if sigfile:
@@ -642,7 +838,9 @@ class SignatureGeneratorUniHashMixIn(object):
             if tid in self.extramethod:
                 method = method + self.extramethod[tid]
 
-            data = self.client().report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
+            with self.client() as client:
+                data = client.report_unihash_equiv(taskhash, method, wanted_unihash, extra_data)
+
             hashequiv_logger.verbose('Reported task %s as unihash %s to %s (%s)' % (tid, wanted_unihash, self.server, str(data)))
 
             if data is None:
@@ -661,7 +859,7 @@ class SignatureGeneratorUniHashMixIn(object):
             # TODO: What to do here?
             hashequiv_logger.verbose('Task %s unihash reported as unwanted hash %s' % (tid, finalunihash))
 
-        except hashserv.client.HashConnectionError as e:
+        except (ConnectionError, FileNotFoundError) as e:
             bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
 
         return False
@@ -676,19 +874,18 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG
         self.server = data.getVar('BB_HASHSERVE')
         self.method = "sstate_output_hash"
 
-#
-# Dummy class used for bitbake-selftest
-#
-class SignatureGeneratorTestMulticonfigDepends(SignatureGeneratorBasicHash):
-    name = "TestMulticonfigDepends"
-    supports_multiconfig_datacaches = True
+def clean_checksum_file_path(file_checksum_tuple):
+    f, cs = file_checksum_tuple
+    if "/./" in f:
+        return "./" + f.split("/./")[1]
+    return os.path.basename(f)
 
 def dump_this_task(outfile, d):
     import bb.parse
-    fn = d.getVar("BB_FILENAME")
+    mcfn = d.getVar("BB_FILENAME")
     task = "do_" + d.getVar("BB_CURRENTTASK")
-    referencestamp = bb.build.stamp_internal(task, d, None, True)
-    bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
+    referencestamp = bb.parse.siggen.stampfile_base(mcfn)
+    bb.parse.siggen.dump_sigtask(mcfn, task, outfile, "customfile:" + referencestamp)
 
 def init_colors(enable_color):
     """Initialise colour dict for passing to compare_sigfiles()"""
@@ -741,38 +938,15 @@ def list_inline_diff(oldlist, newlist, colors=None):
             ret.append(item)
     return '[%s]' % (', '.join(ret))
 
-def clean_basepath(basepath):
-    basepath, dir, recipe_task = basepath.rsplit("/", 2)
-    cleaned = dir + '/' + recipe_task
-
-    if basepath[0] == '/':
-        return cleaned
-
-    if basepath.startswith("mc:") and basepath.count(':') >= 2:
-        mc, mc_name, basepath = basepath.split(":", 2)
-        mc_suffix = ':mc:' + mc_name
-    else:
-        mc_suffix = ''
-
-    # mc stuff now removed from basepath. Whatever was next, if present will be the first
-    # suffix. ':/', recipe path start, marks the end of this. Something like
-    # 'virtual:a[:b[:c]]:/path...' (b and c being optional)
-    if basepath[0] != '/':
-        cleaned += ':' + basepath.split(':/', 1)[0]
-
-    return cleaned + mc_suffix
+# Handled renamed fields
+def handle_renames(data):
+    if 'basewhitelist' in data:
+        data['basehash_ignore_vars'] = data['basewhitelist']
+        del data['basewhitelist']
+    if 'taskwhitelist' in data:
+        data['taskhash_ignore_tasks'] = data['taskwhitelist']
+        del data['taskwhitelist']
 
-def clean_basepaths(a):
-    b = {}
-    for x in a:
-        b[clean_basepath(x)] = a[x]
-    return b
-
-def clean_basepaths_list(a):
-    b = []
-    for x in a:
-        b.append(clean_basepath(x))
-    return b
 
 def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
     output = []
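
handle_renames migrates siginfo dictionaries written before the whitelist renaming, so old and new files stay comparable; e.g. (data hypothetical):

    data = {'basewhitelist': frozenset({'PATH'}), 'basehash': 'ab12'}
    handle_renames(data)
    # data == {'basehash_ignore_vars': frozenset({'PATH'}), 'basehash': 'ab12'}
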
@@ -794,20 +968,29 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
         formatparams.update(values)
         return formatstr.format(**formatparams)
 
-    with open(a, 'rb') as f:
-        p1 = pickle.Unpickler(f)
-        a_data = p1.load()
-    with open(b, 'rb') as f:
-        p2 = pickle.Unpickler(f)
-        b_data = p2.load()
-
-    def dict_diff(a, b, whitelist=set()):
+    try:
+        with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
+            a_data = json.load(f, object_hook=SetDecoder)
+    except (TypeError, OSError) as err:
+        bb.error("Failed to open sigdata file '%s': %s" % (a, str(err)))
+        raise err
+    try:
+        with bb.compress.zstd.open(b, "rt", encoding="utf-8", num_threads=1) as f:
+            b_data = json.load(f, object_hook=SetDecoder)
+    except (TypeError, OSError) as err:
+        bb.error("Failed to open sigdata file '%s': %s" % (b, str(err)))
+        raise err
+
+    for data in [a_data, b_data]:
+        handle_renames(data)
+
+    def dict_diff(a, b, ignored_vars=set()):
         sa = set(a.keys())
         sb = set(b.keys())
         common = sa & sb
         changed = set()
         for i in common:
-            if a[i] != b[i] and i not in whitelist:
+            if a[i] != b[i] and i not in ignored_vars:
                 changed.add(i)
         added = sb - sa
         removed = sa - sb
@@ -815,11 +998,11 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
 
     def file_checksums_diff(a, b):
         from collections import Counter
-        # Handle old siginfo format
-        if isinstance(a, dict):
-            a = [(os.path.basename(f), cs) for f, cs in a.items()]
-        if isinstance(b, dict):
-            b = [(os.path.basename(f), cs) for f, cs in b.items()]
+
+        # Convert lists back to tuples
+        a = [(f[0], f[1]) for f in a]
+        b = [(f[0], f[1]) for f in b]
+
         # Compare lists, ensuring we can handle duplicate filenames if they exist
         removedcount = Counter(a)
         removedcount.subtract(b)
@@ -846,15 +1029,15 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
         removed = [x[0] for x in removed]
         return changed, added, removed
 
-    if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
-        output.append(color_format("{color_title}basewhitelist changed{color_default} from '%s' to '%s'") % (a_data['basewhitelist'], b_data['basewhitelist']))
-        if a_data['basewhitelist'] and b_data['basewhitelist']:
-            output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))
+    if 'basehash_ignore_vars' in a_data and a_data['basehash_ignore_vars'] != b_data['basehash_ignore_vars']:
+        output.append(color_format("{color_title}basehash_ignore_vars changed{color_default} from '%s' to '%s'") % (a_data['basehash_ignore_vars'], b_data['basehash_ignore_vars']))
+        if a_data['basehash_ignore_vars'] and b_data['basehash_ignore_vars']:
+            output.append("changed items: %s" % a_data['basehash_ignore_vars'].symmetric_difference(b_data['basehash_ignore_vars']))
 
-    if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
-        output.append(color_format("{color_title}taskwhitelist changed{color_default} from '%s' to '%s'") % (a_data['taskwhitelist'], b_data['taskwhitelist']))
-        if a_data['taskwhitelist'] and b_data['taskwhitelist']:
-            output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))
+    if 'taskhash_ignore_tasks' in a_data and a_data['taskhash_ignore_tasks'] != b_data['taskhash_ignore_tasks']:
+        output.append(color_format("{color_title}taskhash_ignore_tasks changed{color_default} from '%s' to '%s'") % (a_data['taskhash_ignore_tasks'], b_data['taskhash_ignore_tasks']))
+        if a_data['taskhash_ignore_tasks'] and b_data['taskhash_ignore_tasks']:
+            output.append("changed items: %s" % a_data['taskhash_ignore_tasks'].symmetric_difference(b_data['taskhash_ignore_tasks']))
 
     if a_data['taskdeps'] != b_data['taskdeps']:
         output.append(color_format("{color_title}Task dependencies changed{color_default} from:\n%s\nto:\n%s") % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
@@ -862,23 +1045,23 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
     if a_data['basehash'] != b_data['basehash'] and not collapsed:
         output.append(color_format("{color_title}basehash changed{color_default} from %s to %s") % (a_data['basehash'], b_data['basehash']))
 
-    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
+    changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basehash_ignore_vars'] & b_data['basehash_ignore_vars'])
     if changed:
-        for dep in changed:
+        for dep in sorted(changed):
             output.append(color_format("{color_title}List of dependencies for variable %s changed from '{color_default}%s{color_title}' to '{color_default}%s{color_title}'") % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
             if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
                 output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
     if added:
-        for dep in added:
+        for dep in sorted(added):
             output.append(color_format("{color_title}Dependency on variable %s was added") % (dep))
     if removed:
-        for dep in removed:
+        for dep in sorted(removed):
             output.append(color_format("{color_title}Dependency on Variable %s was removed") % (dep))
 
 
     changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
     if changed:
-        for dep in changed:
+        for dep in sorted(changed):
             oldval = a_data['varvals'][dep]
             newval = b_data['varvals'][dep]
             if newval and oldval and ('\n' in oldval or '\n' in newval):
@@ -902,9 +1085,9 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
                 output.append(color_format("{color_title}Variable {var} value changed from '{color_default}{oldval}{color_title}' to '{color_default}{newval}{color_title}'{color_default}", var=dep, oldval=oldval, newval=newval))
 
     if not 'file_checksum_values' in a_data:
-        a_data['file_checksum_values'] = {}
+        a_data['file_checksum_values'] = []
     if not 'file_checksum_values' in b_data:
-        b_data['file_checksum_values'] = {}
+        b_data['file_checksum_values'] = []
 
     changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
     if changed:
@@ -931,11 +1114,11 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
             a = a_data['runtaskdeps'][idx]
             b = b_data['runtaskdeps'][idx]
             if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b] and not collapsed:
-                changed.append("%s with hash %s\n changed to\n%s with hash %s" % (clean_basepath(a), a_data['runtaskhashes'][a], clean_basepath(b), b_data['runtaskhashes'][b]))
+                changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))
 
     if changed:
-        clean_a = clean_basepaths_list(a_data['runtaskdeps'])
-        clean_b = clean_basepaths_list(b_data['runtaskdeps'])
+        clean_a = a_data['runtaskdeps']
+        clean_b = b_data['runtaskdeps']
         if clean_a != clean_b:
             output.append(color_format("{color_title}runtaskdeps changed:{color_default}\n%s") % list_inline_diff(clean_a, clean_b, colors))
         else:
@@ -948,7 +1131,7 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
         b = b_data['runtaskhashes']
         changed, added, removed = dict_diff(a, b)
         if added:
-            for dep in added:
+            for dep in sorted(added):
                 bdep_found = False
                 if removed:
                     for bdep in removed:
@@ -956,9 +1139,9 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
                             #output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
                             bdep_found = True
                 if not bdep_found:
-                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (clean_basepath(dep), b[dep]))
+                    output.append(color_format("{color_title}Dependency on task %s was added{color_default} with hash %s") % (dep, b[dep]))
         if removed:
-            for dep in removed:
+            for dep in sorted(removed):
                 adep_found = False
                 if added:
                     for adep in added:
@@ -966,11 +1149,11 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
                             #output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
                             adep_found = True
                 if not adep_found:
-                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (clean_basepath(dep), a[dep]))
+                    output.append(color_format("{color_title}Dependency on task %s was removed{color_default} with hash %s") % (dep, a[dep]))
         if changed:
-            for dep in changed:
+            for dep in sorted(changed):
                 if not collapsed:
-                    output.append(color_format("{color_title}Hash for dependent task %s changed{color_default} from %s to %s") % (clean_basepath(dep), a[dep], b[dep]))
+                    output.append(color_format("{color_title}Hash for task dependency %s changed{color_default} from %s to %s") % (dep, a[dep], b[dep]))
                 if callable(recursecb):
                     recout = recursecb(dep, a[dep], b[dep])
                     if recout:
@@ -980,6 +1163,7 @@ def compare_sigfiles(a, b, recursecb=None, color=False, collapsed=False):
                             # If a dependent hash changed, might as well print the line above and then defer to the changes in
                             # that hash since in all likelyhood, they're the same changes this task also saw.
                             output = [output[-1]] + recout
+                            break
 
     a_taint = a_data.get('taint', None)
     b_taint = b_data.get('taint', None)
@@ -1001,7 +1185,7 @@ def calc_basehash(sigdata):
     basedata = ''
 
     alldeps = sigdata['taskdeps']
-    for dep in alldeps:
+    for dep in sorted(alldeps):
         basedata = basedata + dep
         val = sigdata['varvals'][dep]
         if val is not None:
@@ -1017,6 +1201,8 @@ def calc_taskhash(sigdata):
 
     for c in sigdata['file_checksum_values']:
         if c[1]:
+            if "./" in c[0]:
+                data = data + c[0]
             data = data + c[1]
 
     if 'taint' in sigdata:
@@ -1031,32 +1217,37 @@ def calc_taskhash(sigdata):
 def dump_sigfile(a):
     output = []
 
-    with open(a, 'rb') as f:
-        p1 = pickle.Unpickler(f)
-        a_data = p1.load()
+    try:
+        with bb.compress.zstd.open(a, "rt", encoding="utf-8", num_threads=1) as f:
+            a_data = json.load(f, object_hook=SetDecoder)
+    except (TypeError, OSError) as err:
+        bb.error("Failed to open sigdata file '%s': %s" % (a, str(err)))
+        raise err
+
+    handle_renames(a_data)
 
-    output.append("basewhitelist: %s" % (a_data['basewhitelist']))
+    output.append("basehash_ignore_vars: %s" % (sorted(a_data['basehash_ignore_vars'])))
 
-    output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))
+    output.append("taskhash_ignore_tasks: %s" % (sorted(a_data['taskhash_ignore_tasks'] or [])))
 
     output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
 
     output.append("basehash: %s" % (a_data['basehash']))
 
-    for dep in a_data['gendeps']:
-        output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))
+    for dep in sorted(a_data['gendeps']):
+        output.append("List of dependencies for variable %s is %s" % (dep, sorted(a_data['gendeps'][dep])))
 
-    for dep in a_data['varvals']:
+    for dep in sorted(a_data['varvals']):
         output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
 
     if 'runtaskdeps' in a_data:
-        output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))
+        output.append("Tasks this task depends on: %s" % (sorted(a_data['runtaskdeps'])))
 
     if 'file_checksum_values' in a_data:
-        output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))
+        output.append("This task depends on the checksums of files: %s" % (sorted(a_data['file_checksum_values'])))
 
     if 'runtaskhashes' in a_data:
-        for dep in a_data['runtaskhashes']:
+        for dep in sorted(a_data['runtaskhashes']):
             output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
 
     if 'taint' in a_data: