summary | refs | log | tree | commit | diff | stats
path: root/bitbake/lib/bb/siggen.py
diff options
context:
space:
mode:
author: Richard Purdie <richard.purdie@linuxfoundation.org> 2019-12-12 18:07:14 +0000
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2020-02-08 16:01:33 +0000
commit: cedfbac46670a3371bbddaf8736eb8e8555e2ce8 (patch)
tree: c153070ff7182126f7fbf0af237cccec52cd258c /bitbake/lib/bb/siggen.py
parent: 306820bad60c62a8096df8d1e4c7cc8e00dc38b7 (diff)
download: poky-cedfbac46670a3371bbddaf8736eb8e8555e2ce8.tar.gz
bitbake: siggen: Optimise get_unihash disk based cache handling
Currently the cache can grow huge, since any previously used hash is retained in the cache. This change moves to using one hash per task, which improves the speed of the functions considerably. Currently performance is an issue, as are very large cache files and cache load time. By moving to a single hash per task, the shortened filename is no longer usable as a key, because the same recipe has multiple variants for the same filename, so the key format has to change. (Bitbake rev: ed764e7fcf04b6d0ba6b4cac7415b1ee8f492865) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bb/siggen.py')
-rw-r--r-- bitbake/lib/bb/siggen.py 50
1 files changed, 32 insertions, 18 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index ded1da020f..209a342883 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -44,6 +44,7 @@ class SignatureGenerator(object):
44 self.file_checksum_values = {} 44 self.file_checksum_values = {}
45 self.taints = {} 45 self.taints = {}
46 self.unitaskhashes = {} 46 self.unitaskhashes = {}
47 self.tidtopn = {}
47 self.setscenetasks = set() 48 self.setscenetasks = set()
48 49
49 def finalise(self, fn, d, varient): 50 def finalise(self, fn, d, varient):
@@ -79,19 +80,19 @@ class SignatureGenerator(object):
79 return 80 return
80 81
81 def get_taskdata(self): 82 def get_taskdata(self):
82 return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.setscenetasks) 83 return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks)
83 84
84 def set_taskdata(self, data): 85 def set_taskdata(self, data):
85 self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.setscenetasks = data 86 self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data
86 87
87 def reset(self, data): 88 def reset(self, data):
88 self.__init__(data) 89 self.__init__(data)
89 90
90 def get_taskhashes(self): 91 def get_taskhashes(self):
91 return self.taskhash, self.unitaskhashes 92 return self.taskhash, self.unitaskhashes, self.tidtopn
92 93
93 def set_taskhashes(self, hashes): 94 def set_taskhashes(self, hashes):
94 self.taskhash, self.unitaskhashes = hashes 95 self.taskhash, self.unitaskhashes, self.tidtopn = hashes
95 96
96 def save_unitaskhashes(self): 97 def save_unitaskhashes(self):
97 return 98 return
@@ -124,9 +125,10 @@ class SignatureGeneratorBasic(SignatureGenerator):
124 else: 125 else:
125 self.checksum_cache = None 126 self.checksum_cache = None
126 127
127 self.unihash_cache = bb.cache.SimpleCache("1") 128 self.unihash_cache = bb.cache.SimpleCache("3")
128 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) 129 self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {})
129 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() 130 self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split()
131 self.tidtopn = {}
130 132
131 def init_rundepcheck(self, data): 133 def init_rundepcheck(self, data):
132 self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None 134 self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
@@ -210,6 +212,9 @@ class SignatureGeneratorBasic(SignatureGenerator):
210 self.runtaskdeps[tid] = [] 212 self.runtaskdeps[tid] = []
211 self.file_checksum_values[tid] = [] 213 self.file_checksum_values[tid] = []
212 recipename = dataCache.pkg_fn[fn] 214 recipename = dataCache.pkg_fn[fn]
215
216 self.tidtopn[tid] = recipename
217
213 for dep in sorted(deps, key=clean_basepath): 218 for dep in sorted(deps, key=clean_basepath):
214 (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep) 219 (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep)
215 if mc != depmc: 220 if mc != depmc:
@@ -407,24 +412,35 @@ class SignatureGeneratorUniHashMixIn(object):
407 self._client = hashserv.create_client(self.server) 412 self._client = hashserv.create_client(self.server)
408 return self._client 413 return self._client
409 414
410 def __get_task_unihash_key(self, tid):
411 # TODO: The key only *needs* to be the taskhash, the tid is just
412 # convenient
413 return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid])
414
415 def get_stampfile_hash(self, tid): 415 def get_stampfile_hash(self, tid):
416 if tid in self.taskhash: 416 if tid in self.taskhash:
417 # If a unique hash is reported, use it as the stampfile hash. This 417 # If a unique hash is reported, use it as the stampfile hash. This
418 # ensures that if a task won't be re-run if the taskhash changes, 418 # ensures that if a task won't be re-run if the taskhash changes,
419 # but it would result in the same output hash 419 # but it would result in the same output hash
420 unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None) 420 unihash = self._get_unihash(tid)
421 if unihash is not None: 421 if unihash is not None:
422 return unihash 422 return unihash
423 423
424 return super().get_stampfile_hash(tid) 424 return super().get_stampfile_hash(tid)
425 425
426 def set_unihash(self, tid, unihash): 426 def set_unihash(self, tid, unihash):
427 self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash 427 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
428 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
429 self.unitaskhashes[key] = (self.taskhash[tid], unihash)
430
431 def _get_unihash(self, tid, checkkey=None):
432 if tid not in self.tidtopn:
433 return None
434 (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid)
435 key = mc + ":" + self.tidtopn[tid] + ":" + taskname
436 if key not in self.unitaskhashes:
437 return None
438 if not checkkey:
439 checkkey = self.taskhash[tid]
440 (key, unihash) = self.unitaskhashes[key]
441 if key != checkkey:
442 return None
443 return unihash
428 444
429 def get_unihash(self, tid): 445 def get_unihash(self, tid):
430 taskhash = self.taskhash[tid] 446 taskhash = self.taskhash[tid]
@@ -433,11 +449,9 @@ class SignatureGeneratorUniHashMixIn(object):
433 if self.setscenetasks and tid not in self.setscenetasks: 449 if self.setscenetasks and tid not in self.setscenetasks:
434 return taskhash 450 return taskhash
435 451
436 key = self.__get_task_unihash_key(tid)
437
438 # TODO: This cache can grow unbounded. It probably only needs to keep 452 # TODO: This cache can grow unbounded. It probably only needs to keep
439 # for each task 453 # for each task
440 unihash = self.unitaskhashes.get(key, None) 454 unihash = self._get_unihash(tid)
441 if unihash is not None: 455 if unihash is not None:
442 return unihash 456 return unihash
443 457
@@ -472,7 +486,7 @@ class SignatureGeneratorUniHashMixIn(object):
472 except hashserv.client.HashConnectionError as e: 486 except hashserv.client.HashConnectionError as e:
473 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 487 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
474 488
475 self.unitaskhashes[key] = unihash 489 self.set_unihash(tid, unihash)
476 return unihash 490 return unihash
477 491
478 def report_unihash(self, path, task, d): 492 def report_unihash(self, path, task, d):
@@ -484,13 +498,13 @@ class SignatureGeneratorUniHashMixIn(object):
484 tempdir = d.getVar('T') 498 tempdir = d.getVar('T')
485 fn = d.getVar('BB_FILENAME') 499 fn = d.getVar('BB_FILENAME')
486 tid = fn + ':do_' + task 500 tid = fn + ':do_' + task
487 key = tid.rsplit("/", 1)[1] + ':' + taskhash 501 key = tid + ':' + taskhash
488 502
489 if self.setscenetasks and tid not in self.setscenetasks: 503 if self.setscenetasks and tid not in self.setscenetasks:
490 return 504 return
491 505
492 # Sanity checks 506 # Sanity checks
493 cache_unihash = self.unitaskhashes.get(key, None) 507 cache_unihash = self._get_unihash(tid, checkkey=taskhash)
494 if cache_unihash is None: 508 if cache_unihash is None:
495 bb.fatal('%s not in unihash cache. Please report this error' % key) 509 bb.fatal('%s not in unihash cache. Please report this error' % key)
496 510