diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-12-12 18:07:14 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2020-02-08 16:01:33 +0000 |
commit | cedfbac46670a3371bbddaf8736eb8e8555e2ce8 (patch) | |
tree | c153070ff7182126f7fbf0af237cccec52cd258c /bitbake/lib/bb/siggen.py | |
parent | 306820bad60c62a8096df8d1e4c7cc8e00dc38b7 (diff) | |
download | poky-cedfbac46670a3371bbddaf8736eb8e8555e2ce8.tar.gz |
bitbake: siggen: Optimise get_unihash disk based cache handling
Currently the cache can grow huge since any previously used hash is
retained in the cache. This change moves to storing one hash per task,
which improves the speed of the functions considerably. Currently
performance is an issue, as are very large cache files and cache
load time.
By moving to a single hash per task, the shortened filename is no longer
usable as a key, since the same recipe can have multiple variants for
the same filename, so the key format has to change.
(Bitbake rev: ed764e7fcf04b6d0ba6b4cac7415b1ee8f492865)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bb/siggen.py')
-rw-r--r-- | bitbake/lib/bb/siggen.py | 50 |
1 files changed, 32 insertions, 18 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py index ded1da020f..209a342883 100644 --- a/bitbake/lib/bb/siggen.py +++ b/bitbake/lib/bb/siggen.py | |||
@@ -44,6 +44,7 @@ class SignatureGenerator(object): | |||
44 | self.file_checksum_values = {} | 44 | self.file_checksum_values = {} |
45 | self.taints = {} | 45 | self.taints = {} |
46 | self.unitaskhashes = {} | 46 | self.unitaskhashes = {} |
47 | self.tidtopn = {} | ||
47 | self.setscenetasks = set() | 48 | self.setscenetasks = set() |
48 | 49 | ||
49 | def finalise(self, fn, d, varient): | 50 | def finalise(self, fn, d, varient): |
@@ -79,19 +80,19 @@ class SignatureGenerator(object): | |||
79 | return | 80 | return |
80 | 81 | ||
81 | def get_taskdata(self): | 82 | def get_taskdata(self): |
82 | return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.setscenetasks) | 83 | return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks) |
83 | 84 | ||
84 | def set_taskdata(self, data): | 85 | def set_taskdata(self, data): |
85 | self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.setscenetasks = data | 86 | self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash, self.unitaskhashes, self.tidtopn, self.setscenetasks = data |
86 | 87 | ||
87 | def reset(self, data): | 88 | def reset(self, data): |
88 | self.__init__(data) | 89 | self.__init__(data) |
89 | 90 | ||
90 | def get_taskhashes(self): | 91 | def get_taskhashes(self): |
91 | return self.taskhash, self.unitaskhashes | 92 | return self.taskhash, self.unitaskhashes, self.tidtopn |
92 | 93 | ||
93 | def set_taskhashes(self, hashes): | 94 | def set_taskhashes(self, hashes): |
94 | self.taskhash, self.unitaskhashes = hashes | 95 | self.taskhash, self.unitaskhashes, self.tidtopn = hashes |
95 | 96 | ||
96 | def save_unitaskhashes(self): | 97 | def save_unitaskhashes(self): |
97 | return | 98 | return |
@@ -124,9 +125,10 @@ class SignatureGeneratorBasic(SignatureGenerator): | |||
124 | else: | 125 | else: |
125 | self.checksum_cache = None | 126 | self.checksum_cache = None |
126 | 127 | ||
127 | self.unihash_cache = bb.cache.SimpleCache("1") | 128 | self.unihash_cache = bb.cache.SimpleCache("3") |
128 | self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) | 129 | self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) |
129 | self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() | 130 | self.localdirsexclude = (data.getVar("BB_SIGNATURE_LOCAL_DIRS_EXCLUDE") or "CVS .bzr .git .hg .osc .p4 .repo .svn").split() |
131 | self.tidtopn = {} | ||
130 | 132 | ||
131 | def init_rundepcheck(self, data): | 133 | def init_rundepcheck(self, data): |
132 | self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None | 134 | self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None |
@@ -210,6 +212,9 @@ class SignatureGeneratorBasic(SignatureGenerator): | |||
210 | self.runtaskdeps[tid] = [] | 212 | self.runtaskdeps[tid] = [] |
211 | self.file_checksum_values[tid] = [] | 213 | self.file_checksum_values[tid] = [] |
212 | recipename = dataCache.pkg_fn[fn] | 214 | recipename = dataCache.pkg_fn[fn] |
215 | |||
216 | self.tidtopn[tid] = recipename | ||
217 | |||
213 | for dep in sorted(deps, key=clean_basepath): | 218 | for dep in sorted(deps, key=clean_basepath): |
214 | (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep) | 219 | (depmc, _, deptaskname, depfn) = bb.runqueue.split_tid_mcfn(dep) |
215 | if mc != depmc: | 220 | if mc != depmc: |
@@ -407,24 +412,35 @@ class SignatureGeneratorUniHashMixIn(object): | |||
407 | self._client = hashserv.create_client(self.server) | 412 | self._client = hashserv.create_client(self.server) |
408 | return self._client | 413 | return self._client |
409 | 414 | ||
410 | def __get_task_unihash_key(self, tid): | ||
411 | # TODO: The key only *needs* to be the taskhash, the tid is just | ||
412 | # convenient | ||
413 | return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid]) | ||
414 | |||
415 | def get_stampfile_hash(self, tid): | 415 | def get_stampfile_hash(self, tid): |
416 | if tid in self.taskhash: | 416 | if tid in self.taskhash: |
417 | # If a unique hash is reported, use it as the stampfile hash. This | 417 | # If a unique hash is reported, use it as the stampfile hash. This |
418 | # ensures that if a task won't be re-run if the taskhash changes, | 418 | # ensures that if a task won't be re-run if the taskhash changes, |
419 | # but it would result in the same output hash | 419 | # but it would result in the same output hash |
420 | unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None) | 420 | unihash = self._get_unihash(tid) |
421 | if unihash is not None: | 421 | if unihash is not None: |
422 | return unihash | 422 | return unihash |
423 | 423 | ||
424 | return super().get_stampfile_hash(tid) | 424 | return super().get_stampfile_hash(tid) |
425 | 425 | ||
426 | def set_unihash(self, tid, unihash): | 426 | def set_unihash(self, tid, unihash): |
427 | self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash | 427 | (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) |
428 | key = mc + ":" + self.tidtopn[tid] + ":" + taskname | ||
429 | self.unitaskhashes[key] = (self.taskhash[tid], unihash) | ||
430 | |||
431 | def _get_unihash(self, tid, checkkey=None): | ||
432 | if tid not in self.tidtopn: | ||
433 | return None | ||
434 | (mc, fn, taskname, taskfn) = bb.runqueue.split_tid_mcfn(tid) | ||
435 | key = mc + ":" + self.tidtopn[tid] + ":" + taskname | ||
436 | if key not in self.unitaskhashes: | ||
437 | return None | ||
438 | if not checkkey: | ||
439 | checkkey = self.taskhash[tid] | ||
440 | (key, unihash) = self.unitaskhashes[key] | ||
441 | if key != checkkey: | ||
442 | return None | ||
443 | return unihash | ||
428 | 444 | ||
429 | def get_unihash(self, tid): | 445 | def get_unihash(self, tid): |
430 | taskhash = self.taskhash[tid] | 446 | taskhash = self.taskhash[tid] |
@@ -433,11 +449,9 @@ class SignatureGeneratorUniHashMixIn(object): | |||
433 | if self.setscenetasks and tid not in self.setscenetasks: | 449 | if self.setscenetasks and tid not in self.setscenetasks: |
434 | return taskhash | 450 | return taskhash |
435 | 451 | ||
436 | key = self.__get_task_unihash_key(tid) | ||
437 | |||
438 | # TODO: This cache can grow unbounded. It probably only needs to keep | 452 | # TODO: This cache can grow unbounded. It probably only needs to keep |
439 | # for each task | 453 | # for each task |
440 | unihash = self.unitaskhashes.get(key, None) | 454 | unihash = self._get_unihash(tid) |
441 | if unihash is not None: | 455 | if unihash is not None: |
442 | return unihash | 456 | return unihash |
443 | 457 | ||
@@ -472,7 +486,7 @@ class SignatureGeneratorUniHashMixIn(object): | |||
472 | except hashserv.client.HashConnectionError as e: | 486 | except hashserv.client.HashConnectionError as e: |
473 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | 487 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) |
474 | 488 | ||
475 | self.unitaskhashes[key] = unihash | 489 | self.set_unihash(tid, unihash) |
476 | return unihash | 490 | return unihash |
477 | 491 | ||
478 | def report_unihash(self, path, task, d): | 492 | def report_unihash(self, path, task, d): |
@@ -484,13 +498,13 @@ class SignatureGeneratorUniHashMixIn(object): | |||
484 | tempdir = d.getVar('T') | 498 | tempdir = d.getVar('T') |
485 | fn = d.getVar('BB_FILENAME') | 499 | fn = d.getVar('BB_FILENAME') |
486 | tid = fn + ':do_' + task | 500 | tid = fn + ':do_' + task |
487 | key = tid.rsplit("/", 1)[1] + ':' + taskhash | 501 | key = tid + ':' + taskhash |
488 | 502 | ||
489 | if self.setscenetasks and tid not in self.setscenetasks: | 503 | if self.setscenetasks and tid not in self.setscenetasks: |
490 | return | 504 | return |
491 | 505 | ||
492 | # Sanity checks | 506 | # Sanity checks |
493 | cache_unihash = self.unitaskhashes.get(key, None) | 507 | cache_unihash = self._get_unihash(tid, checkkey=taskhash) |
494 | if cache_unihash is None: | 508 | if cache_unihash is None: |
495 | bb.fatal('%s not in unihash cache. Please report this error' % key) | 509 | bb.fatal('%s not in unihash cache. Please report this error' % key) |
496 | 510 | ||