diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-12-11 23:11:50 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-12-12 13:15:00 +0000 |
commit | 6b2cdc4b96eab7ad5e8ae8daebe24f2b18b09b1e (patch) | |
tree | db5415b65231a59dbf646ecedb0d1b1f4c828db6 /bitbake/lib | |
parent | b3771bded2f288578fe06ef9a2886c713d065c78 (diff) | |
download | poky-6b2cdc4b96eab7ad5e8ae8daebe24f2b18b09b1e.tar.gz |
bitbake: siggen: Fix hashequiv performance issues
We're seeing huge slowdowns on large builds on the autobuilder. A qemux86 world
build was running really slowly; the primary symptom was lots of rehashing going on
due to an sstate change which caused a rebuild even though all output should be identical.
This was traced to the hundreds of thousands of calls to get_unihash() from
get_taskhash(). If we simplify the unitaskhashes data structure, we can bypass
the function call and access the data directly. In local profile charts,
this significantly sped up process_possible_migrations(), both on profiles
and visually.
Whilst this change doesn't aid readability, it does solve an otherwise
huge performance issue.
(Bitbake rev: c4b8440f730c33eaf9f818b856ae81b2f1017fec)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib')
-rw-r--r-- | bitbake/lib/bb/siggen.py | 27 |
1 files changed, 13 insertions, 14 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py index dbf510238f..517ec7b61a 100644 --- a/bitbake/lib/bb/siggen.py +++ b/bitbake/lib/bb/siggen.py | |||
@@ -121,7 +121,7 @@ class SignatureGeneratorBasic(SignatureGenerator): | |||
121 | else: | 121 | else: |
122 | self.checksum_cache = None | 122 | self.checksum_cache = None |
123 | 123 | ||
124 | self.unihash_cache = bb.cache.SimpleCache("1") | 124 | self.unihash_cache = bb.cache.SimpleCache("2") |
125 | self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) | 125 | self.unitaskhashes = self.unihash_cache.init_cache(data, "bb_unihashes.dat", {}) |
126 | 126 | ||
127 | def init_rundepcheck(self, data): | 127 | def init_rundepcheck(self, data): |
@@ -216,7 +216,13 @@ class SignatureGeneratorBasic(SignatureGenerator): | |||
216 | continue | 216 | continue |
217 | if dep not in self.taskhash: | 217 | if dep not in self.taskhash: |
218 | bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) | 218 | bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?" % dep) |
219 | data = data + self.get_unihash(dep) | 219 | # We bypass the function and go straight to the cache here |
220 | # as this was a performance bottleneck otherwise | ||
221 | if self.taskhash[dep] in self.unitaskhashes: | ||
222 | unihash = self.unitaskhashes[self.taskhash[dep]] | ||
223 | else: | ||
224 | unihash = self.get_unihash(dep) | ||
225 | data = data + unihash | ||
220 | self.runtaskdeps[tid].append(dep) | 226 | self.runtaskdeps[tid].append(dep) |
221 | 227 | ||
222 | if task in dataCache.file_checksums[fn]: | 228 | if task in dataCache.file_checksums[fn]: |
@@ -387,24 +393,19 @@ class SignatureGeneratorUniHashMixIn(object): | |||
387 | self._client = hashserv.create_client(self.server) | 393 | self._client = hashserv.create_client(self.server) |
388 | return self._client | 394 | return self._client |
389 | 395 | ||
390 | def __get_task_unihash_key(self, tid): | ||
391 | # TODO: The key only *needs* to be the taskhash, the tid is just | ||
392 | # convenient | ||
393 | return '%s:%s' % (tid.rsplit("/", 1)[1], self.taskhash[tid]) | ||
394 | |||
395 | def get_stampfile_hash(self, tid): | 396 | def get_stampfile_hash(self, tid): |
396 | if tid in self.taskhash: | 397 | if tid in self.taskhash: |
397 | # If a unique hash is reported, use it as the stampfile hash. This | 398 | # If a unique hash is reported, use it as the stampfile hash. This |
398 | # ensures that if a task won't be re-run if the taskhash changes, | 399 | # ensures that if a task won't be re-run if the taskhash changes, |
399 | # but it would result in the same output hash | 400 | # but it would result in the same output hash |
400 | unihash = self.unitaskhashes.get(self.__get_task_unihash_key(tid), None) | 401 | unihash = self.unitaskhashes.get(self.taskhash[tid], None) |
401 | if unihash is not None: | 402 | if unihash is not None: |
402 | return unihash | 403 | return unihash |
403 | 404 | ||
404 | return super().get_stampfile_hash(tid) | 405 | return super().get_stampfile_hash(tid) |
405 | 406 | ||
406 | def set_unihash(self, tid, unihash): | 407 | def set_unihash(self, tid, unihash): |
407 | self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash | 408 | self.unitaskhashes[self.taskhash[tid]] = unihash |
408 | 409 | ||
409 | def get_unihash(self, tid): | 410 | def get_unihash(self, tid): |
410 | taskhash = self.taskhash[tid] | 411 | taskhash = self.taskhash[tid] |
@@ -413,11 +414,9 @@ class SignatureGeneratorUniHashMixIn(object): | |||
413 | if self.setscenetasks and tid not in self.setscenetasks: | 414 | if self.setscenetasks and tid not in self.setscenetasks: |
414 | return taskhash | 415 | return taskhash |
415 | 416 | ||
416 | key = self.__get_task_unihash_key(tid) | ||
417 | |||
418 | # TODO: This cache can grow unbounded. It probably only needs to keep | 417 | # TODO: This cache can grow unbounded. It probably only needs to keep |
419 | # for each task | 418 | # for each task |
420 | unihash = self.unitaskhashes.get(key, None) | 419 | unihash = self.unitaskhashes.get(taskhash, None) |
421 | if unihash is not None: | 420 | if unihash is not None: |
422 | return unihash | 421 | return unihash |
423 | 422 | ||
@@ -449,7 +448,7 @@ class SignatureGeneratorUniHashMixIn(object): | |||
449 | except hashserv.client.HashConnectionError as e: | 448 | except hashserv.client.HashConnectionError as e: |
450 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | 449 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) |
451 | 450 | ||
452 | self.unitaskhashes[key] = unihash | 451 | self.unitaskhashes[taskhash] = unihash |
453 | return unihash | 452 | return unihash |
454 | 453 | ||
455 | def report_unihash(self, path, task, d): | 454 | def report_unihash(self, path, task, d): |
@@ -467,7 +466,7 @@ class SignatureGeneratorUniHashMixIn(object): | |||
467 | return | 466 | return |
468 | 467 | ||
469 | # Sanity checks | 468 | # Sanity checks |
470 | cache_unihash = self.unitaskhashes.get(key, None) | 469 | cache_unihash = self.unitaskhashes.get(taskhash, None) |
471 | if cache_unihash is None: | 470 | if cache_unihash is None: |
472 | bb.fatal('%s not in unihash cache. Please report this error' % key) | 471 | bb.fatal('%s not in unihash cache. Please report this error' % key) |
473 | 472 | ||