diff options
author | Joshua Watt <JPEWhacker@gmail.com> | 2024-02-18 15:59:51 -0700 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2024-02-19 11:58:12 +0000 |
commit | e5056394e030c4573ca7d65e79484a2aa8afaaed (patch) | |
tree | 6d42efcea21015bab44c1b924fa1cbd26592d015 | |
parent | 37b4d7e4931cb032659273c19520d74083ffb0e9 (diff) | |
download | poky-e5056394e030c4573ca7d65e79484a2aa8afaaed.tar.gz |
bitbake: siggen: Add parallel query API
Implements a new API called get_unihashes() that allows for querying
multiple unihashes in parallel.
The API is also reworked to make it easier for derived classes to
interface with the new API in a consistent manner. Instead of overriding
get_unihash() to add custom handling for local hash calculation (e.g.
caches), derived classes should now override get_cached_unihash() and
return the local unihash, or None if there isn't one.
(Bitbake rev: 6faf48c09a4003a31b32e450779fb8ac9cc5e946)
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | bitbake/lib/bb/siggen.py | 121 |
1 files changed, 87 insertions, 34 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py index 58854aee76..e1a4fa2cdd 100644 --- a/bitbake/lib/bb/siggen.py +++ b/bitbake/lib/bb/siggen.py | |||
@@ -102,9 +102,18 @@ class SignatureGenerator(object): | |||
102 | if flag: | 102 | if flag: |
103 | self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag | 103 | self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag |
104 | 104 | ||
105 | def get_cached_unihash(self, tid): | ||
106 | return None | ||
107 | |||
105 | def get_unihash(self, tid): | 108 | def get_unihash(self, tid): |
109 | unihash = self.get_cached_unihash(tid) | ||
110 | if unihash: | ||
111 | return unihash | ||
106 | return self.taskhash[tid] | 112 | return self.taskhash[tid] |
107 | 113 | ||
114 | def get_unihashes(self, tids): | ||
115 | return {tid: self.get_unihash(tid) for tid in tids} | ||
116 | |||
108 | def prep_taskhash(self, tid, deps, dataCaches): | 117 | def prep_taskhash(self, tid, deps, dataCaches): |
109 | return | 118 | return |
110 | 119 | ||
@@ -524,28 +533,37 @@ class SignatureGeneratorUniHashMixIn(object): | |||
524 | super().__init__(data) | 533 | super().__init__(data) |
525 | 534 | ||
526 | def get_taskdata(self): | 535 | def get_taskdata(self): |
527 | return (self.server, self.method, self.extramethod) + super().get_taskdata() | 536 | return (self.server, self.method, self.extramethod, self.max_parallel) + super().get_taskdata() |
528 | 537 | ||
529 | def set_taskdata(self, data): | 538 | def set_taskdata(self, data): |
530 | self.server, self.method, self.extramethod = data[:3] | 539 | self.server, self.method, self.extramethod, self.max_parallel = data[:4] |
531 | super().set_taskdata(data[3:]) | 540 | super().set_taskdata(data[4:]) |
532 | 541 | ||
533 | def client(self): | 542 | def client(self): |
534 | if getattr(self, '_client', None) is None: | 543 | if getattr(self, '_client', None) is None: |
535 | self._client = hashserv.create_client(self.server) | 544 | self._client = hashserv.create_client(self.server) |
536 | return self._client | 545 | return self._client |
537 | 546 | ||
547 | def client_pool(self): | ||
548 | if getattr(self, '_client_pool', None) is None: | ||
549 | self._client_pool = hashserv.client.ClientPool(self.server, self.max_parallel) | ||
550 | return self._client_pool | ||
551 | |||
538 | def reset(self, data): | 552 | def reset(self, data): |
539 | if getattr(self, '_client', None) is not None: | 553 | self.__close_clients() |
540 | self._client.close() | ||
541 | self._client = None | ||
542 | return super().reset(data) | 554 | return super().reset(data) |
543 | 555 | ||
544 | def exit(self): | 556 | def exit(self): |
557 | self.__close_clients() | ||
558 | return super().exit() | ||
559 | |||
560 | def __close_clients(self): | ||
545 | if getattr(self, '_client', None) is not None: | 561 | if getattr(self, '_client', None) is not None: |
546 | self._client.close() | 562 | self._client.close() |
547 | self._client = None | 563 | self._client = None |
548 | return super().exit() | 564 | if getattr(self, '_client_pool', None) is not None: |
565 | self._client_pool.close() | ||
566 | self._client_pool = None | ||
549 | 567 | ||
550 | def get_stampfile_hash(self, tid): | 568 | def get_stampfile_hash(self, tid): |
551 | if tid in self.taskhash: | 569 | if tid in self.taskhash: |
@@ -578,7 +596,7 @@ class SignatureGeneratorUniHashMixIn(object): | |||
578 | return None | 596 | return None |
579 | return unihash | 597 | return unihash |
580 | 598 | ||
581 | def get_unihash(self, tid): | 599 | def get_cached_unihash(self, tid): |
582 | taskhash = self.taskhash[tid] | 600 | taskhash = self.taskhash[tid] |
583 | 601 | ||
584 | # If its not a setscene task we can return | 602 | # If its not a setscene task we can return |
@@ -593,40 +611,74 @@ class SignatureGeneratorUniHashMixIn(object): | |||
593 | self.unihash[tid] = unihash | 611 | self.unihash[tid] = unihash |
594 | return unihash | 612 | return unihash |
595 | 613 | ||
596 | # In the absence of being able to discover a unique hash from the | 614 | return None |
597 | # server, make it be equivalent to the taskhash. The unique "hash" only | ||
598 | # really needs to be a unique string (not even necessarily a hash), but | ||
599 | # making it match the taskhash has a few advantages: | ||
600 | # | ||
601 | # 1) All of the sstate code that assumes hashes can be the same | ||
602 | # 2) It provides maximal compatibility with builders that don't use | ||
603 | # an equivalency server | ||
604 | # 3) The value is easy for multiple independent builders to derive the | ||
605 | # same unique hash from the same input. This means that if the | ||
606 | # independent builders find the same taskhash, but it isn't reported | ||
607 | # to the server, there is a better chance that they will agree on | ||
608 | # the unique hash. | ||
609 | unihash = taskhash | ||
610 | 615 | ||
611 | try: | 616 | def _get_method(self, tid): |
612 | method = self.method | 617 | method = self.method |
613 | if tid in self.extramethod: | 618 | if tid in self.extramethod: |
614 | method = method + self.extramethod[tid] | 619 | method = method + self.extramethod[tid] |
615 | data = self.client().get_unihash(method, self.taskhash[tid]) | 620 | |
616 | if data: | 621 | return method |
617 | unihash = data | 622 | |
623 | def get_unihash(self, tid): | ||
624 | return self.get_unihashes([tid])[tid] | ||
625 | |||
626 | def get_unihashes(self, tids): | ||
627 | """ | ||
628 | For a iterable of tids, returns a dictionary that maps each tid to a | ||
629 | unihash | ||
630 | """ | ||
631 | result = {} | ||
632 | queries = {} | ||
633 | query_result = {} | ||
634 | |||
635 | for tid in tids: | ||
636 | unihash = self.get_cached_unihash(tid) | ||
637 | if unihash: | ||
638 | result[tid] = unihash | ||
639 | else: | ||
640 | queries[tid] = (self._get_method(tid), self.taskhash[tid]) | ||
641 | |||
642 | if len(queries) == 0: | ||
643 | return result | ||
644 | |||
645 | if self.max_parallel <= 1 or len(queries) <= 1: | ||
646 | # No parallelism required. Make the query serially with the single client | ||
647 | for tid, args in queries.items(): | ||
648 | query_result[tid] = self.client().get_unihash(*args) | ||
649 | else: | ||
650 | query_result = self.client_pool().get_unihashes(queries) | ||
651 | |||
652 | for tid, unihash in query_result.items(): | ||
653 | # In the absence of being able to discover a unique hash from the | ||
654 | # server, make it be equivalent to the taskhash. The unique "hash" only | ||
655 | # really needs to be a unique string (not even necessarily a hash), but | ||
656 | # making it match the taskhash has a few advantages: | ||
657 | # | ||
658 | # 1) All of the sstate code that assumes hashes can be the same | ||
659 | # 2) It provides maximal compatibility with builders that don't use | ||
660 | # an equivalency server | ||
661 | # 3) The value is easy for multiple independent builders to derive the | ||
662 | # same unique hash from the same input. This means that if the | ||
663 | # independent builders find the same taskhash, but it isn't reported | ||
664 | # to the server, there is a better chance that they will agree on | ||
665 | # the unique hash. | ||
666 | taskhash = self.taskhash[tid] | ||
667 | if unihash: | ||
618 | # A unique hash equal to the taskhash is not very interesting, | 668 | # A unique hash equal to the taskhash is not very interesting, |
619 | # so it is reported it at debug level 2. If they differ, that | 669 | # so it is reported it at debug level 2. If they differ, that |
620 | # is much more interesting, so it is reported at debug level 1 | 670 | # is much more interesting, so it is reported at debug level 1 |
621 | hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) | 671 | hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) |
622 | else: | 672 | else: |
623 | hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) | 673 | hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) |
624 | except ConnectionError as e: | 674 | unihash = taskhash |
625 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | ||
626 | 675 | ||
627 | self.set_unihash(tid, unihash) | 676 | |
628 | self.unihash[tid] = unihash | 677 | self.set_unihash(tid, unihash) |
629 | return unihash | 678 | self.unihash[tid] = unihash |
679 | result[tid] = unihash | ||
680 | |||
681 | return result | ||
630 | 682 | ||
631 | def report_unihash(self, path, task, d): | 683 | def report_unihash(self, path, task, d): |
632 | import importlib | 684 | import importlib |
@@ -754,6 +806,7 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG | |||
754 | super().init_rundepcheck(data) | 806 | super().init_rundepcheck(data) |
755 | self.server = data.getVar('BB_HASHSERVE') | 807 | self.server = data.getVar('BB_HASHSERVE') |
756 | self.method = "sstate_output_hash" | 808 | self.method = "sstate_output_hash" |
809 | self.max_parallel = 1 | ||
757 | 810 | ||
758 | def clean_checksum_file_path(file_checksum_tuple): | 811 | def clean_checksum_file_path(file_checksum_tuple): |
759 | f, cs = file_checksum_tuple | 812 | f, cs = file_checksum_tuple |