summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoshua Watt <JPEWhacker@gmail.com>2024-02-18 15:59:51 -0700
committerRichard Purdie <richard.purdie@linuxfoundation.org>2024-02-19 11:58:12 +0000
commite5056394e030c4573ca7d65e79484a2aa8afaaed (patch)
tree6d42efcea21015bab44c1b924fa1cbd26592d015
parent37b4d7e4931cb032659273c19520d74083ffb0e9 (diff)
downloadpoky-e5056394e030c4573ca7d65e79484a2aa8afaaed.tar.gz
bitbake: siggen: Add parallel query API
Implements a new API called get_unihashes() that allows for querying multiple unihashes in parallel. The API is also reworked to make it easier for derived classes to interface with the new API in a consistent manner. Instead of overriding get_unihash() to add custom handling for local hash calculation (e.g. caches), derived classes should now override get_cached_unihash(), and return the local unihash or None if there isn't one. (Bitbake rev: 6faf48c09a4003a31b32e450779fb8ac9cc5e946) Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--bitbake/lib/bb/siggen.py121
1 file changed, 87 insertions, 34 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 58854aee76..e1a4fa2cdd 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -102,9 +102,18 @@ class SignatureGenerator(object):
102 if flag: 102 if flag:
103 self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag 103 self.datacaches[mc].stamp_extrainfo[mcfn][t] = flag
104 104
105 def get_cached_unihash(self, tid):
106 return None
107
105 def get_unihash(self, tid): 108 def get_unihash(self, tid):
109 unihash = self.get_cached_unihash(tid)
110 if unihash:
111 return unihash
106 return self.taskhash[tid] 112 return self.taskhash[tid]
107 113
114 def get_unihashes(self, tids):
115 return {tid: self.get_unihash(tid) for tid in tids}
116
108 def prep_taskhash(self, tid, deps, dataCaches): 117 def prep_taskhash(self, tid, deps, dataCaches):
109 return 118 return
110 119
@@ -524,28 +533,37 @@ class SignatureGeneratorUniHashMixIn(object):
524 super().__init__(data) 533 super().__init__(data)
525 534
526 def get_taskdata(self): 535 def get_taskdata(self):
527 return (self.server, self.method, self.extramethod) + super().get_taskdata() 536 return (self.server, self.method, self.extramethod, self.max_parallel) + super().get_taskdata()
528 537
529 def set_taskdata(self, data): 538 def set_taskdata(self, data):
530 self.server, self.method, self.extramethod = data[:3] 539 self.server, self.method, self.extramethod, self.max_parallel = data[:4]
531 super().set_taskdata(data[3:]) 540 super().set_taskdata(data[4:])
532 541
533 def client(self): 542 def client(self):
534 if getattr(self, '_client', None) is None: 543 if getattr(self, '_client', None) is None:
535 self._client = hashserv.create_client(self.server) 544 self._client = hashserv.create_client(self.server)
536 return self._client 545 return self._client
537 546
547 def client_pool(self):
548 if getattr(self, '_client_pool', None) is None:
549 self._client_pool = hashserv.client.ClientPool(self.server, self.max_parallel)
550 return self._client_pool
551
538 def reset(self, data): 552 def reset(self, data):
539 if getattr(self, '_client', None) is not None: 553 self.__close_clients()
540 self._client.close()
541 self._client = None
542 return super().reset(data) 554 return super().reset(data)
543 555
544 def exit(self): 556 def exit(self):
557 self.__close_clients()
558 return super().exit()
559
560 def __close_clients(self):
545 if getattr(self, '_client', None) is not None: 561 if getattr(self, '_client', None) is not None:
546 self._client.close() 562 self._client.close()
547 self._client = None 563 self._client = None
548 return super().exit() 564 if getattr(self, '_client_pool', None) is not None:
565 self._client_pool.close()
566 self._client_pool = None
549 567
550 def get_stampfile_hash(self, tid): 568 def get_stampfile_hash(self, tid):
551 if tid in self.taskhash: 569 if tid in self.taskhash:
@@ -578,7 +596,7 @@ class SignatureGeneratorUniHashMixIn(object):
578 return None 596 return None
579 return unihash 597 return unihash
580 598
581 def get_unihash(self, tid): 599 def get_cached_unihash(self, tid):
582 taskhash = self.taskhash[tid] 600 taskhash = self.taskhash[tid]
583 601
584 # If its not a setscene task we can return 602 # If its not a setscene task we can return
@@ -593,40 +611,74 @@ class SignatureGeneratorUniHashMixIn(object):
593 self.unihash[tid] = unihash 611 self.unihash[tid] = unihash
594 return unihash 612 return unihash
595 613
596 # In the absence of being able to discover a unique hash from the 614 return None
597 # server, make it be equivalent to the taskhash. The unique "hash" only
598 # really needs to be a unique string (not even necessarily a hash), but
599 # making it match the taskhash has a few advantages:
600 #
601 # 1) All of the sstate code that assumes hashes can be the same
602 # 2) It provides maximal compatibility with builders that don't use
603 # an equivalency server
604 # 3) The value is easy for multiple independent builders to derive the
605 # same unique hash from the same input. This means that if the
606 # independent builders find the same taskhash, but it isn't reported
607 # to the server, there is a better chance that they will agree on
608 # the unique hash.
609 unihash = taskhash
610 615
611 try: 616 def _get_method(self, tid):
612 method = self.method 617 method = self.method
613 if tid in self.extramethod: 618 if tid in self.extramethod:
614 method = method + self.extramethod[tid] 619 method = method + self.extramethod[tid]
615 data = self.client().get_unihash(method, self.taskhash[tid]) 620
616 if data: 621 return method
617 unihash = data 622
623 def get_unihash(self, tid):
624 return self.get_unihashes([tid])[tid]
625
626 def get_unihashes(self, tids):
627 """
628         For an iterable of tids, returns a dictionary that maps each tid to a
629 unihash
630 """
631 result = {}
632 queries = {}
633 query_result = {}
634
635 for tid in tids:
636 unihash = self.get_cached_unihash(tid)
637 if unihash:
638 result[tid] = unihash
639 else:
640 queries[tid] = (self._get_method(tid), self.taskhash[tid])
641
642 if len(queries) == 0:
643 return result
644
645 if self.max_parallel <= 1 or len(queries) <= 1:
646 # No parallelism required. Make the query serially with the single client
647 for tid, args in queries.items():
648 query_result[tid] = self.client().get_unihash(*args)
649 else:
650 query_result = self.client_pool().get_unihashes(queries)
651
652 for tid, unihash in query_result.items():
653 # In the absence of being able to discover a unique hash from the
654 # server, make it be equivalent to the taskhash. The unique "hash" only
655 # really needs to be a unique string (not even necessarily a hash), but
656 # making it match the taskhash has a few advantages:
657 #
658 # 1) All of the sstate code that assumes hashes can be the same
659 # 2) It provides maximal compatibility with builders that don't use
660 # an equivalency server
661 # 3) The value is easy for multiple independent builders to derive the
662 # same unique hash from the same input. This means that if the
663 # independent builders find the same taskhash, but it isn't reported
664 # to the server, there is a better chance that they will agree on
665 # the unique hash.
666 taskhash = self.taskhash[tid]
667 if unihash:
618 # A unique hash equal to the taskhash is not very interesting, 668 # A unique hash equal to the taskhash is not very interesting,
619                 # so it is reported at debug level 2. If they differ, that 669                 # so it is reported at debug level 2. If they differ, that
620 # is much more interesting, so it is reported at debug level 1 670 # is much more interesting, so it is reported at debug level 1
621 hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 671 hashequiv_logger.bbdebug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
622 else: 672 else:
623 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 673 hashequiv_logger.debug2('No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
624 except ConnectionError as e: 674 unihash = taskhash
625 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
626 675
627 self.set_unihash(tid, unihash) 676
628 self.unihash[tid] = unihash 677 self.set_unihash(tid, unihash)
629 return unihash 678 self.unihash[tid] = unihash
679 result[tid] = unihash
680
681 return result
630 682
631 def report_unihash(self, path, task, d): 683 def report_unihash(self, path, task, d):
632 import importlib 684 import importlib
@@ -754,6 +806,7 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG
754 super().init_rundepcheck(data) 806 super().init_rundepcheck(data)
755 self.server = data.getVar('BB_HASHSERVE') 807 self.server = data.getVar('BB_HASHSERVE')
756 self.method = "sstate_output_hash" 808 self.method = "sstate_output_hash"
809 self.max_parallel = 1
757 810
758def clean_checksum_file_path(file_checksum_tuple): 811def clean_checksum_file_path(file_checksum_tuple):
759 f, cs = file_checksum_tuple 812 f, cs = file_checksum_tuple