diff options
author | Joshua Watt <jpewhacker@gmail.com> | 2019-09-17 08:37:11 -0500 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-09-18 17:52:01 +0100 |
commit | 20f032338ff3b4b25f2cbb7f975b5fd1c105004d (patch) | |
tree | 84c1a4693fdbaac0823e99d70ed7c814b890244c /bitbake/lib/bb/siggen.py | |
parent | 34923e4f772fc57c29421741d2f622eb4009961c (diff) | |
download | poky-20f032338ff3b4b25f2cbb7f975b5fd1c105004d.tar.gz |
bitbake: bitbake: Rework hash equivalence
Reworks the hash equivalence server to address performance issues that
were encountered with the REST mechanism used previously, particularly
during the heavy request load encountered during signature generation.
Notable changes are:
1) The server protocol is no longer HTTP based. Instead, it sends JSON
messages over a simpler streaming link. This protocol has much lower
overhead than HTTP since it eliminates the HTTP headers.
2) The hash equivalence server can either bind to a TCP port, or a Unix
domain socket. Unix domain sockets are more efficient for local
communication, and so are preferred if the user enables hash
equivalence only for the local build. The arguments to the
'bitbake-hashserve' command have been updated accordingly.
3) The value to which BB_HASHSERVE should be set to enable a local hash
equivalence server is changed to "auto" instead of "localhost:0". The
latter didn't make sense when the local server was using a Unix
domain socket.
4) For optimal performance, clients are expected to keep a persistent
connection to the server instead of creating a new connection each time
a request is made.
5) Most of the client logic has been moved to the hashserve module in
bitbake. This makes it easier to share the client code.
6) A new bitbake command has been added called 'bitbake-hashclient'.
This command can be used to query a hash equivalence server, including
fetching the statistics and running a performance stress test.
7) The table indexes in the SQLite database have been updated to
optimize hash lookups. This change is backward compatible, as the
database will delete the old indexes first if they exist.
8) The server has been reworked to use python async to maximize
performance with persistently connected clients. This requires Python
3.5 or later.
(Bitbake rev: 2124eec3a5830afe8e07ffb6f2a0df6a417ac973)
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bb/siggen.py')
-rw-r--r-- | bitbake/lib/bb/siggen.py | 74 |
1 files changed, 26 insertions, 48 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py index 8b593a348b..e047c217e5 100644 --- a/bitbake/lib/bb/siggen.py +++ b/bitbake/lib/bb/siggen.py | |||
@@ -13,6 +13,7 @@ import difflib | |||
13 | import simplediff | 13 | import simplediff |
14 | from bb.checksum import FileChecksumCache | 14 | from bb.checksum import FileChecksumCache |
15 | from bb import runqueue | 15 | from bb import runqueue |
16 | import hashserv | ||
16 | 17 | ||
17 | logger = logging.getLogger('BitBake.SigGen') | 18 | logger = logging.getLogger('BitBake.SigGen') |
18 | 19 | ||
@@ -375,6 +376,11 @@ class SignatureGeneratorUniHashMixIn(object): | |||
375 | self.server, self.method = data[:2] | 376 | self.server, self.method = data[:2] |
376 | super().set_taskdata(data[2:]) | 377 | super().set_taskdata(data[2:]) |
377 | 378 | ||
379 | def client(self): | ||
380 | if getattr(self, '_client', None) is None: | ||
381 | self._client = hashserv.create_client(self.server) | ||
382 | return self._client | ||
383 | |||
378 | def __get_task_unihash_key(self, tid): | 384 | def __get_task_unihash_key(self, tid): |
379 | # TODO: The key only *needs* to be the taskhash, the tid is just | 385 | # TODO: The key only *needs* to be the taskhash, the tid is just |
380 | # convenient | 386 | # convenient |
@@ -395,9 +401,6 @@ class SignatureGeneratorUniHashMixIn(object): | |||
395 | self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash | 401 | self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash |
396 | 402 | ||
397 | def get_unihash(self, tid): | 403 | def get_unihash(self, tid): |
398 | import urllib | ||
399 | import json | ||
400 | |||
401 | taskhash = self.taskhash[tid] | 404 | taskhash = self.taskhash[tid] |
402 | 405 | ||
403 | # If its not a setscene task we can return | 406 | # If its not a setscene task we can return |
@@ -428,36 +431,22 @@ class SignatureGeneratorUniHashMixIn(object): | |||
428 | unihash = taskhash | 431 | unihash = taskhash |
429 | 432 | ||
430 | try: | 433 | try: |
431 | url = '%s/v1/equivalent?%s' % (self.server, | 434 | data = self.client().get_unihash(self.method, self.taskhash[tid]) |
432 | urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]})) | 435 | if data: |
433 | 436 | unihash = data | |
434 | request = urllib.request.Request(url) | ||
435 | response = urllib.request.urlopen(request) | ||
436 | data = response.read().decode('utf-8') | ||
437 | |||
438 | json_data = json.loads(data) | ||
439 | |||
440 | if json_data: | ||
441 | unihash = json_data['unihash'] | ||
442 | # A unique hash equal to the taskhash is not very interesting, | 437 | # A unique hash equal to the taskhash is not very interesting, |
443 | # so it is reported it at debug level 2. If they differ, that | 438 | # so it is reported it at debug level 2. If they differ, that |
444 | # is much more interesting, so it is reported at debug level 1 | 439 | # is much more interesting, so it is reported at debug level 1 |
445 | bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) | 440 | bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) |
446 | else: | 441 | else: |
447 | bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) | 442 | bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) |
448 | except urllib.error.URLError as e: | 443 | except hashserv.HashConnectionError as e: |
449 | bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | 444 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) |
450 | except (KeyError, json.JSONDecodeError) as e: | ||
451 | bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e))) | ||
452 | 445 | ||
453 | self.unitaskhashes[key] = unihash | 446 | self.unitaskhashes[key] = unihash |
454 | return unihash | 447 | return unihash |
455 | 448 | ||
456 | def report_unihash(self, path, task, d): | 449 | def report_unihash(self, path, task, d): |
457 | import urllib | ||
458 | import json | ||
459 | import tempfile | ||
460 | import base64 | ||
461 | import importlib | 450 | import importlib |
462 | 451 | ||
463 | taskhash = d.getVar('BB_TASKHASH') | 452 | taskhash = d.getVar('BB_TASKHASH') |
@@ -492,42 +481,31 @@ class SignatureGeneratorUniHashMixIn(object): | |||
492 | outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) | 481 | outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) |
493 | 482 | ||
494 | try: | 483 | try: |
495 | url = '%s/v1/equivalent' % self.server | 484 | extra_data = {} |
496 | task_data = { | 485 | |
497 | 'taskhash': taskhash, | 486 | owner = d.getVar('SSTATE_HASHEQUIV_OWNER') |
498 | 'method': self.method, | 487 | if owner: |
499 | 'outhash': outhash, | 488 | extra_data['owner'] = owner |
500 | 'unihash': unihash, | ||
501 | 'owner': d.getVar('SSTATE_HASHEQUIV_OWNER') | ||
502 | } | ||
503 | 489 | ||
504 | if report_taskdata: | 490 | if report_taskdata: |
505 | sigfile.seek(0) | 491 | sigfile.seek(0) |
506 | 492 | ||
507 | task_data['PN'] = d.getVar('PN') | 493 | extra_data['PN'] = d.getVar('PN') |
508 | task_data['PV'] = d.getVar('PV') | 494 | extra_data['PV'] = d.getVar('PV') |
509 | task_data['PR'] = d.getVar('PR') | 495 | extra_data['PR'] = d.getVar('PR') |
510 | task_data['task'] = task | 496 | extra_data['task'] = task |
511 | task_data['outhash_siginfo'] = sigfile.read().decode('utf-8') | 497 | extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8') |
512 | |||
513 | headers = {'content-type': 'application/json'} | ||
514 | |||
515 | request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers) | ||
516 | response = urllib.request.urlopen(request) | ||
517 | data = response.read().decode('utf-8') | ||
518 | 498 | ||
519 | json_data = json.loads(data) | 499 | data = self.client().report_unihash(taskhash, self.method, outhash, unihash, extra_data) |
520 | new_unihash = json_data['unihash'] | 500 | new_unihash = data['unihash'] |
521 | 501 | ||
522 | if new_unihash != unihash: | 502 | if new_unihash != unihash: |
523 | bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) | 503 | bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) |
524 | bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) | 504 | bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) |
525 | else: | 505 | else: |
526 | bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) | 506 | bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) |
527 | except urllib.error.URLError as e: | 507 | except hashserv.HashConnectionError as e: |
528 | bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) | 508 | bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) |
529 | except (KeyError, json.JSONDecodeError) as e: | ||
530 | bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e))) | ||
531 | finally: | 509 | finally: |
532 | if sigfile: | 510 | if sigfile: |
533 | sigfile.close() | 511 | sigfile.close() |
@@ -548,7 +526,7 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG | |||
548 | name = "TestEquivHash" | 526 | name = "TestEquivHash" |
549 | def init_rundepcheck(self, data): | 527 | def init_rundepcheck(self, data): |
550 | super().init_rundepcheck(data) | 528 | super().init_rundepcheck(data) |
551 | self.server = "http://" + data.getVar('BB_HASHSERVE') | 529 | self.server = data.getVar('BB_HASHSERVE') |
552 | self.method = "sstate_output_hash" | 530 | self.method = "sstate_output_hash" |
553 | 531 | ||
554 | 532 | ||