summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/bb/siggen.py
diff options
context:
space:
mode:
authorJoshua Watt <jpewhacker@gmail.com>2019-09-17 08:37:11 -0500
committerRichard Purdie <richard.purdie@linuxfoundation.org>2019-09-18 17:52:01 +0100
commit20f032338ff3b4b25f2cbb7f975b5fd1c105004d (patch)
tree84c1a4693fdbaac0823e99d70ed7c814b890244c /bitbake/lib/bb/siggen.py
parent34923e4f772fc57c29421741d2f622eb4009961c (diff)
downloadpoky-20f032338ff3b4b25f2cbb7f975b5fd1c105004d.tar.gz
bitbake: bitbake: Rework hash equivalence
Reworks the hash equivalence server to address performance issues that were encountered with the REST mechanism used previously, particularly under the heavy request load seen during signature generation. Notable changes are: 1) The server protocol is no longer HTTP-based. Instead, it uses a simpler protocol that sends JSON over a streaming link. This protocol has much lower overhead than HTTP since it eliminates the HTTP headers. 2) The hash equivalence server can bind to either a TCP port or a Unix domain socket. Unix domain sockets are more efficient for local communication, and so are preferred if the user enables hash equivalence only for the local build. The arguments to the 'bitbake-hashserve' command have been updated accordingly. 3) The value to which BB_HASHSERVE should be set to enable a local hash equivalence server has changed from "localhost:0" to "auto". The former didn't make sense when the local server was using a Unix domain socket. 4) For optimal performance, clients are expected to keep a persistent connection to the server instead of creating a new connection each time a request is made. 5) Most of the client logic has been moved to the hashserv module in bitbake. This makes it easier to share the client code. 6) A new bitbake command called 'bitbake-hashclient' has been added. This command can be used to query a hash equivalence server, including fetching the statistics and running a performance stress test. 7) The table indexes in the SQLite database have been updated to optimize hash lookups. This change is backward compatible, as the database will delete the old indexes first if they exist. 8) The server has been reworked to use Python async to maximize performance with persistently connected clients. This requires Python 3.5 or later. (Bitbake rev: 2124eec3a5830afe8e07ffb6f2a0df6a417ac973) Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/bb/siggen.py')
-rw-r--r--bitbake/lib/bb/siggen.py74
1 files changed, 26 insertions, 48 deletions
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 8b593a348b..e047c217e5 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -13,6 +13,7 @@ import difflib
13import simplediff 13import simplediff
14from bb.checksum import FileChecksumCache 14from bb.checksum import FileChecksumCache
15from bb import runqueue 15from bb import runqueue
16import hashserv
16 17
17logger = logging.getLogger('BitBake.SigGen') 18logger = logging.getLogger('BitBake.SigGen')
18 19
@@ -375,6 +376,11 @@ class SignatureGeneratorUniHashMixIn(object):
375 self.server, self.method = data[:2] 376 self.server, self.method = data[:2]
376 super().set_taskdata(data[2:]) 377 super().set_taskdata(data[2:])
377 378
379 def client(self):
380 if getattr(self, '_client', None) is None:
381 self._client = hashserv.create_client(self.server)
382 return self._client
383
378 def __get_task_unihash_key(self, tid): 384 def __get_task_unihash_key(self, tid):
379 # TODO: The key only *needs* to be the taskhash, the tid is just 385 # TODO: The key only *needs* to be the taskhash, the tid is just
380 # convenient 386 # convenient
@@ -395,9 +401,6 @@ class SignatureGeneratorUniHashMixIn(object):
395 self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash 401 self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
396 402
397 def get_unihash(self, tid): 403 def get_unihash(self, tid):
398 import urllib
399 import json
400
401 taskhash = self.taskhash[tid] 404 taskhash = self.taskhash[tid]
402 405
403 # If it's not a setscene task we can return 406 # If it's not a setscene task we can return
@@ -428,36 +431,22 @@ class SignatureGeneratorUniHashMixIn(object):
428 unihash = taskhash 431 unihash = taskhash
429 432
430 try: 433 try:
431 url = '%s/v1/equivalent?%s' % (self.server, 434 data = self.client().get_unihash(self.method, self.taskhash[tid])
432 urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]})) 435 if data:
433 436 unihash = data
434 request = urllib.request.Request(url)
435 response = urllib.request.urlopen(request)
436 data = response.read().decode('utf-8')
437
438 json_data = json.loads(data)
439
440 if json_data:
441 unihash = json_data['unihash']
442 # A unique hash equal to the taskhash is not very interesting, 437 # A unique hash equal to the taskhash is not very interesting,
443 # so it is reported at debug level 2. If they differ, that 438 # so it is reported at debug level 2. If they differ, that
444 # is much more interesting, so it is reported at debug level 1 439 # is much more interesting, so it is reported at debug level 1
445 bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server)) 440 bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
446 else: 441 else:
447 bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server)) 442 bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
448 except urllib.error.URLError as e: 443 except hashserv.HashConnectionError as e:
449 bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 444 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
450 except (KeyError, json.JSONDecodeError) as e:
451 bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
452 445
453 self.unitaskhashes[key] = unihash 446 self.unitaskhashes[key] = unihash
454 return unihash 447 return unihash
455 448
456 def report_unihash(self, path, task, d): 449 def report_unihash(self, path, task, d):
457 import urllib
458 import json
459 import tempfile
460 import base64
461 import importlib 450 import importlib
462 451
463 taskhash = d.getVar('BB_TASKHASH') 452 taskhash = d.getVar('BB_TASKHASH')
@@ -492,42 +481,31 @@ class SignatureGeneratorUniHashMixIn(object):
492 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs) 481 outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)
493 482
494 try: 483 try:
495 url = '%s/v1/equivalent' % self.server 484 extra_data = {}
496 task_data = { 485
497 'taskhash': taskhash, 486 owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
498 'method': self.method, 487 if owner:
499 'outhash': outhash, 488 extra_data['owner'] = owner
500 'unihash': unihash,
501 'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
502 }
503 489
504 if report_taskdata: 490 if report_taskdata:
505 sigfile.seek(0) 491 sigfile.seek(0)
506 492
507 task_data['PN'] = d.getVar('PN') 493 extra_data['PN'] = d.getVar('PN')
508 task_data['PV'] = d.getVar('PV') 494 extra_data['PV'] = d.getVar('PV')
509 task_data['PR'] = d.getVar('PR') 495 extra_data['PR'] = d.getVar('PR')
510 task_data['task'] = task 496 extra_data['task'] = task
511 task_data['outhash_siginfo'] = sigfile.read().decode('utf-8') 497 extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
512
513 headers = {'content-type': 'application/json'}
514
515 request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
516 response = urllib.request.urlopen(request)
517 data = response.read().decode('utf-8')
518 498
519 json_data = json.loads(data) 499 data = self.client().report_unihash(taskhash, self.method, outhash, unihash, extra_data)
520 new_unihash = json_data['unihash'] 500 new_unihash = data['unihash']
521 501
522 if new_unihash != unihash: 502 if new_unihash != unihash:
523 bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server)) 503 bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
524 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d) 504 bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
525 else: 505 else:
526 bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server)) 506 bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
527 except urllib.error.URLError as e: 507 except hashserv.HashConnectionError as e:
528 bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e))) 508 bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
529 except (KeyError, json.JSONDecodeError) as e:
530 bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
531 finally: 509 finally:
532 if sigfile: 510 if sigfile:
533 sigfile.close() 511 sigfile.close()
@@ -548,7 +526,7 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG
548 name = "TestEquivHash" 526 name = "TestEquivHash"
549 def init_rundepcheck(self, data): 527 def init_rundepcheck(self, data):
550 super().init_rundepcheck(data) 528 super().init_rundepcheck(data)
551 self.server = "http://" + data.getVar('BB_HASHSERVE') 529 self.server = data.getVar('BB_HASHSERVE')
552 self.method = "sstate_output_hash" 530 self.method = "sstate_output_hash"
553 531
554 532