author     Joshua Watt <jpewhacker@gmail.com>                   2019-09-17 08:37:11 -0500
committer  Richard Purdie <richard.purdie@linuxfoundation.org>  2019-09-18 11:52:03 +0100
commit     19b60d0e7a01df5b435c7fb17211ad64369acc54
tree       5d8b57cd47df8aa6da76208080012ffe453a4ac0 /bitbake/lib/bb
parent     55bb60614972659ba6f4947ff40749a3b2e38d1a
download   poky-19b60d0e7a01df5b435c7fb17211ad64369acc54.tar.gz
bitbake: bitbake: Rework hash equivalence
Reworks the hash equivalence server to address performance issues that were
encountered with the REST mechanism used previously, particularly during the
heavy request load encountered during signature generation. Notable changes
are:

1) The server protocol is no longer HTTP based. Instead, it uses a simpler
   JSON over a streaming protocol link. This protocol has much lower overhead
   than HTTP since it eliminates the HTTP headers.

2) The hash equivalence server can either bind to a TCP port, or a Unix
   domain socket. Unix domain sockets are more efficient for local
   communication, and so are preferred if the user enables hash equivalence
   only for the local build. The arguments to the 'bitbake-hashserve' command
   have been updated accordingly.

3) The value to which BB_HASHSERVE should be set to enable a local hash
   equivalence server is changed to "auto" instead of "localhost:0". The
   latter didn't make sense when the local server was using a Unix domain
   socket.

4) Clients are expected to keep a persistent connection to the server
   instead of creating a new connection each time a request is made for
   optimal performance.

5) Most of the client logic has been moved to the hashserve module in
   bitbake. This makes it easier to share the client code.

6) A new bitbake command has been added called 'bitbake-hashclient'. This
   command can be used to query a hash equivalence server, including fetching
   the statistics and running a performance stress test.

7) The table indexes in the SQLite database have been updated to optimize
   hash lookups. This change is backward compatible, as the database will
   delete the old indexes first if they exist.

8) The server has been reworked to use python async to maximize performance
   with persistently connected clients. This requires Python 3.5 or later.

(Bitbake rev: 1f404bd23335f6c5f6ca944c5be0b838ffb76c4d)

Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
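The client API referenced in points 1, 4 and 5 above is visible in the
siggen.py changes below: one persistent connection is created via
hashserv.create_client() and then reused for get_unihash() and
report_unihash() calls. A minimal sketch of that usage from an external
script follows; the socket path, hash strings and owner value are made-up
placeholders, and the "host:port" form for a TCP server is assumed from the
commit message rather than shown in this diff.

    # Sketch only -- exercises the hashserv client calls that appear in the
    # siggen.py diff below. All concrete values here are placeholders.
    import hashserv

    # One persistent connection, reused for all requests (point 4 above);
    # a TCP server would presumably be addressed as "hostname:port".
    client = hashserv.create_client("unix:///path/to/build/hashserve.sock")

    # Ask the server for a unified hash matching a task hash, if it knows one.
    unihash = client.get_unihash("sstate_output_hash", "0123abcd-placeholder-taskhash")
    if unihash:
        print("server unihash:", unihash)

    # Report a locally computed output hash so equivalent tasks can be unified.
    result = client.report_unihash("0123abcd-placeholder-taskhash", "sstate_output_hash",
                                   "4567ef89-placeholder-outhash", "0123abcd-placeholder-unihash",
                                   {'owner': 'builder@example.com'})
    print("unified hash is now:", result['unihash'])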
Diffstat (limited to 'bitbake/lib/bb')
-rw-r--r--  bitbake/lib/bb/cooker.py          17
-rw-r--r--  bitbake/lib/bb/runqueue.py         4
-rw-r--r--  bitbake/lib/bb/siggen.py          74
-rw-r--r--  bitbake/lib/bb/tests/runqueue.py  19
4 files changed, 50 insertions(+), 64 deletions(-)
diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py
index e46868ddd0..0c540028ae 100644
--- a/bitbake/lib/bb/cooker.py
+++ b/bitbake/lib/bb/cooker.py
@@ -194,7 +194,7 @@ class BBCooker:
 
         self.ui_cmdline = None
         self.hashserv = None
-        self.hashservport = None
+        self.hashservaddr = None
 
         self.initConfigurationData()
 
@@ -392,19 +392,20 @@ class BBCooker:
             except prserv.serv.PRServiceConfigError as e:
                 bb.fatal("Unable to start PR Server, exitting")
 
-        if self.data.getVar("BB_HASHSERVE") == "localhost:0":
+        if self.data.getVar("BB_HASHSERVE") == "auto":
+            # Create a new hash server bound to a unix domain socket
             if not self.hashserv:
                 dbfile = (self.data.getVar("PERSISTENT_DIR") or self.data.getVar("CACHE")) + "/hashserv.db"
-                self.hashserv = hashserv.create_server(('localhost', 0), dbfile, '')
-                self.hashservport = "localhost:" + str(self.hashserv.server_port)
+                self.hashservaddr = "unix://%s/hashserve.sock" % self.data.getVar("TOPDIR")
+                self.hashserv = hashserv.create_server(self.hashservaddr, dbfile, sync=False)
                 self.hashserv.process = multiprocessing.Process(target=self.hashserv.serve_forever)
                 self.hashserv.process.daemon = True
                 self.hashserv.process.start()
-            self.data.setVar("BB_HASHSERVE", self.hashservport)
-            self.databuilder.origdata.setVar("BB_HASHSERVE", self.hashservport)
-            self.databuilder.data.setVar("BB_HASHSERVE", self.hashservport)
+            self.data.setVar("BB_HASHSERVE", self.hashservaddr)
+            self.databuilder.origdata.setVar("BB_HASHSERVE", self.hashservaddr)
+            self.databuilder.data.setVar("BB_HASHSERVE", self.hashservaddr)
             for mc in self.databuilder.mcdata:
-                self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.hashservport)
+                self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.hashservaddr)
 
         bb.parse.init_parser(self.data)
 
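The cooker hunk above is also a compact recipe for running the server outside
of a build: construct a "unix://..." address, create the server around an
SQLite database file with hashserv.create_server(), and run serve_forever()
in a background process. A condensed sketch of the same calls with placeholder
paths; only the calls shown in the diff are used, everything else is
illustrative.

    # Sketch only -- mirrors what BBCooker does when BB_HASHSERVE is "auto".
    # The directory and database paths are placeholders.
    import multiprocessing
    import hashserv

    addr = "unix:///tmp/example-build/hashserve.sock"
    dbfile = "/tmp/example-build/cache/hashserv.db"

    server = hashserv.create_server(addr, dbfile, sync=False)

    process = multiprocessing.Process(target=server.serve_forever)
    process.daemon = True   # the server dies with the controlling process, as in the cooker
    process.start()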
diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
index 45bfec8c37..314a30908b 100644
--- a/bitbake/lib/bb/runqueue.py
+++ b/bitbake/lib/bb/runqueue.py
@@ -1260,7 +1260,7 @@ class RunQueue:
1260 "buildname" : self.cfgData.getVar("BUILDNAME"), 1260 "buildname" : self.cfgData.getVar("BUILDNAME"),
1261 "date" : self.cfgData.getVar("DATE"), 1261 "date" : self.cfgData.getVar("DATE"),
1262 "time" : self.cfgData.getVar("TIME"), 1262 "time" : self.cfgData.getVar("TIME"),
1263 "hashservport" : self.cooker.hashservport, 1263 "hashservaddr" : self.cooker.hashservaddr,
1264 } 1264 }
1265 1265
1266 worker.stdin.write(b"<cookerconfig>" + pickle.dumps(self.cooker.configuration) + b"</cookerconfig>") 1266 worker.stdin.write(b"<cookerconfig>" + pickle.dumps(self.cooker.configuration) + b"</cookerconfig>")
@@ -2174,7 +2174,7 @@ class RunQueueExecute:
             ret.add(dep)
         return ret
 
-    # We filter out multiconfig dependencies from taskdepdata we pass to the tasks 
+    # We filter out multiconfig dependencies from taskdepdata we pass to the tasks
     # as most code can't handle them
     def build_taskdepdata(self, task):
         taskdepdata = {}
diff --git a/bitbake/lib/bb/siggen.py b/bitbake/lib/bb/siggen.py
index 8b593a348b..0d1d3425e8 100644
--- a/bitbake/lib/bb/siggen.py
+++ b/bitbake/lib/bb/siggen.py
@@ -13,6 +13,7 @@ import difflib
 import simplediff
 from bb.checksum import FileChecksumCache
 from bb import runqueue
+import hashserv
 
 logger = logging.getLogger('BitBake.SigGen')
 
@@ -375,6 +376,11 @@ class SignatureGeneratorUniHashMixIn(object):
         self.server, self.method = data[:2]
         super().set_taskdata(data[2:])
 
+    def client(self):
+        if getattr(self, '_client', None) is None:
+            self._client = hashserv.create_client(self.server)
+        return self._client
+
     def __get_task_unihash_key(self, tid):
         # TODO: The key only *needs* to be the taskhash, the tid is just
         # convenient
@@ -395,9 +401,6 @@ class SignatureGeneratorUniHashMixIn(object):
         self.unitaskhashes[self.__get_task_unihash_key(tid)] = unihash
 
     def get_unihash(self, tid):
-        import urllib
-        import json
-
         taskhash = self.taskhash[tid]
 
         # If its not a setscene task we can return
@@ -428,36 +431,22 @@ class SignatureGeneratorUniHashMixIn(object):
         unihash = taskhash
 
         try:
-            url = '%s/v1/equivalent?%s' % (self.server,
-                                           urllib.parse.urlencode({'method': self.method, 'taskhash': self.taskhash[tid]}))
-
-            request = urllib.request.Request(url)
-            response = urllib.request.urlopen(request)
-            data = response.read().decode('utf-8')
-
-            json_data = json.loads(data)
-
-            if json_data:
-                unihash = json_data['unihash']
+            data = self.client().get_unihash(self.method, self.taskhash[tid])
+            if data:
+                unihash = data
                 # A unique hash equal to the taskhash is not very interesting,
                 # so it is reported it at debug level 2. If they differ, that
                 # is much more interesting, so it is reported at debug level 1
                 bb.debug((1, 2)[unihash == taskhash], 'Found unihash %s in place of %s for %s from %s' % (unihash, taskhash, tid, self.server))
             else:
                 bb.debug(2, 'No reported unihash for %s:%s from %s' % (tid, taskhash, self.server))
-        except urllib.error.URLError as e:
-            bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
-        except (KeyError, json.JSONDecodeError) as e:
-            bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+        except hashserv.HashConnectionError as e:
+            bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
 
         self.unitaskhashes[key] = unihash
         return unihash
 
     def report_unihash(self, path, task, d):
-        import urllib
-        import json
-        import tempfile
-        import base64
         import importlib
 
         taskhash = d.getVar('BB_TASKHASH')
@@ -492,42 +481,31 @@ class SignatureGeneratorUniHashMixIn(object):
             outhash = bb.utils.better_eval(self.method + '(path, sigfile, task, d)', locs)
 
             try:
-                url = '%s/v1/equivalent' % self.server
-                task_data = {
-                    'taskhash': taskhash,
-                    'method': self.method,
-                    'outhash': outhash,
-                    'unihash': unihash,
-                    'owner': d.getVar('SSTATE_HASHEQUIV_OWNER')
-                }
+                extra_data = {}
+
+                owner = d.getVar('SSTATE_HASHEQUIV_OWNER')
+                if owner:
+                    extra_data['owner'] = owner
 
                 if report_taskdata:
                     sigfile.seek(0)
 
-                    task_data['PN'] = d.getVar('PN')
-                    task_data['PV'] = d.getVar('PV')
-                    task_data['PR'] = d.getVar('PR')
-                    task_data['task'] = task
-                    task_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
-
-                headers = {'content-type': 'application/json'}
-
-                request = urllib.request.Request(url, json.dumps(task_data).encode('utf-8'), headers)
-                response = urllib.request.urlopen(request)
-                data = response.read().decode('utf-8')
+                    extra_data['PN'] = d.getVar('PN')
+                    extra_data['PV'] = d.getVar('PV')
+                    extra_data['PR'] = d.getVar('PR')
+                    extra_data['task'] = task
+                    extra_data['outhash_siginfo'] = sigfile.read().decode('utf-8')
 
-                json_data = json.loads(data)
-                new_unihash = json_data['unihash']
+                data = self.client().report_unihash(taskhash, self.method, outhash, unihash, extra_data)
+                new_unihash = data['unihash']
 
                 if new_unihash != unihash:
                     bb.debug(1, 'Task %s unihash changed %s -> %s by server %s' % (taskhash, unihash, new_unihash, self.server))
                     bb.event.fire(bb.runqueue.taskUniHashUpdate(fn + ':do_' + task, new_unihash), d)
                 else:
                     bb.debug(1, 'Reported task %s as unihash %s to %s' % (taskhash, unihash, self.server))
-            except urllib.error.URLError as e:
-                bb.warn('Failure contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
-            except (KeyError, json.JSONDecodeError) as e:
-                bb.warn('Poorly formatted response from %s: %s' % (self.server, str(e)))
+            except hashserv.HashConnectionError as e:
+                bb.warn('Error contacting Hash Equivalence Server %s: %s' % (self.server, str(e)))
             finally:
                 if sigfile:
                     sigfile.close()
@@ -548,7 +526,7 @@ class SignatureGeneratorTestEquivHash(SignatureGeneratorUniHashMixIn, SignatureG
     name = "TestEquivHash"
     def init_rundepcheck(self, data):
         super().init_rundepcheck(data)
-        self.server = "http://" + data.getVar('BB_HASHSERVE')
+        self.server = data.getVar('BB_HASHSERVE')
         self.method = "sstate_output_hash"
 
 
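The TestEquivHash hunk above captures the new contract for any signature
generator built on SignatureGeneratorUniHashMixIn: self.server now carries
the raw BB_HASHSERVE value (a "unix://..." path, a host:port pair, or "auto"
resolved by the cooker) instead of an HTTP URL. A hypothetical generator
following the same pattern; the class name and comments are illustrative and
not part of this commit.

    # Sketch only -- a custom signature generator wired to a shared hash
    # equivalence server, modelled on SignatureGeneratorTestEquivHash above.
    import bb.siggen

    class SignatureGeneratorMyEquivHash(bb.siggen.SignatureGeneratorUniHashMixIn,
                                        bb.siggen.SignatureGeneratorBasicHash):
        name = "MyEquivHash"

        def init_rundepcheck(self, data):
            super().init_rundepcheck(data)
            # BB_HASHSERVE is passed through as-is; the mixin's client() method
            # hands it to hashserv.create_client().
            self.server = data.getVar('BB_HASHSERVE')
            self.method = "sstate_output_hash"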
diff --git a/bitbake/lib/bb/tests/runqueue.py b/bitbake/lib/bb/tests/runqueue.py
index c7f5e55726..cb4d526f13 100644
--- a/bitbake/lib/bb/tests/runqueue.py
+++ b/bitbake/lib/bb/tests/runqueue.py
@@ -11,6 +11,7 @@ import bb
 import os
 import tempfile
 import subprocess
+import sys
 
 #
 # TODO:
@@ -232,10 +233,11 @@ class RunQueueTests(unittest.TestCase):
             self.assertEqual(set(tasks), set(expected))
 
 
+    @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
     def test_hashserv_single(self):
         with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
             extraenv = {
-                "BB_HASHSERVE" : "localhost:0",
+                "BB_HASHSERVE" : "auto",
                 "BB_SIGNATURE_HANDLER" : "TestEquivHash"
             }
             cmd = ["bitbake", "a1", "b1"]
@@ -255,10 +257,11 @@ class RunQueueTests(unittest.TestCase):
                         'a1:package_write_ipk_setscene', 'a1:package_qa_setscene']
             self.assertEqual(set(tasks), set(expected))
 
+    @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
     def test_hashserv_double(self):
         with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
             extraenv = {
-                "BB_HASHSERVE" : "localhost:0",
+                "BB_HASHSERVE" : "auto",
                 "BB_SIGNATURE_HANDLER" : "TestEquivHash"
             }
             cmd = ["bitbake", "a1", "b1", "e1"]
@@ -278,11 +281,12 @@ class RunQueueTests(unittest.TestCase):
             self.assertEqual(set(tasks), set(expected))
 
 
+    @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
     def test_hashserv_multiple_setscene(self):
         # Runs e1:do_package_setscene twice
         with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
             extraenv = {
-                "BB_HASHSERVE" : "localhost:0",
+                "BB_HASHSERVE" : "auto",
                 "BB_SIGNATURE_HANDLER" : "TestEquivHash"
             }
             cmd = ["bitbake", "a1", "b1", "e1"]
@@ -308,11 +312,12 @@ class RunQueueTests(unittest.TestCase):
             else:
                 self.assertEqual(tasks.count(i), 1, "%s not in task list once" % i)
 
+    @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
     def test_hashserv_partial_match(self):
         # e1:do_package matches initial built but not second hash value
         with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
             extraenv = {
-                "BB_HASHSERVE" : "localhost:0",
+                "BB_HASHSERVE" : "auto",
                 "BB_SIGNATURE_HANDLER" : "TestEquivHash"
             }
             cmd = ["bitbake", "a1", "b1"]
@@ -336,11 +341,12 @@ class RunQueueTests(unittest.TestCase):
             expected.remove('e1:package')
             self.assertEqual(set(tasks), set(expected))
 
+    @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
     def test_hashserv_partial_match2(self):
         # e1:do_package + e1:do_populate_sysroot matches initial built but not second hash value
         with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
             extraenv = {
-                "BB_HASHSERVE" : "localhost:0",
+                "BB_HASHSERVE" : "auto",
                 "BB_SIGNATURE_HANDLER" : "TestEquivHash"
             }
             cmd = ["bitbake", "a1", "b1"]
@@ -363,13 +369,14 @@ class RunQueueTests(unittest.TestCase):
                         'e1:package_setscene', 'e1:populate_sysroot_setscene', 'e1:build', 'e1:package_qa', 'e1:package_write_rpm', 'e1:package_write_ipk', 'e1:packagedata']
             self.assertEqual(set(tasks), set(expected))
 
+    @unittest.skipIf(sys.version_info < (3, 5, 0), 'Python 3.5 or later required')
     def test_hashserv_partial_match3(self):
         # e1:do_package is valid for a1 but not after b1
         # In former buggy code, this triggered e1:do_fetch, then e1:do_populate_sysroot to run
         # with none of the intermediate tasks which is a serious bug
         with tempfile.TemporaryDirectory(prefix="runqueuetest") as tempdir:
             extraenv = {
-                "BB_HASHSERVE" : "localhost:0",
+                "BB_HASHSERVE" : "auto",
                 "BB_SIGNATURE_HANDLER" : "TestEquivHash"
             }
             cmd = ["bitbake", "a1", "b1"]