diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2015-07-31 10:16:33 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2015-09-01 21:37:33 +0100 |
commit | 01c1167336e5e75077ceb46b6cc43981e09317f6 (patch) | |
tree | 5046459b1ac1ed3cbd3cee1f5effc0e22e2bb86f | |
parent | c1803b774a247522378edfd116bf44614352fdc4 (diff) | |
download | poky-01c1167336e5e75077ceb46b6cc43981e09317f6.tar.gz |
bitbake: bitbake: cooker: properly fix bitbake.lock handling
If the PR server or indeed any other child process takes some time to
exit (which it sometimes does when saving its database), it can end up
holding bitbake.lock after the UI exits, which led to errors if you ran
bitbake commands successively - we saw this when running the PR server
oe-selftest tests in OE-Core. The recent attempt to fix this wasn't
quite right and ended up breaking memory resident bitbake. This time we
close the lock file when cooker shuts down (inside the UI process)
instead of unlocking it, and this is done in the cooker code rather than
the actual UI code so it doesn't matter which UI is in use. Additionally
we report that we're waiting for the lock to be released, using lsof or
fuser if available to list the processes with the lock open.
The 'magic' in the locking is due to all spawned subprocesses of bitbake
holding an open file descriptor to the bitbake.lock. It is automatically
unlocked when all those fds close the file (as all the processes terminate).
We close the UI copy of the lock explicitly, then close the server process
copy, any remaining open copy is therefore some proess exiting.
(The reproducer for the problem is to set PRSERV_HOST = "localhost:0"
and add a call to time.sleep(20) after self.server_close() in
lib/prserv/serv.py, then run "bitbake -p; bitbake -p" ).
Cleanup work done by Paul Eggleton <paul.eggleton@linux.intel.com>.
This reverts bitbake commit 69ecd15aece54753154950c55d7af42f85ad8606 and
e97a9f1528d77503b5c93e48e3de9933fbb9f3cd.
(Bitbake rev: a29780bd43f74b7326fe788dbd65177b86806fcf)
(Bitbake rev: 830b8f31459ca484bdaf2caa8ff4b7cbf21c77ac)
Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Signed-off-by: Saul Wold <sgw@linux.intel.com>
Conflicts:
bitbake/lib/bb/cooker.py
bitbake/lib/bb/main.py
bitbake/lib/bb/tinfoil.py
bitbake/lib/bb/ui/knotty.py
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rwxr-xr-x | bitbake/bin/bitbake | 1 | ||||
-rw-r--r-- | bitbake/lib/bb/cooker.py | 29 | ||||
-rw-r--r-- | bitbake/lib/bb/tinfoil.py | 5 | ||||
-rw-r--r-- | bitbake/lib/bb/ui/knotty.py | 43 | ||||
-rw-r--r-- | bitbake/lib/bb/utils.py | 29 |
5 files changed, 84 insertions, 23 deletions
diff --git a/bitbake/bin/bitbake b/bitbake/bin/bitbake index a2e8cc13b0..32120e7392 100755 --- a/bitbake/bin/bitbake +++ b/bitbake/bin/bitbake | |||
@@ -263,6 +263,7 @@ def start_server(servermodule, configParams, configuration, features): | |||
263 | logger.handle(event) | 263 | logger.handle(event) |
264 | raise exc_info[1], None, exc_info[2] | 264 | raise exc_info[1], None, exc_info[2] |
265 | server.detach() | 265 | server.detach() |
266 | cooker.lock.close() | ||
266 | return server | 267 | return server |
267 | 268 | ||
268 | 269 | ||
diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py index aeb3f71e2f..70cccefe0c 100644 --- a/bitbake/lib/bb/cooker.py +++ b/bitbake/lib/bb/cooker.py | |||
@@ -38,6 +38,8 @@ import bb, bb.exceptions, bb.command | |||
38 | from bb import utils, data, parse, event, cache, providers, taskdata, runqueue | 38 | from bb import utils, data, parse, event, cache, providers, taskdata, runqueue |
39 | import Queue | 39 | import Queue |
40 | import signal | 40 | import signal |
41 | import subprocess | ||
42 | import errno | ||
41 | import prserv.serv | 43 | import prserv.serv |
42 | import pyinotify | 44 | import pyinotify |
43 | 45 | ||
@@ -1442,6 +1444,33 @@ class BBCooker: | |||
1442 | def post_serve(self): | 1444 | def post_serve(self): |
1443 | prserv.serv.auto_shutdown(self.data) | 1445 | prserv.serv.auto_shutdown(self.data) |
1444 | bb.event.fire(CookerExit(), self.event_data) | 1446 | bb.event.fire(CookerExit(), self.event_data) |
1447 | lockfile = self.lock.name | ||
1448 | self.lock.close() | ||
1449 | self.lock = None | ||
1450 | |||
1451 | while not self.lock: | ||
1452 | with bb.utils.timeout(3): | ||
1453 | self.lock = bb.utils.lockfile(lockfile, shared=False, retry=False, block=True) | ||
1454 | if not self.lock: | ||
1455 | # Some systems may not have lsof available | ||
1456 | procs = None | ||
1457 | try: | ||
1458 | procs = subprocess.check_output(["lsof", '-w', lockfile], stderr=subprocess.STDOUT) | ||
1459 | except OSError as e: | ||
1460 | if e.errno != errno.ENOENT: | ||
1461 | raise | ||
1462 | if procs is None: | ||
1463 | # Fall back to fuser if lsof is unavailable | ||
1464 | try: | ||
1465 | procs = subprocess.check_output(["fuser", '-v', lockfile], stderr=subprocess.STDOUT) | ||
1466 | except OSError as e: | ||
1467 | if e.errno != errno.ENOENT: | ||
1468 | raise | ||
1469 | |||
1470 | msg = "Delaying shutdown due to active processes which appear to be holding bitbake.lock" | ||
1471 | if procs: | ||
1472 | msg += ":\n%s" % str(procs) | ||
1473 | print(msg) | ||
1445 | 1474 | ||
1446 | def shutdown(self, force = False): | 1475 | def shutdown(self, force = False): |
1447 | if force: | 1476 | if force: |
diff --git a/bitbake/lib/bb/tinfoil.py b/bitbake/lib/bb/tinfoil.py index 6bcbd47ab3..277131fcf1 100644 --- a/bitbake/lib/bb/tinfoil.py +++ b/bitbake/lib/bb/tinfoil.py | |||
@@ -84,6 +84,11 @@ class Tinfoil: | |||
84 | else: | 84 | else: |
85 | self.parseRecipes() | 85 | self.parseRecipes() |
86 | 86 | ||
87 | def shutdown(self): | ||
88 | self.cooker.shutdown(force=True) | ||
89 | self.cooker.post_serve() | ||
90 | self.cooker.unlockBitbake() | ||
91 | |||
87 | class TinfoilConfigParameters(ConfigParameters): | 92 | class TinfoilConfigParameters(ConfigParameters): |
88 | 93 | ||
89 | def __init__(self, **options): | 94 | def __init__(self, **options): |
diff --git a/bitbake/lib/bb/ui/knotty.py b/bitbake/lib/bb/ui/knotty.py index 9e58b31727..84664551df 100644 --- a/bitbake/lib/bb/ui/knotty.py +++ b/bitbake/lib/bb/ui/knotty.py | |||
@@ -536,24 +536,29 @@ def main(server, eventHandler, params, tf = TerminalFilter): | |||
536 | if not params.observe_only: | 536 | if not params.observe_only: |
537 | _, error = server.runCommand(["stateForceShutdown"]) | 537 | _, error = server.runCommand(["stateForceShutdown"]) |
538 | main.shutdown = 2 | 538 | main.shutdown = 2 |
539 | summary = "" | 539 | try: |
540 | if taskfailures: | 540 | summary = "" |
541 | summary += pluralise("\nSummary: %s task failed:", | 541 | if taskfailures: |
542 | "\nSummary: %s tasks failed:", len(taskfailures)) | 542 | summary += pluralise("\nSummary: %s task failed:", |
543 | for failure in taskfailures: | 543 | "\nSummary: %s tasks failed:", len(taskfailures)) |
544 | summary += "\n %s" % failure | 544 | for failure in taskfailures: |
545 | if warnings: | 545 | summary += "\n %s" % failure |
546 | summary += pluralise("\nSummary: There was %s WARNING message shown.", | 546 | if warnings: |
547 | "\nSummary: There were %s WARNING messages shown.", warnings) | 547 | summary += pluralise("\nSummary: There was %s WARNING message shown.", |
548 | if return_value and errors: | 548 | "\nSummary: There were %s WARNING messages shown.", warnings) |
549 | summary += pluralise("\nSummary: There was %s ERROR message shown, returning a non-zero exit code.", | 549 | if return_value and errors: |
550 | "\nSummary: There were %s ERROR messages shown, returning a non-zero exit code.", errors) | 550 | summary += pluralise("\nSummary: There was %s ERROR message shown, returning a non-zero exit code.", |
551 | if summary: | 551 | "\nSummary: There were %s ERROR messages shown, returning a non-zero exit code.", errors) |
552 | print(summary) | 552 | if summary: |
553 | 553 | print(summary) | |
554 | if interrupted: | 554 | |
555 | print("Execution was interrupted, returning a non-zero exit code.") | 555 | if interrupted: |
556 | if return_value == 0: | 556 | print("Execution was interrupted, returning a non-zero exit code.") |
557 | return_value = 1 | 557 | if return_value == 0: |
558 | return_value = 1 | ||
559 | except IOError as e: | ||
560 | import errno | ||
561 | if e.errno == errno.EPIPE: | ||
562 | pass | ||
558 | 563 | ||
559 | return return_value | 564 | return return_value |
diff --git a/bitbake/lib/bb/utils.py b/bitbake/lib/bb/utils.py index 2562db8e47..f217ae366c 100644 --- a/bitbake/lib/bb/utils.py +++ b/bitbake/lib/bb/utils.py | |||
@@ -31,6 +31,7 @@ import subprocess | |||
31 | import glob | 31 | import glob |
32 | import traceback | 32 | import traceback |
33 | import errno | 33 | import errno |
34 | import signal | ||
34 | from commands import getstatusoutput | 35 | from commands import getstatusoutput |
35 | from contextlib import contextmanager | 36 | from contextlib import contextmanager |
36 | 37 | ||
@@ -386,10 +387,30 @@ def fileslocked(files): | |||
386 | for lock in locks: | 387 | for lock in locks: |
387 | bb.utils.unlockfile(lock) | 388 | bb.utils.unlockfile(lock) |
388 | 389 | ||
389 | def lockfile(name, shared=False, retry=True): | 390 | @contextmanager |
391 | def timeout(seconds): | ||
392 | def timeout_handler(signum, frame): | ||
393 | pass | ||
394 | |||
395 | original_handler = signal.signal(signal.SIGALRM, timeout_handler) | ||
396 | |||
397 | try: | ||
398 | signal.alarm(seconds) | ||
399 | yield | ||
400 | finally: | ||
401 | signal.alarm(0) | ||
402 | signal.signal(signal.SIGALRM, original_handler) | ||
403 | |||
404 | def lockfile(name, shared=False, retry=True, block=False): | ||
390 | """ | 405 | """ |
391 | Use the file fn as a lock file, return when the lock has been acquired. | 406 | Use the specified file as a lock file, return when the lock has |
392 | Returns a variable to pass to unlockfile(). | 407 | been acquired. Returns a variable to pass to unlockfile(). |
408 | Parameters: | ||
409 | retry: True to re-try locking if it fails, False otherwise | ||
410 | block: True to block until the lock succeeds, False otherwise | ||
411 | The retry and block parameters are kind of equivalent unless you | ||
412 | consider the possibility of sending a signal to the process to break | ||
413 | out - at which point you want block=True rather than retry=True. | ||
393 | """ | 414 | """ |
394 | dirname = os.path.dirname(name) | 415 | dirname = os.path.dirname(name) |
395 | mkdirhier(dirname) | 416 | mkdirhier(dirname) |
@@ -402,7 +423,7 @@ def lockfile(name, shared=False, retry=True): | |||
402 | op = fcntl.LOCK_EX | 423 | op = fcntl.LOCK_EX |
403 | if shared: | 424 | if shared: |
404 | op = fcntl.LOCK_SH | 425 | op = fcntl.LOCK_SH |
405 | if not retry: | 426 | if not retry and not block: |
406 | op = op | fcntl.LOCK_NB | 427 | op = op | fcntl.LOCK_NB |
407 | 428 | ||
408 | while True: | 429 | while True: |