# Origin: bitbake commit a19687acd12497d727203e63d74b2703387f34a6
# (Richard Purdie, Wed, 4 Jan 2023), bitbake/lib/bb/utils.py:
# "bitbake: lib/bb: Update thread/process locks to use a timeout"
# (Bitbake rev: d2a3f662b0eed900fc012a392bfa0a365df0df9b)
#
# The thread/process locks we use translate to futexes in Linux. If a process
# dies holding the lock (for example the OOM killer taking out a parser
# process), anything else trying to take the lock will hang indefinitely.
#
# If we don't have a timeout of some kind and a process/thread exits badly (for example
# OOM killed) and held a lock, we'd just hang in the lock futex forever. It is better
# we exit at some point than hang. 5 minutes with no progress means we're probably deadlocked.
@contextmanager
def lock_timeout(lock, timeout=5*60):
    """Hold *lock* for the duration of a ``with`` block, giving up after *timeout*.

    Args:
        lock: Any object exposing ``acquire(timeout=...)`` returning a truthy
            value on success, and ``release()``.
        timeout: Seconds to wait for the lock. Defaults to 5 minutes, the
            value that was previously hard-coded.

    Yields:
        The (truthy) value returned by ``lock.acquire()``.

    If the lock cannot be obtained within *timeout*, the process is terminated
    immediately with ``os._exit(1)``. ``_exit()`` is used deliberately: writing
    error messages to event handler queues would itself need locks, which
    could hang in the same way.
    """
    held = lock.acquire(timeout=timeout)
    if not held:
        # Checked before entering the try block so the release below is only
        # ever reachable once the lock is actually held.
        os._exit(1)
    try:
        yield held
    finally:
        # Runs on both normal exit and when the with-body raises.
        lock.release()