diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2022-03-29 15:17:10 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2022-03-30 13:05:03 +0100 |
commit | 928bcb10a46939eaf801bea0b633e4624b5b5dfa (patch) | |
tree | 42f3bc730e5fd7f4306d7b090574e30343c85cee /bitbake/lib | |
parent | c0ff6c75eedea6e0f472b45456aed238073ac157 (diff) | |
download | poky-928bcb10a46939eaf801bea0b633e4624b5b5dfa.tar.gz |
bitbake: cooker/process: Fix signal handling lockups
If a parser process is terminated while holding a write lock, then it
will lead to a deadlock (see
https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Process.terminate).
With SIGTERM, we don't want to terminate while holding the lock. We also don't
want a SIGINT to cause a partial write to the event stream.
I tried using signal masks to avoid this but it doesn't work, see
https://bugs.python.org/issue47139
Instead, add a signal handler and defer any signal received inside the critical section until after it.
We also need a thread lock to ensure other threads in the same process don't
handle the signal until no thread is inside the critical section.
(Bitbake rev: a40efaa5556a188dfe46c8d060adde37dc400dcd)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib')
-rw-r--r-- | bitbake/lib/bb/cooker.py | 28 | ||||
-rw-r--r-- | bitbake/lib/bb/server/process.py | 22 |
2 files changed, 46 insertions, 4 deletions
diff --git a/bitbake/lib/bb/cooker.py b/bitbake/lib/bb/cooker.py index d6fcd9e05c..7c0c5d4efa 100644 --- a/bitbake/lib/bb/cooker.py +++ b/bitbake/lib/bb/cooker.py | |||
@@ -2017,6 +2017,22 @@ class Parser(multiprocessing.Process): | |||
2017 | self.context = bb.utils.get_context().copy() | 2017 | self.context = bb.utils.get_context().copy() |
2018 | self.handlers = bb.event.get_class_handlers().copy() | 2018 | self.handlers = bb.event.get_class_handlers().copy() |
2019 | self.profile = profile | 2019 | self.profile = profile |
2020 | self.queue_signals = False | ||
2021 | self.signal_received = [] | ||
2022 | self.signal_threadlock = threading.Lock() | ||
2023 | |||
2024 | def catch_sig(self, signum, frame): | ||
2025 | if self.queue_signals: | ||
2026 | self.signal_received.append(signum) | ||
2027 | else: | ||
2028 | self.handle_sig(signum, frame) | ||
2029 | |||
2030 | def handle_sig(self, signum, frame): | ||
2031 | if signum == signal.SIGTERM: | ||
2032 | signal.signal(signal.SIGTERM, signal.SIG_DFL) | ||
2033 | os.kill(os.getpid(), signal.SIGTERM) | ||
2034 | elif signum == signal.SIGINT: | ||
2035 | signal.default_int_handler(signum, frame) | ||
2020 | 2036 | ||
2021 | def run(self): | 2037 | def run(self): |
2022 | 2038 | ||
@@ -2036,9 +2052,17 @@ class Parser(multiprocessing.Process): | |||
2036 | prof.dump_stats(logfile) | 2052 | prof.dump_stats(logfile) |
2037 | 2053 | ||
2038 | def realrun(self): | 2054 | def realrun(self): |
2039 | signal.signal(signal.SIGTERM, signal.SIG_DFL) | 2055 | # Signal handling here is hard. We must not terminate any process or thread holding the write |
2056 | # lock for the event stream as it will not be released, ever, and things will hang. | ||
2057 | # Python handles signals in the main thread/process but they can be raised from any thread and | ||
2058 | # we want to defer processing of any SIGTERM/SIGINT signal until we're outside the critical section | ||
2059 | # and don't hold the lock (see server/process.py). We therefore always catch the signals (so any | ||
2060 | # new thread should also do so) and we defer handling but we handle with the local thread lock | ||
2061 | # held (a threading lock, not a multiprocessing one) so that no other thread in the process | ||
2062 | # can be in the critical section. | ||
2063 | signal.signal(signal.SIGTERM, self.catch_sig) | ||
2040 | signal.signal(signal.SIGHUP, signal.SIG_DFL) | 2064 | signal.signal(signal.SIGHUP, signal.SIG_DFL) |
2041 | signal.signal(signal.SIGINT, signal.SIG_IGN) | 2065 | signal.signal(signal.SIGINT, self.catch_sig) |
2042 | bb.utils.set_process_name(multiprocessing.current_process().name) | 2066 | bb.utils.set_process_name(multiprocessing.current_process().name) |
2043 | multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, exitpriority=1) | 2067 | multiprocessing.util.Finalize(None, bb.codeparser.parser_cache_save, exitpriority=1) |
2044 | multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, exitpriority=1) | 2068 | multiprocessing.util.Finalize(None, bb.fetch.fetcher_parse_save, exitpriority=1) |
diff --git a/bitbake/lib/bb/server/process.py b/bitbake/lib/bb/server/process.py index 7c587a9110..ce53fdc678 100644 --- a/bitbake/lib/bb/server/process.py +++ b/bitbake/lib/bb/server/process.py | |||
@@ -20,6 +20,7 @@ import os | |||
20 | import sys | 20 | import sys |
21 | import time | 21 | import time |
22 | import select | 22 | import select |
23 | import signal | ||
23 | import socket | 24 | import socket |
24 | import subprocess | 25 | import subprocess |
25 | import errno | 26 | import errno |
@@ -737,11 +738,28 @@ class ConnectionWriter(object): | |||
737 | # Why bb.event needs this I have no idea | 738 | # Why bb.event needs this I have no idea |
738 | self.event = self | 739 | self.event = self |
739 | 740 | ||
740 | def send(self, obj): | 741 | def _send(self, obj): |
741 | obj = multiprocessing.reduction.ForkingPickler.dumps(obj) | ||
742 | with self.wlock: | 742 | with self.wlock: |
743 | self.writer.send_bytes(obj) | 743 | self.writer.send_bytes(obj) |
744 | 744 | ||
745 | def send(self, obj): | ||
746 | obj = multiprocessing.reduction.ForkingPickler.dumps(obj) | ||
747 | # See notes/code in CookerParser | ||
748 | # We must not terminate holding this lock else processes will hang. | ||
749 | # For SIGTERM, raising afterwards avoids this. | ||
750 | # For SIGINT, we don't want to have written partial data to the pipe. | ||
751 | # pthread_sigmask block/unblock would be nice but doesn't work, https://bugs.python.org/issue47139 | ||
752 | process = multiprocessing.current_process() | ||
753 | if process and hasattr(process, "queue_signals"): | ||
754 | with process.signal_threadlock: | ||
755 | process.queue_signals = True | ||
756 | self._send(obj) | ||
757 | process.queue_signals = False | ||
758 | for sig in process.signal_received.pop(): | ||
759 | process.handle_sig(sig, None) | ||
760 | else: | ||
761 | self._send(obj) | ||
762 | |||
745 | def fileno(self): | 763 | def fileno(self): |
746 | return self.writer.fileno() | 764 | return self.writer.fileno() |
747 | 765 | ||