From bc22d82c2f0c16ee8ad2edeb3bcdfb31db0afa94 Mon Sep 17 00:00:00 2001 From: Mark Asselstine Date: Thu, 28 Dec 2023 16:01:17 -0500 Subject: bitbake: server/process: catch and expand multiprocessing connection exceptions Doing builds on systems with limited resources, or with high demand package builds such as chromium it isn't uncommon for the OOM Killer to be triggered and for bitbake-server to be selected as the process to be killed. When the bitbake-server does terminate unexpectedly due to the OOM Killer or otherwise, this currently results in a generic python traceback with little indication as to what has failed. Here we trap and raise the exceptions while extending the exception text in runCommand() to make it clear that this is most likely caused by the bitbake-server unexpectedly terminating. Callers of runCommand() should be updated to properly handle the BrokenPipeError and EOFError exceptions to avoid printing a python traceback, but even if they don't, the added text in the exceptions should provide some hints as to what might have caused the failure. (Bitbake rev: 5ff62b802f79acc86bbd6a99484f08501ff5dc2d) Signed-off-by: Mark Asselstine Signed-off-by: Richard Purdie --- bitbake/lib/bb/server/process.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'bitbake/lib/bb/server') diff --git a/bitbake/lib/bb/server/process.py b/bitbake/lib/bb/server/process.py index d495ac6245..6d77ce4786 100644 --- a/bitbake/lib/bb/server/process.py +++ b/bitbake/lib/bb/server/process.py @@ -500,12 +500,18 @@ class ServerCommunicator(): self.recv = recv def runCommand(self, command): - self.connection.send(command) + try: + self.connection.send(command) + except BrokenPipeError as e: + raise BrokenPipeError("bitbake-server might have died or been forcibly stopped, ie. OOM killed") from e if not self.recv.poll(30): logger.info("No reply from server in 30s (for command %s at %s)" % (command[0], currenttime())) if not self.recv.poll(30): raise ProcessTimeout("Timeout while waiting for a reply from the bitbake server (60s at %s)" % currenttime()) - ret, exc = self.recv.get() + try: + ret, exc = self.recv.get() + except EOFError as e: + raise EOFError("bitbake-server might have died or been forcibly stopped, ie. OOM killed") from e # Should probably turn all exceptions in exc back into exceptions? # For now, at least handle BBHandledException if exc and ("BBHandledException" in exc or "SystemExit" in exc): -- cgit v1.2.3-54-g00ecf