diff options
author | Richard Purdie <richard.purdie@linuxfoundation.org> | 2014-03-19 12:53:05 +0000 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2014-03-19 13:48:58 +0000 |
commit | ac4ff568f5b2ad4f50a1b7b60797e7b797c314bb (patch) | |
tree | d814ab087c7b583dec5e25e5bfb1b3bbd3963e39 | |
parent | ea52b5e21b71e0731ea0b8356770691b4b93d5be (diff) | |
download | poky-ac4ff568f5b2ad4f50a1b7b60797e7b797c314bb.tar.gz |
bitbake: runqueue: Revert child signal handler for now
We're running into processes using 100% CPU. It appears these are locked in
a subprocess.poll() type loop where the process has exited but the code is
looping as it's not handling the ECHILD error.
http://bugs.python.org/issue14396
http://bugs.python.org/issue15756
This is likely due to one or both of the above bugs. The question is what actually
grabbed the child exit code, as it wasn't this code. It's likely there is therefore
some other code racing and taking that code; it may be some kind of race like:
http://hg.python.org/cpython/rev/767420808a62/
where the fix effectively catches the child's exit codes in a different part of the system.
We could try and get everyone onto Python 2.7.4, where the above bugs are fixed; however,
for now it's safer to admit defeat and go back to polling explicitly for our worker exit
codes.
(Bitbake rev: 5b9a099ec2a1dc954b614e12a306595f55b6a99e)
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- | bitbake/lib/bb/runqueue.py | 45 |
1 files changed, 17 insertions, 28 deletions
diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py index fc6bec11f9..e62bb5232f 100644 --- a/bitbake/lib/bb/runqueue.py +++ b/bitbake/lib/bb/runqueue.py | |||
@@ -856,7 +856,6 @@ class RunQueue: | |||
856 | self.workerpipe = None | 856 | self.workerpipe = None |
857 | self.fakeworker = None | 857 | self.fakeworker = None |
858 | self.fakeworkerpipe = None | 858 | self.fakeworkerpipe = None |
859 | self.oldsigchld = None | ||
860 | 859 | ||
861 | def _start_worker(self, fakeroot = False, rqexec = None): | 860 | def _start_worker(self, fakeroot = False, rqexec = None): |
862 | logger.debug(1, "Starting bitbake-worker") | 861 | logger.debug(1, "Starting bitbake-worker") |
@@ -913,34 +912,10 @@ class RunQueue: | |||
913 | continue | 912 | continue |
914 | workerpipe.close() | 913 | workerpipe.close() |
915 | 914 | ||
916 | def sigchild_exception(self, *args, **kwargs): | ||
917 | for w in [self.worker, self.fakeworker]: | ||
918 | if not w: | ||
919 | continue | ||
920 | try: | ||
921 | pid, status = os.waitpid(w.pid, os.WNOHANG) | ||
922 | if pid != 0 and not self.teardown: | ||
923 | name = None | ||
924 | if self.worker and pid == self.worker.pid: | ||
925 | name = "Worker" | ||
926 | elif self.fakeworker and pid == self.fakeworker.pid: | ||
927 | name = "Fakeroot" | ||
928 | else: | ||
929 | bb.warn("Unknown process (%s) exited unexpectedly (%s), shutting down..." % (pid, str(status))) | ||
930 | if name and not self.teardown: | ||
931 | bb.error("%s process (%s) exited unexpectedly (%s), shutting down..." % (name, pid, str(status))) | ||
932 | self.finish_runqueue(True) | ||
933 | except OSError: | ||
934 | return | ||
935 | |||
936 | def start_worker(self): | 915 | def start_worker(self): |
937 | if self.worker: | 916 | if self.worker: |
938 | self.teardown_workers() | 917 | self.teardown_workers() |
939 | self.teardown = False | 918 | self.teardown = False |
940 | if self.oldsigchld is None: | ||
941 | self.oldsigchld = signal.signal(signal.SIGCHLD, self.sigchild_exception) | ||
942 | if self.oldsigchld is None: | ||
943 | self.oldsigchld = signal.SIG_DFL | ||
944 | self.worker, self.workerpipe = self._start_worker() | 919 | self.worker, self.workerpipe = self._start_worker() |
945 | 920 | ||
946 | def start_fakeworker(self, rqexec): | 921 | def start_fakeworker(self, rqexec): |
@@ -949,9 +924,6 @@ class RunQueue: | |||
949 | 924 | ||
950 | def teardown_workers(self): | 925 | def teardown_workers(self): |
951 | self.teardown = True | 926 | self.teardown = True |
952 | if self.oldsigchld is not None: | ||
953 | signal.signal(signal.SIGCHLD, self.oldsigchld) | ||
954 | self.oldsigchld = None | ||
955 | self._teardown_worker(self.worker, self.workerpipe) | 927 | self._teardown_worker(self.worker, self.workerpipe) |
956 | self.worker = None | 928 | self.worker = None |
957 | self.workerpipe = None | 929 | self.workerpipe = None |
@@ -2118,6 +2090,23 @@ class runQueuePipe(): | |||
2118 | self.rqexec = rqexec | 2090 | self.rqexec = rqexec |
2119 | 2091 | ||
2120 | def read(self): | 2092 | def read(self): |
2093 | try: | ||
2094 | for w in [self.rq.worker, self.rq.fakeworker]: | ||
2095 | if not w: | ||
2096 | continue | ||
2097 | pid, status = os.waitpid(w.pid, os.WNOHANG) | ||
2098 | if pid != 0 and not self.rq.teardown: | ||
2099 | if self.rq.worker and pid == self.rq.worker.pid: | ||
2100 | name = "Worker" | ||
2101 | elif self.rq.fakeworker and pid == self.rq.fakeworker.pid: | ||
2102 | name = "Fakeroot" | ||
2103 | else: | ||
2104 | name = "Unknown" | ||
2105 | bb.error("%s process (%s) exited unexpectedly (%s), shutting down..." % (name, pid, str(status))) | ||
2106 | self.rq.finish_runqueue(True) | ||
2107 | except OSError: | ||
2108 | pass | ||
2109 | |||
2121 | start = len(self.queue) | 2110 | start = len(self.queue) |
2122 | try: | 2111 | try: |
2123 | self.queue = self.queue + self.input.read(102400) | 2112 | self.queue = self.queue + self.input.read(102400) |