summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Richard Purdie <richard.purdie@linuxfoundation.org> 2014-03-19 12:53:05 +0000
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2014-03-19 13:48:58 +0000
commit: ac4ff568f5b2ad4f50a1b7b60797e7b797c314bb (patch)
tree: d814ab087c7b583dec5e25e5bfb1b3bbd3963e39
parent: ea52b5e21b71e0731ea0b8356770691b4b93d5be (diff)
download: poky-ac4ff568f5b2ad4f50a1b7b60797e7b797c314bb.tar.gz
bitbake: runqueue: Revert child signal handler for now
We're running into processes using 100% CPU. It appears these are locked in a subprocess.poll() type loop where the process has exited but the code is looping as it's not handling the ECHILD error. http://bugs.python.org/issue14396 http://bugs.python.org/issue15756 This is likely due to one or both of the above bugs. The question is what actually grabbed the child exit code, as it wasn't this code. It's therefore likely that some other code is racing and taking that exit code; it may be some kind of race like: http://hg.python.org/cpython/rev/767420808a62/ where the fix effectively catches the child's exit codes in a different part of the system. We could try and get everyone onto Python 2.7.4, where the above bugs are fixed; however, for now it's safer to admit defeat and go back to polling explicitly for our worker exit codes. (Bitbake rev: 5b9a099ec2a1dc954b614e12a306595f55b6a99e) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- bitbake/lib/bb/runqueue.py 45
1 files changed, 17 insertions, 28 deletions
diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
index fc6bec11f9..e62bb5232f 100644
--- a/bitbake/lib/bb/runqueue.py
+++ b/bitbake/lib/bb/runqueue.py
@@ -856,7 +856,6 @@ class RunQueue:
856 self.workerpipe = None 856 self.workerpipe = None
857 self.fakeworker = None 857 self.fakeworker = None
858 self.fakeworkerpipe = None 858 self.fakeworkerpipe = None
859 self.oldsigchld = None
860 859
861 def _start_worker(self, fakeroot = False, rqexec = None): 860 def _start_worker(self, fakeroot = False, rqexec = None):
862 logger.debug(1, "Starting bitbake-worker") 861 logger.debug(1, "Starting bitbake-worker")
@@ -913,34 +912,10 @@ class RunQueue:
913 continue 912 continue
914 workerpipe.close() 913 workerpipe.close()
915 914
916 def sigchild_exception(self, *args, **kwargs):
917 for w in [self.worker, self.fakeworker]:
918 if not w:
919 continue
920 try:
921 pid, status = os.waitpid(w.pid, os.WNOHANG)
922 if pid != 0 and not self.teardown:
923 name = None
924 if self.worker and pid == self.worker.pid:
925 name = "Worker"
926 elif self.fakeworker and pid == self.fakeworker.pid:
927 name = "Fakeroot"
928 else:
929 bb.warn("Unknown process (%s) exited unexpectedly (%s), shutting down..." % (pid, str(status)))
930 if name and not self.teardown:
931 bb.error("%s process (%s) exited unexpectedly (%s), shutting down..." % (name, pid, str(status)))
932 self.finish_runqueue(True)
933 except OSError:
934 return
935
936 def start_worker(self): 915 def start_worker(self):
937 if self.worker: 916 if self.worker:
938 self.teardown_workers() 917 self.teardown_workers()
939 self.teardown = False 918 self.teardown = False
940 if self.oldsigchld is None:
941 self.oldsigchld = signal.signal(signal.SIGCHLD, self.sigchild_exception)
942 if self.oldsigchld is None:
943 self.oldsigchld = signal.SIG_DFL
944 self.worker, self.workerpipe = self._start_worker() 919 self.worker, self.workerpipe = self._start_worker()
945 920
946 def start_fakeworker(self, rqexec): 921 def start_fakeworker(self, rqexec):
@@ -949,9 +924,6 @@ class RunQueue:
949 924
950 def teardown_workers(self): 925 def teardown_workers(self):
951 self.teardown = True 926 self.teardown = True
952 if self.oldsigchld is not None:
953 signal.signal(signal.SIGCHLD, self.oldsigchld)
954 self.oldsigchld = None
955 self._teardown_worker(self.worker, self.workerpipe) 927 self._teardown_worker(self.worker, self.workerpipe)
956 self.worker = None 928 self.worker = None
957 self.workerpipe = None 929 self.workerpipe = None
@@ -2118,6 +2090,23 @@ class runQueuePipe():
2118 self.rqexec = rqexec 2090 self.rqexec = rqexec
2119 2091
2120 def read(self): 2092 def read(self):
2093 try:
2094 for w in [self.rq.worker, self.rq.fakeworker]:
2095 if not w:
2096 continue
2097 pid, status = os.waitpid(w.pid, os.WNOHANG)
2098 if pid != 0 and not self.rq.teardown:
2099 if self.rq.worker and pid == self.rq.worker.pid:
2100 name = "Worker"
2101 elif self.rq.fakeworker and pid == self.rq.fakeworker.pid:
2102 name = "Fakeroot"
2103 else:
2104 name = "Unknown"
2105 bb.error("%s process (%s) exited unexpectedly (%s), shutting down..." % (name, pid, str(status)))
2106 self.rq.finish_runqueue(True)
2107 except OSError:
2108 pass
2109
2121 start = len(self.queue) 2110 start = len(self.queue)
2122 try: 2111 try:
2123 self.queue = self.queue + self.input.read(102400) 2112 self.queue = self.queue + self.input.read(102400)