summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAryaman Gupta <aryaman.gupta@windriver.com>2022-07-21 17:00:03 -0400
committerRichard Purdie <richard.purdie@linuxfoundation.org>2022-07-28 11:55:06 +0100
commit48a6d84de10777335f70cfb5836cd1db0f2ff257 (patch)
tree117007d3fa8d7ed11eb40a6ab4152ff75cfa4ca1
parentace415e6221cd9b60da6f476d9619214ec3c5c23 (diff)
downloadpoky-48a6d84de10777335f70cfb5836cd1db0f2ff257.tar.gz
bitbake: runqueue: add cpu/io pressure regulation
Prevent the scheduler from starting new tasks if the current cpu or io pressure is above a certain threshold and there is at least one active task. This threshold can be specified through the "BB_PRESSURE_MAX_{CPU|IO}" variables in conf/local.conf. The threshold represents the difference in "total" pressure from the previous second. The pressure data is discussed in this oe-core commit: 061931520b buildstats.py: enable collection of /proc/pressure data where one can see that the average and "total" values are available. From tests, it was seen that while using the averaged data was somewhat useful, the latency in regulating builds was too high. By taking the difference between the current pressure and the pressure seen in the previous second, better regulation occurs. Using a shorter time period is appealing but due to fluctuations in pressure, comparing the current pressure to 1 second ago achieves a reasonable compromise. One can look at the buildstats logs, which usually sample once per second, to decide a sensible threshold. If the thresholds aren't specified, pressure is not monitored and hence there is no impact on build times. An arbitrary lower limit of 1.0 is enforced: a threshold below it results in a fatal error, to avoid extremely long builds. If the limits are higher than 1,000,000, then warnings are issued to inform users that the specified limit is very high and unlikely to result in any regulation. The current bitbake scheduling algorithm requires that at least one task be active. This means that if high pressure is seen, then new tasks will not be started and pressure will be checked only for as long as at least one task is active. When there are no active tasks, an additional task will be started and pressure checking resumed. This behaviour means that if an external source is causing the pressure to exceed the threshold, bitbake will continue to make some progress towards the requested target. 
This violates the intent of limiting pressure but, given the current scheduling algorithm as described above, there seems to be no other option. In the case where only one bitbake build is running, the implications of the scheduler requirement will likely result in pressure being higher than the threshold. More work would be required to ensure that the pressure threshold is never exceeded, for example by adding pressure monitoring to make and ninja. (Bitbake rev: 502e05cbe67fb7a0e804dcc2cc0764a2e05c014f) Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com> Signed-off-by: Randy Macleod <randy.macleod@windriver.com> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r--bitbake/lib/bb/runqueue.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
index 1e47fe70ef..359b503297 100644
--- a/bitbake/lib/bb/runqueue.py
+++ b/bitbake/lib/bb/runqueue.py
@@ -24,6 +24,7 @@ import pickle
24from multiprocessing import Process 24from multiprocessing import Process
25import shlex 25import shlex
26import pprint 26import pprint
27import time
27 28
28bblogger = logging.getLogger("BitBake") 29bblogger = logging.getLogger("BitBake")
29logger = logging.getLogger("BitBake.RunQueue") 30logger = logging.getLogger("BitBake.RunQueue")
@@ -159,6 +160,46 @@ class RunQueueScheduler(object):
159 self.buildable.append(tid) 160 self.buildable.append(tid)
160 161
161 self.rev_prio_map = None 162 self.rev_prio_map = None
163 self.is_pressure_usable()
164
def is_pressure_usable(self):
    """
    Probe the /proc/pressure files and record whether pressure can be monitored.

    Monitoring is only attempted when self.rq.max_cpu_pressure or
    self.rq.max_io_pressure is set. In that case the "total" field of the
    first line of /proc/pressure/cpu and /proc/pressure/io is read and stored
    as the baseline (self.prev_cpu_pressure, self.prev_io_pressure) together
    with the time of the reading (self.prev_pressure_time).

    The files being present is not enough: for example on openSUSE the
    /proc/pressure/* files have readable permissions but reading them fails
    with EOPNOTSUPP (Operation not supported), so they are actually read here.

    Sets self.check_pressure and returns the same value: True if pressure
    is to be monitored, False otherwise.
    """
    self.check_pressure = False

    # No threshold configured: pressure is never consulted.
    if not (self.rq.max_cpu_pressure or self.rq.max_io_pressure):
        return False

    try:
        with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
            # The fifth whitespace-separated field of the first line is "total=<value>".
            cpu_total = float(cpu_pressure_fds.readline().split()[4].split("=")[1])
            io_total = float(io_pressure_fds.readline().split()[4].split("=")[1])
    except (OSError, IndexError, ValueError):
        # OSError: file missing/unreadable or read not supported.
        # IndexError/ValueError: line not in the expected format.
        # Deliberately not a bare except, so Ctrl-C and SystemExit still propagate.
        bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
        return False

    # Only commit the baseline once both readings succeeded, so the state
    # is never left half-initialised.
    self.prev_cpu_pressure = cpu_total
    self.prev_io_pressure = io_total
    self.prev_pressure_time = time.time()
    self.check_pressure = True
    return True
183
def exceeds_max_pressure(self):
    """
    Report whether cpu or io pressure is currently above its configured limit.

    If pressure is being monitored (self.check_pressure), the "total" value
    is read from /proc/pressure/cpu and /proc/pressure/io and compared with
    the stored baseline. The baseline is refreshed once more than one second
    has passed since it was taken.

    The BB_PRESSURE_MAX_{CPU|IO} limits are expressed as a difference in
    "total" pressure over one second. When more than one second has passed
    since the baseline, the difference is therefore scaled to a per-second
    value before comparison; otherwise a long gap between calls would inflate
    the difference and block new tasks spuriously. Within the first second
    the raw difference is used.

    Returns True if either configured limit is exceeded, otherwise False.
    Always False when pressure is not being monitored.
    """
    if not self.check_pressure:
        return False

    with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
        # extract "total" from /proc/pressure/{cpu|io}
        curr_cpu_pressure = float(cpu_pressure_fds.readline().split()[4].split("=")[1])
        curr_io_pressure = float(io_pressure_fds.readline().split()[4].split("=")[1])

    now = time.time()
    elapsed = now - self.prev_pressure_time
    # Never scale up a sub-second difference; only scale down long intervals.
    window = max(elapsed, 1.0)

    cpu_delta = (curr_cpu_pressure - float(self.prev_cpu_pressure)) / window
    io_delta = (curr_io_pressure - float(self.prev_io_pressure)) / window

    # A limit that is unset (None/empty) never triggers.
    exceeds_cpu_pressure = bool(self.rq.max_cpu_pressure) and cpu_delta > self.rq.max_cpu_pressure
    exceeds_io_pressure = bool(self.rq.max_io_pressure) and io_delta > self.rq.max_io_pressure

    if elapsed > 1.0:
        self.prev_cpu_pressure = curr_cpu_pressure
        self.prev_io_pressure = curr_io_pressure
        self.prev_pressure_time = now

    return exceeds_cpu_pressure or exceeds_io_pressure
162 203
163 def next_buildable_task(self): 204 def next_buildable_task(self):
164 """ 205 """
@@ -172,6 +213,12 @@ class RunQueueScheduler(object):
172 if not buildable: 213 if not buildable:
173 return None 214 return None
174 215
216 # Bitbake requires that at least one task be active. Only check for pressure if
217 # this is the case, otherwise the pressure limitation could result in no tasks
218 # being active and no new tasks started thereby, at times, breaking the scheduler.
219 if self.rq.stats.active and self.exceeds_max_pressure():
220 return None
221
175 # Filter out tasks that have a max number of threads that have been exceeded 222 # Filter out tasks that have a max number of threads that have been exceeded
176 skip_buildable = {} 223 skip_buildable = {}
177 for running in self.rq.runq_running.difference(self.rq.runq_complete): 224 for running in self.rq.runq_running.difference(self.rq.runq_complete):
@@ -1699,6 +1746,8 @@ class RunQueueExecute:
1699 1746
1700 self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1) 1747 self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
1701 self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed" 1748 self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
1749 self.max_cpu_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_CPU")
1750 self.max_io_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_IO")
1702 1751
1703 self.sq_buildable = set() 1752 self.sq_buildable = set()
1704 self.sq_running = set() 1753 self.sq_running = set()
@@ -1733,6 +1782,22 @@ class RunQueueExecute:
1733 if self.number_tasks <= 0: 1782 if self.number_tasks <= 0:
1734 bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks) 1783 bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks)
1735 1784
1785 lower_limit = 1.0
1786 upper_limit = 1000000.0
1787 if self.max_cpu_pressure:
1788 self.max_cpu_pressure = float(self.max_cpu_pressure)
1789 if self.max_cpu_pressure < lower_limit:
1790 bb.fatal("Invalid BB_PRESSURE_MAX_CPU %s, minimum value is %s." % (self.max_cpu_pressure, lower_limit))
1791 if self.max_cpu_pressure > upper_limit:
1792 bb.warn("Your build will be largely unregulated since BB_PRESSURE_MAX_CPU is set to %s. It is very unlikely that such high pressure will be experienced." % (self.max_cpu_pressure))
1793
1794 if self.max_io_pressure:
1795 self.max_io_pressure = float(self.max_io_pressure)
1796 if self.max_io_pressure < lower_limit:
1797 bb.fatal("Invalid BB_PRESSURE_MAX_IO %s, minimum value is %s." % (self.max_io_pressure, lower_limit))
1798 if self.max_io_pressure > upper_limit:
1799 bb.warn("Your build will be largely unregulated since BB_PRESSURE_MAX_IO is set to %s. It is very unlikely that such high pressure will be experienced." % (self.max_io_pressure))
1800
1736 # List of setscene tasks which we've covered 1801 # List of setscene tasks which we've covered
1737 self.scenequeue_covered = set() 1802 self.scenequeue_covered = set()
1738 # List of tasks which are covered (including setscene ones) 1803 # List of tasks which are covered (including setscene ones)