summaryrefslogtreecommitdiffstats
path: root/bitbake
diff options
context:
space:
mode:
authorAryaman Gupta <aryaman.gupta@windriver.com>2022-08-16 15:13:43 -0400
committerRichard Purdie <richard.purdie@linuxfoundation.org>2022-08-23 15:57:11 +0100
commiteaf8d5efa0c2472dbaf0df962c0e516191bde1a5 (patch)
tree3603cb3caa8da3122d67d363054cbf4ed89ac3be /bitbake
parent4aad5914efe9789755789856882aac53de6c4ed3 (diff)
downloadpoky-eaf8d5efa0c2472dbaf0df962c0e516191bde1a5.tar.gz
bitbake: bitbake: runqueue: add cpu/io pressure regulation
Prevent the scheduler from starting new tasks if the current cpu or io pressure is above a certain threshold and there is at least one active task. This threshold can be specified through the "BB_PRESSURE_MAX_{CPU|IO}" variables in conf/local.conf. The threshold represents the difference in "total" pressure from the previous second. The pressure data is discussed in this oe-core commit: 061931520b buildstats.py: enable collection of /proc/pressure data where one can see that the average and "total" values are available. From tests, it was seen that while using the averaged data was somewhat useful, the latency in regulating builds was too high. By taking the difference between the current pressure and the pressure seen in the previous second, better regulation occurs. Using a shorter time period is appealing but, due to fluctuations in pressure, comparing the current pressure to 1 second ago achieves a reasonable compromise. One can look at the buildstats logs, which usually sample once per second, to decide a sensible threshold. If the thresholds aren't specified, pressure is not monitored and hence there is no impact on build times. A threshold below the arbitrary lower limit of 1.0 results in a fatal error, to avoid extremely long builds. If the limits are higher than 1,000,000, then warnings are issued to inform users that the specified limit is very high and unlikely to result in any regulation. The current bitbake scheduling algorithm requires that at least one task be active. This means that if high pressure is seen, then new tasks will not be started and pressure will be checked only for as long as at least one task is active. When there are no active tasks, an additional task will be started and pressure checking resumed. This behaviour means that if an external source is causing the pressure to exceed the threshold, bitbake will continue to make some progress towards the requested target. 
This violates the intent of limiting pressure but, given the current scheduling algorithm as described above, there seems to be no other option. In the case where only one bitbake build is running, the implications of the scheduler requirement will likely result in pressure being higher than the threshold. More work would be required to ensure that the pressure threshold is never exceeded, for example by adding pressure monitoring to make and ninja. (Bitbake rev: 502e05cbe67fb7a0e804dcc2cc0764a2e05c014f) (Bitbake rev: 66741d216e9d4343e82a94f00cd39751632a5b96) Signed-off-by: Aryaman Gupta <aryaman.gupta@windriver.com> Signed-off-by: Randy Macleod <randy.macleod@windriver.com> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake')
-rw-r--r--bitbake/lib/bb/runqueue.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/bitbake/lib/bb/runqueue.py b/bitbake/lib/bb/runqueue.py
index a513b0983b..9aa99ef4a1 100644
--- a/bitbake/lib/bb/runqueue.py
+++ b/bitbake/lib/bb/runqueue.py
@@ -24,6 +24,7 @@ import pickle
24from multiprocessing import Process 24from multiprocessing import Process
25import shlex 25import shlex
26import pprint 26import pprint
27import time
27 28
28bblogger = logging.getLogger("BitBake") 29bblogger = logging.getLogger("BitBake")
29logger = logging.getLogger("BitBake.RunQueue") 30logger = logging.getLogger("BitBake.RunQueue")
@@ -142,6 +143,46 @@ class RunQueueScheduler(object):
142 self.buildable.append(tid) 143 self.buildable.append(tid)
143 144
144 self.rev_prio_map = None 145 self.rev_prio_map = None
146 self.is_pressure_usable()
147
def is_pressure_usable(self):
    """
    Decide whether pressure regulation can be used and record a baseline.

    Monitoring is only attempted when self.rq.max_cpu_pressure or
    self.rq.max_io_pressure is set. In that case the /proc/pressure/cpu and
    /proc/pressure/io files are opened and read once; the values read are
    stored as the baseline (self.prev_cpu_pressure, self.prev_io_pressure,
    self.prev_pressure_time). Being able to open the files is not enough:
    for example on openSUSE the /proc/pressure/* files have readable file
    permissions but when read the error EOPNOTSUPP (Operation not supported)
    is returned.

    Sets self.check_pressure and returns the same value: True if pressure
    is to be monitored, False otherwise.
    """
    self.check_pressure = False

    # No threshold configured: pressure is not monitored at all.
    if not (self.rq.max_cpu_pressure or self.rq.max_io_pressure):
        return False

    try:
        with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
            # extract "total" (fifth field, "name=value") from the first line
            baseline_cpu = cpu_pressure_fds.readline().split()[4].split("=")[1]
            baseline_io = io_pressure_fds.readline().split()[4].split("=")[1]
    except Exception:
        # Best effort: any failure to open, read or parse the files disables
        # monitoring. Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        bb.warn("The /proc/pressure files can't be read. Continuing build without monitoring pressure")
        return False

    self.prev_cpu_pressure = baseline_cpu
    self.prev_io_pressure = baseline_io
    self.prev_pressure_time = time.time()
    self.check_pressure = True
    return True
166
def exceeds_max_pressure(self):
    """
    Monitor the difference in total pressure at least once per second, if
    BB_PRESSURE_MAX_{CPU|IO} are set, return True if above threshold.

    Returns False when self.check_pressure is not set. Otherwise reads the
    current "total" values from /proc/pressure/{cpu|io}, compares the change
    since the stored baseline against self.rq.max_cpu_pressure and
    self.rq.max_io_pressure, and refreshes the baseline once it is more than
    one second old. Always returns a bool.
    """
    if not self.check_pressure:
        return False

    with open("/proc/pressure/cpu") as cpu_pressure_fds, open("/proc/pressure/io") as io_pressure_fds:
        # extract "total" from /proc/pressure/{cpu|io}
        curr_cpu_pressure = cpu_pressure_fds.readline().split()[4].split("=")[1]
        curr_io_pressure = io_pressure_fds.readline().split()[4].split("=")[1]

    now = time.time()
    elapsed = now - self.prev_pressure_time

    # The thresholds are meant as a change over one second. The baseline is
    # only refreshed when this method is called, so it can be much older than
    # one second; in that case scale the accumulated change back to a
    # per-second value. A baseline younger than one second is compared
    # unscaled.
    window = max(elapsed, 1.0)
    cpu_delta = (float(curr_cpu_pressure) - float(self.prev_cpu_pressure)) / window
    io_delta = (float(curr_io_pressure) - float(self.prev_io_pressure)) / window

    exceeds_cpu_pressure = bool(self.rq.max_cpu_pressure) and cpu_delta > self.rq.max_cpu_pressure
    exceeds_io_pressure = bool(self.rq.max_io_pressure) and io_delta > self.rq.max_io_pressure

    if elapsed > 1.0:
        self.prev_cpu_pressure = curr_cpu_pressure
        self.prev_io_pressure = curr_io_pressure
        self.prev_pressure_time = now

    return exceeds_cpu_pressure or exceeds_io_pressure
145 186
146 def next_buildable_task(self): 187 def next_buildable_task(self):
147 """ 188 """
@@ -155,6 +196,12 @@ class RunQueueScheduler(object):
155 if not buildable: 196 if not buildable:
156 return None 197 return None
157 198
199 # Bitbake requires that at least one task be active. Only check for pressure if
200 # this is the case, otherwise the pressure limitation could result in no tasks
201 # being active and no new tasks started thereby, at times, breaking the scheduler.
202 if self.rq.stats.active and self.exceeds_max_pressure():
203 return None
204
158 # Filter out tasks that have a max number of threads that have been exceeded 205 # Filter out tasks that have a max number of threads that have been exceeded
159 skip_buildable = {} 206 skip_buildable = {}
160 for running in self.rq.runq_running.difference(self.rq.runq_complete): 207 for running in self.rq.runq_running.difference(self.rq.runq_complete):
@@ -1700,6 +1747,8 @@ class RunQueueExecute:
1700 1747
1701 self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1) 1748 self.number_tasks = int(self.cfgData.getVar("BB_NUMBER_THREADS") or 1)
1702 self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed" 1749 self.scheduler = self.cfgData.getVar("BB_SCHEDULER") or "speed"
1750 self.max_cpu_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_CPU")
1751 self.max_io_pressure = self.cfgData.getVar("BB_PRESSURE_MAX_IO")
1703 1752
1704 self.sq_buildable = set() 1753 self.sq_buildable = set()
1705 self.sq_running = set() 1754 self.sq_running = set()
@@ -1735,6 +1784,22 @@ class RunQueueExecute:
1735 if self.number_tasks <= 0: 1784 if self.number_tasks <= 0:
1736 bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks) 1785 bb.fatal("Invalid BB_NUMBER_THREADS %s" % self.number_tasks)
1737 1786
1787 lower_limit = 1.0
1788 upper_limit = 1000000.0
1789 if self.max_cpu_pressure:
1790 self.max_cpu_pressure = float(self.max_cpu_pressure)
1791 if self.max_cpu_pressure < lower_limit:
1792 bb.fatal("Invalid BB_PRESSURE_MAX_CPU %s, minimum value is %s." % (self.max_cpu_pressure, lower_limit))
1793 if self.max_cpu_pressure > upper_limit:
1794 bb.warn("Your build will be largely unregulated since BB_PRESSURE_MAX_CPU is set to %s. It is very unlikely that such high pressure will be experienced." % (self.max_cpu_pressure))
1795
1796 if self.max_io_pressure:
1797 self.max_io_pressure = float(self.max_io_pressure)
1798 if self.max_io_pressure < lower_limit:
1799 bb.fatal("Invalid BB_PRESSURE_MAX_IO %s, minimum value is %s." % (self.max_io_pressure, lower_limit))
1800 if self.max_io_pressure > upper_limit:
1801 bb.warn("Your build will be largely unregulated since BB_PRESSURE_MAX_IO is set to %s. It is very unlikely that such high pressure will be experienced." % (self.max_io_pressure))
1802
1738 # List of setscene tasks which we've covered 1803 # List of setscene tasks which we've covered
1739 self.scenequeue_covered = set() 1804 self.scenequeue_covered = set()
1740 # List of tasks which are covered (including setscene ones) 1805 # List of tasks which are covered (including setscene ones)