author      Patrick Ohly <patrick.ohly@intel.com>                 2016-11-30 10:50:10 +0100
committer   Richard Purdie <richard.purdie@linuxfoundation.org>   2016-12-07 10:37:59 +0000
commit      f1a527c5f6f0add2c7f209ffb69650733f692e81 (patch)
tree        58bb89bda00b059a2355333e6c0adf3c7ccfadc3 /meta/lib
parent      820c042b3603b4ab2a1e439c765085797859efe9 (diff)
download    poky-f1a527c5f6f0add2c7f209ffb69650733f692e81.tar.gz
buildstats: reduce amount of data stored for system utilization
Pre-processing /proc data during the build considerably reduces the
amount of data written to disk: 176KB instead of 4.7MB for a 20
minute build. Parsing also becomes faster.
The disk monitor log added another 16KB in that example build. The
overall buildstat was 20MB, so the overhead for monitoring system
utilization is small enough that it can be enabled by default.
(From OE-Core rev: b17812385cd55e81066d3ceda92dffdc6e5564da)
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: Ross Burton <ross.burton@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/lib')
-rw-r--r--   meta/lib/buildstats.py   114
1 file changed, 104 insertions, 10 deletions
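For reference, every sample written to the reduced log files introduced below is a
timestamp line, a single space-separated data line, and a blank separator line.
The following read-back sketch is not part of this commit; the parse_reduced_log
helper and the example file name are illustrative only.

    # Parse one of the reduced_proc_*.log files written by SystemStats.sample().
    # Each record is: "<timestamp>\n<value value ...>\n\n".
    def parse_reduced_log(path):
        samples = []
        with open(path, 'rb') as f:
            lines = f.read().split(b'\n')
        for i in range(0, len(lines) - 1, 3):
            ts_line = lines[i].decode('ascii').strip()
            data_line = lines[i + 1].decode('ascii').strip()
            if ts_line and data_line:
                samples.append((int(ts_line),
                                [float(x) for x in data_line.split()]))
        return samples

    # reduced_proc_stat.log stores (user, system, iowait) fractions per sample:
    # for ts, (user, system, iowait) in parse_reduced_log('reduced_proc_stat.log'):
    #     print(ts, user, system, iowait)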
diff --git a/meta/lib/buildstats.py b/meta/lib/buildstats.py
index 7c8b3521a7..9e5b6be020 100644
--- a/meta/lib/buildstats.py
+++ b/meta/lib/buildstats.py
@@ -3,6 +3,7 @@
 # like open log files and the time of the last sampling.
 
 import time
+import re
 import bb.event
 
 class SystemStats:
@@ -12,14 +13,18 @@ class SystemStats:
         bb.utils.mkdirhier(bsdir)
 
         self.proc_files = []
-        for filename in ('diskstats', 'meminfo', 'stat'):
+        for filename, handler in (
+                ('diskstats', self._reduce_diskstats),
+                ('meminfo', self._reduce_meminfo),
+                ('stat', self._reduce_stat),
+        ):
             # In practice, this class gets instantiated only once in
             # the bitbake cooker process. Therefore 'append' mode is
             # not strictly necessary, but using it makes the class
             # more robust should two processes ever write
             # concurrently.
-            self.proc_files.append((filename,
-                                    open(os.path.join(bsdir, 'proc_%s.log' % filename), 'ab')))
+            destfile = os.path.join(bsdir, '%sproc_%s.log' % ('reduced_' if handler else '', filename))
+            self.proc_files.append((filename, open(destfile, 'ab'), handler))
         self.monitor_disk = open(os.path.join(bsdir, 'monitor_disk.log'), 'ab')
         # Last time that we sampled /proc data resp. recorded disk monitoring data.
         self.last_proc = 0
@@ -30,23 +35,112 @@
         # depends on the heartbeat event, which fires less often.
         self.min_seconds = 1
 
+        self.meminfo_regex = re.compile(b'^(MemTotal|MemFree|Buffers|Cached|SwapTotal|SwapFree):\s*(\d+)')
+        self.diskstats_regex = re.compile(b'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')
+        self.diskstats_ltime = None
+        self.diskstats_data = None
+        self.stat_ltimes = None
+
     def close(self):
         self.monitor_disk.close()
-        for _, output in self.proc_files:
+        for _, output, _ in self.proc_files:
             output.close()
 
+    def _reduce_meminfo(self, time, data):
+        """
+        Extracts 'MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree'
+        and writes their values into a single line, in that order.
+        """
+        values = {}
+        for line in data.split(b'\n'):
+            m = self.meminfo_regex.match(line)
+            if m:
+                values[m.group(1)] = m.group(2)
+        if len(values) == 6:
+            return (time,
+                    b' '.join([values[x] for x in
+                               (b'MemTotal', b'MemFree', b'Buffers', b'Cached', b'SwapTotal', b'SwapFree')]) + b'\n')
+
+    def _diskstats_is_relevant_line(self, linetokens):
+        if len(linetokens) != 14:
+            return False
+        disk = linetokens[2]
+        return self.diskstats_regex.match(disk)
+
+    def _reduce_diskstats(self, time, data):
+        relevant_tokens = filter(self._diskstats_is_relevant_line, map(lambda x: x.split(), data.split(b'\n')))
+        diskdata = [0] * 3
+        reduced = None
+        for tokens in relevant_tokens:
+            # rsect
+            diskdata[0] += int(tokens[5])
+            # wsect
+            diskdata[1] += int(tokens[9])
+            # use
+            diskdata[2] += int(tokens[12])
+        if self.diskstats_ltime:
+            # We need to compute information about the time interval
+            # since the last sampling and record the result as sample
+            # for that point in the past.
+            interval = time - self.diskstats_ltime
+            if interval > 0:
+                sums = [ a - b for a, b in zip(diskdata, self.diskstats_data) ]
+                readTput = sums[0] / 2.0 * 100.0 / interval
+                writeTput = sums[1] / 2.0 * 100.0 / interval
+                util = float( sums[2] ) / 10 / interval
+                util = max(0.0, min(1.0, util))
+                reduced = (self.diskstats_ltime, (readTput, writeTput, util))
+
+        self.diskstats_ltime = time
+        self.diskstats_data = diskdata
+        return reduced
+
+
+    def _reduce_nop(self, time, data):
+        return (time, data)
+
+    def _reduce_stat(self, time, data):
+        if not data:
+            return None
+        # CPU times {user, nice, system, idle, io_wait, irq, softirq} from first line
+        tokens = data.split(b'\n', 1)[0].split()
+        times = [ int(token) for token in tokens[1:] ]
+        reduced = None
+        if self.stat_ltimes:
+            user = float((times[0] + times[1]) - (self.stat_ltimes[0] + self.stat_ltimes[1]))
+            system = float((times[2] + times[5] + times[6]) - (self.stat_ltimes[2] + self.stat_ltimes[5] + self.stat_ltimes[6]))
+            idle = float(times[3] - self.stat_ltimes[3])
+            iowait = float(times[4] - self.stat_ltimes[4])
+
+            aSum = max(user + system + idle + iowait, 1)
+            reduced = (time, (user/aSum, system/aSum, iowait/aSum))
+
+        self.stat_ltimes = times
+        return reduced
+
     def sample(self, event, force):
         now = time.time()
         if (now - self.last_proc > self.min_seconds) or force:
-            for filename, output in self.proc_files:
+            for filename, output, handler in self.proc_files:
                 with open(os.path.join('/proc', filename), 'rb') as input:
                     data = input.read()
-                    # Unbuffered raw write, less overhead and useful
-                    # in case that we end up with concurrent writes.
-                    os.write(output.fileno(),
-                             ('%.0f\n' % now).encode('ascii') +
-                             data +
-                             b'\n')
+                    if handler:
+                        reduced = handler(now, data)
+                    else:
+                        reduced = (now, data)
+                    if reduced:
+                        if isinstance(reduced[1], bytes):
+                            # Use as it is.
+                            data = reduced[1]
+                        else:
+                            # Convert to a single line.
+                            data = (' '.join([str(x) for x in reduced[1]]) + '\n').encode('ascii')
+                        # Unbuffered raw write, less overhead and useful
+                        # in case that we end up with concurrent writes.
+                        os.write(output.fileno(),
+                                 ('%.0f\n' % reduced[0]).encode('ascii') +
+                                 data +
+                                 b'\n')
             self.last_proc = now
 
         if isinstance(event, bb.event.MonitorDiskEvent) and \
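The _reduce_stat() handler boils the first line of /proc/stat down to user,
system and iowait fractions by dividing the per-category tick deltas between two
samples by the total delta. A standalone sketch of the same arithmetic outside
of bitbake follows; the function names and the one-second interval are
illustrative only, not part of the commit.

    import time

    def read_cpu_times():
        # First line of /proc/stat: "cpu  user nice system idle iowait irq softirq ..."
        with open('/proc/stat', 'rb') as f:
            tokens = f.read().split(b'\n', 1)[0].split()
        return [int(token) for token in tokens[1:]]

    def cpu_fractions(prev, cur):
        user = float((cur[0] + cur[1]) - (prev[0] + prev[1]))            # user + nice
        system = float((cur[2] + cur[5] + cur[6]) -
                       (prev[2] + prev[5] + prev[6]))                    # system + irq + softirq
        idle = float(cur[3] - prev[3])
        iowait = float(cur[4] - prev[4])
        total = max(user + system + idle + iowait, 1)
        return user / total, system / total, iowait / total

    prev = read_cpu_times()
    time.sleep(1)
    cur = read_cpu_times()
    print('user/system/iowait fractions: %.3f %.3f %.3f' % cpu_fractions(prev, cur))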