author      Patrick Ohly <patrick.ohly@intel.com>                 2016-11-30 10:50:10 +0100
committer   Richard Purdie <richard.purdie@linuxfoundation.org>   2016-12-07 10:37:59 +0000
commit      f1a527c5f6f0add2c7f209ffb69650733f692e81 (patch)
tree        58bb89bda00b059a2355333e6c0adf3c7ccfadc3 /meta/lib
parent      820c042b3603b4ab2a1e439c765085797859efe9 (diff)
download    poky-f1a527c5f6f0add2c7f209ffb69650733f692e81.tar.gz
buildstats: reduce amount of data stored for system utilization
Pre-processing /proc data during the build considerably reduces the
amount of data written to disk: 176KB instead of 4.7MB for a 20
minute build. Parsing also becomes faster.
The disk monitor log added another 16KB in that example build. The
overall buildstat was 20MB, so the overhead for monitoring system
utilization is small enough that it can be enabled by default.
(From OE-Core rev: b17812385cd55e81066d3ceda92dffdc6e5564da)
Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: Ross Burton <ross.burton@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/lib')
-rw-r--r--   meta/lib/buildstats.py   114
1 file changed, 104 insertions, 10 deletions
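For reference, every sample written to the reduced log files introduced below is a
timestamp line, a single space-separated data line, and a blank separator line.
The following read-back sketch is not part of this commit; the parse_reduced_log
helper and the example file name are illustrative only.

    # Parse one of the reduced_proc_*.log files written by SystemStats.sample().
    # Each record is: "<timestamp>\n<value value ...>\n\n".
    def parse_reduced_log(path):
        samples = []
        with open(path, 'rb') as f:
            lines = f.read().split(b'\n')
        for i in range(0, len(lines) - 1, 3):
            ts_line = lines[i].decode('ascii').strip()
            data_line = lines[i + 1].decode('ascii').strip()
            if ts_line and data_line:
                samples.append((int(ts_line),
                                [float(x) for x in data_line.split()]))
        return samples

    # reduced_proc_stat.log stores (user, system, iowait) fractions per sample:
    # for ts, (user, system, iowait) in parse_reduced_log('reduced_proc_stat.log'):
    #     print(ts, user, system, iowait)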
diff --git a/meta/lib/buildstats.py b/meta/lib/buildstats.py
index 7c8b3521a7..9e5b6be020 100644
--- a/meta/lib/buildstats.py
+++ b/meta/lib/buildstats.py
@@ -3,6 +3,7 @@
 # like open log files and the time of the last sampling.
 
 import time
+import re
 import bb.event
 
 class SystemStats:
@@ -12,14 +13,18 @@ class SystemStats:
         bb.utils.mkdirhier(bsdir)
 
         self.proc_files = []
-        for filename in ('diskstats', 'meminfo', 'stat'):
+        for filename, handler in (
+                ('diskstats', self._reduce_diskstats),
+                ('meminfo', self._reduce_meminfo),
+                ('stat', self._reduce_stat),
+        ):
             # In practice, this class gets instantiated only once in
             # the bitbake cooker process. Therefore 'append' mode is
             # not strictly necessary, but using it makes the class
             # more robust should two processes ever write
             # concurrently.
-            self.proc_files.append((filename,
-                                    open(os.path.join(bsdir, 'proc_%s.log' % filename), 'ab')))
+            destfile = os.path.join(bsdir, '%sproc_%s.log' % ('reduced_' if handler else '', filename))
+            self.proc_files.append((filename, open(destfile, 'ab'), handler))
         self.monitor_disk = open(os.path.join(bsdir, 'monitor_disk.log'), 'ab')
         # Last time that we sampled /proc data resp. recorded disk monitoring data.
         self.last_proc = 0
@@ -30,23 +35,112 @@
         # depends on the heartbeat event, which fires less often.
         self.min_seconds = 1
 
+        self.meminfo_regex = re.compile(b'^(MemTotal|MemFree|Buffers|Cached|SwapTotal|SwapFree):\s*(\d+)')
+        self.diskstats_regex = re.compile(b'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')
+        self.diskstats_ltime = None
+        self.diskstats_data = None
+        self.stat_ltimes = None
+
     def close(self):
         self.monitor_disk.close()
-        for _, output in self.proc_files:
+        for _, output, _ in self.proc_files:
             output.close()
 
+    def _reduce_meminfo(self, time, data):
+        """
+        Extracts 'MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree'
+        and writes their values into a single line, in that order.
+        """
+        values = {}
+        for line in data.split(b'\n'):
+            m = self.meminfo_regex.match(line)
+            if m:
+                values[m.group(1)] = m.group(2)
+        if len(values) == 6:
+            return (time,
+                    b' '.join([values[x] for x in
+                               (b'MemTotal', b'MemFree', b'Buffers', b'Cached', b'SwapTotal', b'SwapFree')]) + b'\n')
+
+    def _diskstats_is_relevant_line(self, linetokens):
+        if len(linetokens) != 14:
+            return False
+        disk = linetokens[2]
+        return self.diskstats_regex.match(disk)
+
+    def _reduce_diskstats(self, time, data):
+        relevant_tokens = filter(self._diskstats_is_relevant_line, map(lambda x: x.split(), data.split(b'\n')))
+        diskdata = [0] * 3
+        reduced = None
+        for tokens in relevant_tokens:
+            # rsect
+            diskdata[0] += int(tokens[5])
+            # wsect
+            diskdata[1] += int(tokens[9])
+            # use
+            diskdata[2] += int(tokens[12])
+        if self.diskstats_ltime:
+            # We need to compute information about the time interval
+            # since the last sampling and record the result as sample
+            # for that point in the past.
+            interval = time - self.diskstats_ltime
+            if interval > 0:
+                sums = [ a - b for a, b in zip(diskdata, self.diskstats_data) ]
+                readTput = sums[0] / 2.0 * 100.0 / interval
+                writeTput = sums[1] / 2.0 * 100.0 / interval
+                util = float( sums[2] ) / 10 / interval
+                util = max(0.0, min(1.0, util))
+                reduced = (self.diskstats_ltime, (readTput, writeTput, util))
+
+        self.diskstats_ltime = time
+        self.diskstats_data = diskdata
+        return reduced
+
+
+    def _reduce_nop(self, time, data):
+        return (time, data)
+
+    def _reduce_stat(self, time, data):
+        if not data:
+            return None
+        # CPU times {user, nice, system, idle, io_wait, irq, softirq} from first line
+        tokens = data.split(b'\n', 1)[0].split()
+        times = [ int(token) for token in tokens[1:] ]
+        reduced = None
+        if self.stat_ltimes:
+            user = float((times[0] + times[1]) - (self.stat_ltimes[0] + self.stat_ltimes[1]))
+            system = float((times[2] + times[5] + times[6]) - (self.stat_ltimes[2] + self.stat_ltimes[5] + self.stat_ltimes[6]))
+            idle = float(times[3] - self.stat_ltimes[3])
+            iowait = float(times[4] - self.stat_ltimes[4])
+
+            aSum = max(user + system + idle + iowait, 1)
+            reduced = (time, (user/aSum, system/aSum, iowait/aSum))
+
+        self.stat_ltimes = times
+        return reduced
+
     def sample(self, event, force):
         now = time.time()
         if (now - self.last_proc > self.min_seconds) or force:
-            for filename, output in self.proc_files:
+            for filename, output, handler in self.proc_files:
                 with open(os.path.join('/proc', filename), 'rb') as input:
                     data = input.read()
-                    # Unbuffered raw write, less overhead and useful
-                    # in case that we end up with concurrent writes.
-                    os.write(output.fileno(),
-                             ('%.0f\n' % now).encode('ascii') +
-                             data +
-                             b'\n')
+                    if handler:
+                        reduced = handler(now, data)
+                    else:
+                        reduced = (now, data)
+                    if reduced:
+                        if isinstance(reduced[1], bytes):
+                            # Use as it is.
+                            data = reduced[1]
+                        else:
+                            # Convert to a single line.
+                            data = (' '.join([str(x) for x in reduced[1]]) + '\n').encode('ascii')
+                        # Unbuffered raw write, less overhead and useful
+                        # in case that we end up with concurrent writes.
+                        os.write(output.fileno(),
+                                 ('%.0f\n' % reduced[0]).encode('ascii') +
+                                 data +
+                                 b'\n')
             self.last_proc = now
 
         if isinstance(event, bb.event.MonitorDiskEvent) and \
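The _reduce_stat() handler boils the first line of /proc/stat down to user,
system and iowait fractions by dividing the per-category tick deltas between two
samples by the total delta. A standalone sketch of the same arithmetic outside
of bitbake follows; the function names and the one-second interval are
illustrative only, not part of the commit.

    import time

    def read_cpu_times():
        # First line of /proc/stat: "cpu  user nice system idle iowait irq softirq ..."
        with open('/proc/stat', 'rb') as f:
            tokens = f.read().split(b'\n', 1)[0].split()
        return [int(token) for token in tokens[1:]]

    def cpu_fractions(prev, cur):
        user = float((cur[0] + cur[1]) - (prev[0] + prev[1]))            # user + nice
        system = float((cur[2] + cur[5] + cur[6]) -
                       (prev[2] + prev[5] + prev[6]))                    # system + irq + softirq
        idle = float(cur[3] - prev[3])
        iowait = float(cur[4] - prev[4])
        total = max(user + system + idle + iowait, 1)
        return user / total, system / total, iowait / total

    prev = read_cpu_times()
    time.sleep(1)
    cur = read_cpu_times()
    print('user/system/iowait fractions: %.3f %.3f %.3f' % cpu_fractions(prev, cur))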