diff options
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- | scripts/pybootchartgui/pybootchartgui/parsing.py | 791 |
1 files changed, 628 insertions, 163 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py index 6343fd5a7b..c8a6a5721a 100644 --- a/scripts/pybootchartgui/pybootchartgui/parsing.py +++ b/scripts/pybootchartgui/pybootchartgui/parsing.py | |||
@@ -1,178 +1,638 @@ | |||
1 | # This file is part of pybootchartgui. | ||
2 | |||
3 | # pybootchartgui is free software: you can redistribute it and/or modify | ||
4 | # it under the terms of the GNU General Public License as published by | ||
5 | # the Free Software Foundation, either version 3 of the License, or | ||
6 | # (at your option) any later version. | ||
7 | |||
8 | # pybootchartgui is distributed in the hope that it will be useful, | ||
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | # GNU General Public License for more details. | ||
12 | |||
13 | # You should have received a copy of the GNU General Public License | ||
14 | # along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>. | ||
15 | |||
16 | |||
1 | from __future__ import with_statement | 17 | from __future__ import with_statement |
2 | 18 | ||
3 | import os | 19 | import os |
4 | import string | 20 | import string |
5 | import re | 21 | import re |
22 | import sys | ||
6 | import tarfile | 23 | import tarfile |
24 | from time import clock | ||
7 | from collections import defaultdict | 25 | from collections import defaultdict |
26 | from functools import reduce | ||
27 | |||
28 | from .samples import * | ||
29 | from .process_tree import ProcessTree | ||
30 | |||
31 | if sys.version_info >= (3, 0): | ||
32 | long = int | ||
33 | |||
34 | # Parsing produces as its end result a 'Trace' | ||
35 | |||
36 | class Trace: | ||
37 | def __init__(self, writer, paths, options): | ||
38 | self.processes = {} | ||
39 | self.start = {} | ||
40 | self.end = {} | ||
41 | self.headers = None | ||
42 | self.disk_stats = None | ||
43 | self.ps_stats = None | ||
44 | self.taskstats = None | ||
45 | self.cpu_stats = None | ||
46 | self.cmdline = None | ||
47 | self.kernel = None | ||
48 | self.kernel_tree = None | ||
49 | self.filename = None | ||
50 | self.parent_map = None | ||
51 | self.mem_stats = None | ||
52 | |||
53 | parse_paths (writer, self, paths, options.mintime) | ||
54 | if not self.valid(): | ||
55 | raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths)) | ||
56 | |||
57 | return | ||
58 | |||
59 | # Turn that parsed information into something more useful | ||
60 | # link processes into a tree of pointers, calculate statistics | ||
61 | self.compile(writer) | ||
62 | |||
63 | # Crop the chart to the end of the first idle period after the given | ||
64 | # process | ||
65 | if options.crop_after: | ||
66 | idle = self.crop (writer, options.crop_after) | ||
67 | else: | ||
68 | idle = None | ||
69 | |||
70 | # Annotate other times as the first start point of given process lists | ||
71 | self.times = [ idle ] | ||
72 | if options.annotate: | ||
73 | for procnames in options.annotate: | ||
74 | names = [x[:15] for x in procnames.split(",")] | ||
75 | for proc in self.ps_stats.process_map.values(): | ||
76 | if proc.cmd in names: | ||
77 | self.times.append(proc.start_time) | ||
78 | break | ||
79 | else: | ||
80 | self.times.append(None) | ||
81 | |||
82 | self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats, | ||
83 | self.ps_stats.sample_period, | ||
84 | self.headers.get("profile.process"), | ||
85 | options.prune, idle, self.taskstats, | ||
86 | self.parent_map is not None) | ||
87 | |||
88 | if self.kernel is not None: | ||
89 | self.kernel_tree = ProcessTree(writer, self.kernel, None, 0, | ||
90 | self.headers.get("profile.process"), | ||
91 | False, None, None, True) | ||
92 | |||
93 | def valid(self): | ||
94 | return len(self.processes) != 0 | ||
95 | return self.headers != None and self.disk_stats != None and \ | ||
96 | self.ps_stats != None and self.cpu_stats != None | ||
97 | |||
98 | |||
99 | def compile(self, writer): | ||
100 | |||
101 | def find_parent_id_for(pid): | ||
102 | if pid is 0: | ||
103 | return 0 | ||
104 | ppid = self.parent_map.get(pid) | ||
105 | if ppid: | ||
106 | # many of these double forks are so short lived | ||
107 | # that we have no samples, or process info for them | ||
108 | # so climb the parent hierarcy to find one | ||
109 | if int (ppid * 1000) not in self.ps_stats.process_map: | ||
110 | # print "Pid '%d' short lived with no process" % ppid | ||
111 | ppid = find_parent_id_for (ppid) | ||
112 | # else: | ||
113 | # print "Pid '%d' has an entry" % ppid | ||
114 | else: | ||
115 | # print "Pid '%d' missing from pid map" % pid | ||
116 | return 0 | ||
117 | return ppid | ||
118 | |||
119 | # merge in the cmdline data | ||
120 | if self.cmdline is not None: | ||
121 | for proc in self.ps_stats.process_map.values(): | ||
122 | rpid = int (proc.pid // 1000) | ||
123 | if rpid in self.cmdline: | ||
124 | cmd = self.cmdline[rpid] | ||
125 | proc.exe = cmd['exe'] | ||
126 | proc.args = cmd['args'] | ||
127 | # else: | ||
128 | # print "proc %d '%s' not in cmdline" % (rpid, proc.exe) | ||
129 | |||
130 | # re-parent any stray orphans if we can | ||
131 | if self.parent_map is not None: | ||
132 | for process in self.ps_stats.process_map.values(): | ||
133 | ppid = find_parent_id_for (int(process.pid // 1000)) | ||
134 | if ppid: | ||
135 | process.ppid = ppid * 1000 | ||
136 | |||
137 | # stitch the tree together with pointers | ||
138 | for process in self.ps_stats.process_map.values(): | ||
139 | process.set_parent (self.ps_stats.process_map) | ||
140 | |||
141 | # count on fingers variously | ||
142 | for process in self.ps_stats.process_map.values(): | ||
143 | process.calc_stats (self.ps_stats.sample_period) | ||
144 | |||
145 | def crop(self, writer, crop_after): | ||
146 | |||
147 | def is_idle_at(util, start, j): | ||
148 | k = j + 1 | ||
149 | while k < len(util) and util[k][0] < start + 300: | ||
150 | k += 1 | ||
151 | k = min(k, len(util)-1) | ||
152 | |||
153 | if util[j][1] >= 0.25: | ||
154 | return False | ||
155 | |||
156 | avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1) | ||
157 | if avgload < 0.25: | ||
158 | return True | ||
159 | else: | ||
160 | return False | ||
161 | def is_idle(util, start): | ||
162 | for j in range(0, len(util)): | ||
163 | if util[j][0] < start: | ||
164 | continue | ||
165 | return is_idle_at(util, start, j) | ||
166 | else: | ||
167 | return False | ||
168 | |||
169 | names = [x[:15] for x in crop_after.split(",")] | ||
170 | for proc in self.ps_stats.process_map.values(): | ||
171 | if proc.cmd in names or proc.exe in names: | ||
172 | writer.info("selected proc '%s' from list (start %d)" | ||
173 | % (proc.cmd, proc.start_time)) | ||
174 | break | ||
175 | if proc is None: | ||
176 | writer.warn("no selected crop proc '%s' in list" % crop_after) | ||
177 | |||
178 | |||
179 | cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats] | ||
180 | disk_util = [(sample.time, sample.util) for sample in self.disk_stats] | ||
181 | |||
182 | idle = None | ||
183 | for i in range(0, len(cpu_util)): | ||
184 | if cpu_util[i][0] < proc.start_time: | ||
185 | continue | ||
186 | if is_idle_at(cpu_util, cpu_util[i][0], i) \ | ||
187 | and is_idle(disk_util, cpu_util[i][0]): | ||
188 | idle = cpu_util[i][0] | ||
189 | break | ||
190 | |||
191 | if idle is None: | ||
192 | writer.warn ("not idle after proc '%s'" % crop_after) | ||
193 | return None | ||
194 | |||
195 | crop_at = idle + 300 | ||
196 | writer.info ("cropping at time %d" % crop_at) | ||
197 | while len (self.cpu_stats) \ | ||
198 | and self.cpu_stats[-1].time > crop_at: | ||
199 | self.cpu_stats.pop() | ||
200 | while len (self.disk_stats) \ | ||
201 | and self.disk_stats[-1].time > crop_at: | ||
202 | self.disk_stats.pop() | ||
203 | |||
204 | self.ps_stats.end_time = crop_at | ||
205 | |||
206 | cropped_map = {} | ||
207 | for key, value in self.ps_stats.process_map.items(): | ||
208 | if (value.start_time <= crop_at): | ||
209 | cropped_map[key] = value | ||
210 | |||
211 | for proc in cropped_map.values(): | ||
212 | proc.duration = min (proc.duration, crop_at - proc.start_time) | ||
213 | while len (proc.samples) \ | ||
214 | and proc.samples[-1].time > crop_at: | ||
215 | proc.samples.pop() | ||
216 | |||
217 | self.ps_stats.process_map = cropped_map | ||
218 | |||
219 | return idle | ||
220 | |||
8 | 221 | ||
9 | from samples import * | ||
10 | from process_tree import ProcessTree | ||
11 | 222 | ||
12 | class ParseError(Exception): | 223 | class ParseError(Exception): |
13 | """Represents errors during parse of the bootchart.""" | 224 | """Represents errors during parse of the bootchart.""" |
14 | def __init__(self, value): | 225 | def __init__(self, value): |
15 | self.value = value | 226 | self.value = value |
16 | 227 | ||
17 | def __str__(self): | 228 | def __str__(self): |
18 | return self.value | 229 | return self.value |
19 | 230 | ||
20 | def _parse_headers(file): | 231 | def _parse_headers(file): |
21 | """Parses the headers of the bootchart.""" | 232 | """Parses the headers of the bootchart.""" |
22 | def parse((headers,last), line): | 233 | def parse(acc, line): |
23 | if '=' in line: last,value = map(string.strip, line.split('=', 1)) | 234 | (headers, last) = acc |
24 | else: value = line.strip() | 235 | if '=' in line: |
25 | headers[last] += value | 236 | last, value = map (lambda x: x.strip(), line.split('=', 1)) |
26 | return headers,last | 237 | else: |
27 | return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0] | 238 | value = line.strip() |
239 | headers[last] += value | ||
240 | return headers, last | ||
241 | return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0] | ||
28 | 242 | ||
29 | def _parse_timed_blocks(file): | 243 | def _parse_timed_blocks(file): |
30 | """Parses (ie., splits) a file into so-called timed-blocks. A | 244 | """Parses (ie., splits) a file into so-called timed-blocks. A |
31 | timed-block consists of a timestamp on a line by itself followed | 245 | timed-block consists of a timestamp on a line by itself followed |
32 | by zero or more lines of data for that point in time.""" | 246 | by zero or more lines of data for that point in time.""" |
33 | def parse(block): | 247 | def parse(block): |
34 | lines = block.split('\n') | 248 | lines = block.split('\n') |
35 | if not lines: | 249 | if not lines: |
36 | raise ParseError('expected a timed-block consisting a timestamp followed by data lines') | 250 | raise ParseError('expected a timed-block consisting a timestamp followed by data lines') |
37 | try: | 251 | try: |
38 | return (int(lines[0]), lines[1:]) | 252 | return (int(lines[0]), lines[1:]) |
39 | except ValueError: | 253 | except ValueError: |
40 | raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) | 254 | raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) |
41 | blocks = file.read().split('\n\n') | 255 | blocks = file.read().decode('utf-8').split('\n\n') |
42 | return [parse(block) for block in blocks if block.strip()] | 256 | return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')] |
43 | 257 | ||
44 | def _parse_proc_ps_log(file): | 258 | def _parse_proc_ps_log(writer, file): |
45 | """ | 259 | """ |
46 | * See proc(5) for details. | 260 | * See proc(5) for details. |
47 | * | 261 | * |
48 | * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime, | 262 | * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime, |
49 | * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, | 263 | * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, |
50 | * kstkesp, kstkeip} | 264 | * kstkesp, kstkeip} |
51 | """ | 265 | """ |
52 | processMap = {} | 266 | processMap = {} |
53 | ltime = 0 | 267 | ltime = 0 |
54 | timed_blocks = _parse_timed_blocks(file) | 268 | timed_blocks = _parse_timed_blocks(file) |
55 | for time, lines in timed_blocks: | 269 | for time, lines in timed_blocks: |
56 | for line in lines: | 270 | for line in lines: |
57 | tokens = line.split(' ') | 271 | if not line: continue |
58 | 272 | tokens = line.split(' ') | |
59 | offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0] | 273 | if len(tokens) < 21: |
60 | pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset]) | 274 | continue |
61 | userCpu, sysCpu, stime= int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset]) | 275 | |
62 | 276 | offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0] | |
63 | if processMap.has_key(pid): | 277 | pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset]) |
64 | process = processMap[pid] | 278 | userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset]) |
65 | process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name?? | 279 | |
66 | else: | 280 | # magic fixed point-ness ... |
67 | process = Process(pid, cmd, ppid, min(time, stime)) | 281 | pid *= 1000 |
68 | processMap[pid] = process | 282 | ppid *= 1000 |
69 | 283 | if pid in processMap: | |
70 | if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None: | 284 | process = processMap[pid] |
71 | userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime) | 285 | process.cmd = cmd.strip('()') # why rename after latest name?? |
72 | cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0) | 286 | else: |
73 | process.samples.append(ProcessSample(time, state, cpuSample)) | 287 | process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime)) |
74 | 288 | processMap[pid] = process | |
75 | process.last_user_cpu_time = userCpu | 289 | |
76 | process.last_sys_cpu_time = sysCpu | 290 | if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None: |
77 | ltime = time | 291 | userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime)) |
78 | 292 | cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0) | |
79 | startTime = timed_blocks[0][0] | 293 | process.samples.append(ProcessSample(time, state, cpuSample)) |
80 | avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1) | 294 | |
81 | 295 | process.last_user_cpu_time = userCpu | |
82 | for process in processMap.values(): | 296 | process.last_sys_cpu_time = sysCpu |
83 | process.set_parent(processMap) | 297 | ltime = time |
84 | 298 | ||
85 | for process in processMap.values(): | 299 | if len (timed_blocks) < 2: |
86 | process.calc_stats(avgSampleLength) | 300 | return None |
87 | 301 | ||
88 | return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime) | 302 | startTime = timed_blocks[0][0] |
89 | 303 | avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1) | |
304 | |||
305 | return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime) | ||
306 | |||
307 | def _parse_taskstats_log(writer, file): | ||
308 | """ | ||
309 | * See bootchart-collector.c for details. | ||
310 | * | ||
311 | * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total } | ||
312 | * | ||
313 | """ | ||
314 | processMap = {} | ||
315 | pidRewrites = {} | ||
316 | ltime = None | ||
317 | timed_blocks = _parse_timed_blocks(file) | ||
318 | for time, lines in timed_blocks: | ||
319 | # we have no 'stime' from taskstats, so prep 'init' | ||
320 | if ltime is None: | ||
321 | process = Process(writer, 1, '[init]', 0, 0) | ||
322 | processMap[1000] = process | ||
323 | ltime = time | ||
324 | # continue | ||
325 | for line in lines: | ||
326 | if not line: continue | ||
327 | tokens = line.split(' ') | ||
328 | if len(tokens) != 6: | ||
329 | continue | ||
330 | |||
331 | opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2] | ||
332 | cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1]), | ||
333 | |||
334 | # make space for trees of pids | ||
335 | opid *= 1000 | ||
336 | ppid *= 1000 | ||
337 | |||
338 | # when the process name changes, we re-write the pid. | ||
339 | if opid in pidRewrites: | ||
340 | pid = pidRewrites[opid] | ||
341 | else: | ||
342 | pid = opid | ||
343 | |||
344 | cmd = cmd.strip('(').strip(')') | ||
345 | if pid in processMap: | ||
346 | process = processMap[pid] | ||
347 | if process.cmd != cmd: | ||
348 | pid += 1 | ||
349 | pidRewrites[opid] = pid | ||
350 | # print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid) | ||
351 | process = process.split (writer, pid, cmd, ppid, time) | ||
352 | processMap[pid] = process | ||
353 | else: | ||
354 | process.cmd = cmd; | ||
355 | else: | ||
356 | process = Process(writer, pid, cmd, ppid, time) | ||
357 | processMap[pid] = process | ||
358 | |||
359 | delta_cpu_ns = (float) (cpu_ns - process.last_cpu_ns) | ||
360 | delta_blkio_delay_ns = (float) (blkio_delay_ns - process.last_blkio_delay_ns) | ||
361 | delta_swapin_delay_ns = (float) (swapin_delay_ns - process.last_swapin_delay_ns) | ||
362 | |||
363 | # make up some state data ... | ||
364 | if delta_cpu_ns > 0: | ||
365 | state = "R" | ||
366 | elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0: | ||
367 | state = "D" | ||
368 | else: | ||
369 | state = "S" | ||
370 | |||
371 | # retain the ns timing information into a CPUSample - that tries | ||
372 | # with the old-style to be a %age of CPU used in this time-slice. | ||
373 | if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0: | ||
374 | # print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns) | ||
375 | cpuSample = CPUSample('null', delta_cpu_ns, 0.0, | ||
376 | delta_blkio_delay_ns, | ||
377 | delta_swapin_delay_ns) | ||
378 | process.samples.append(ProcessSample(time, state, cpuSample)) | ||
379 | |||
380 | process.last_cpu_ns = cpu_ns | ||
381 | process.last_blkio_delay_ns = blkio_delay_ns | ||
382 | process.last_swapin_delay_ns = swapin_delay_ns | ||
383 | ltime = time | ||
384 | |||
385 | if len (timed_blocks) < 2: | ||
386 | return None | ||
387 | |||
388 | startTime = timed_blocks[0][0] | ||
389 | avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1) | ||
390 | |||
391 | return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime) | ||
392 | |||
90 | def _parse_proc_stat_log(file): | 393 | def _parse_proc_stat_log(file): |
91 | samples = [] | 394 | samples = [] |
92 | ltimes = None | 395 | ltimes = None |
93 | for time, lines in _parse_timed_blocks(file): | 396 | for time, lines in _parse_timed_blocks(file): |
94 | # CPU times {user, nice, system, idle, io_wait, irq, softirq} | 397 | # skip emtpy lines |
95 | tokens = lines[0].split(); | 398 | if not lines: |
96 | times = [ int(token) for token in tokens[1:] ] | 399 | continue |
97 | if ltimes: | 400 | # CPU times {user, nice, system, idle, io_wait, irq, softirq} |
98 | user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1])) | 401 | tokens = lines[0].split() |
99 | system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6])) | 402 | times = [ int(token) for token in tokens[1:] ] |
100 | idle = float(times[3] - ltimes[3]) | 403 | if ltimes: |
101 | iowait = float(times[4] - ltimes[4]) | 404 | user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1])) |
102 | 405 | system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6])) | |
103 | aSum = max(user + system + idle + iowait, 1) | 406 | idle = float(times[3] - ltimes[3]) |
104 | samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) ) | 407 | iowait = float(times[4] - ltimes[4]) |
105 | 408 | ||
106 | ltimes = times | 409 | aSum = max(user + system + idle + iowait, 1) |
107 | # skip the rest of statistics lines | 410 | samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) ) |
108 | return samples | 411 | |
109 | 412 | ltimes = times | |
110 | 413 | # skip the rest of statistics lines | |
414 | return samples | ||
415 | |||
111 | def _parse_proc_disk_stat_log(file, numCpu): | 416 | def _parse_proc_disk_stat_log(file, numCpu): |
112 | """ | 417 | """ |
113 | Parse file for disk stats, but only look at the whole disks, eg. sda, | 418 | Parse file for disk stats, but only look at the whole device, eg. sda, |
114 | not sda1, sda2 etc. The format of relevant lines should be: | 419 | not sda1, sda2 etc. The format of relevant lines should be: |
115 | {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq} | 420 | {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq} |
116 | """ | 421 | """ |
117 | DISK_REGEX = 'hd.$|sd.$' | 422 | disk_regex_re = re.compile ('^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$') |
118 | 423 | ||
119 | def is_relevant_line(line): | 424 | # this gets called an awful lot. |
120 | return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2]) | 425 | def is_relevant_line(linetokens): |
121 | 426 | if len(linetokens) != 14: | |
122 | disk_stat_samples = [] | 427 | return False |
123 | 428 | disk = linetokens[2] | |
124 | for time, lines in _parse_timed_blocks(file): | 429 | return disk_regex_re.match(disk) |
125 | sample = DiskStatSample(time) | 430 | |
126 | relevant_tokens = [line.split() for line in lines if is_relevant_line(line)] | 431 | disk_stat_samples = [] |
127 | 432 | ||
128 | for tokens in relevant_tokens: | 433 | for time, lines in _parse_timed_blocks(file): |
129 | disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12]) | 434 | sample = DiskStatSample(time) |
130 | sample.add_diskdata([rsect, wsect, use]) | 435 | relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(),lines) if is_relevant_line(linetokens)] |
131 | 436 | ||
132 | disk_stat_samples.append(sample) | 437 | for tokens in relevant_tokens: |
133 | 438 | disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12]) | |
134 | disk_stats = [] | 439 | sample.add_diskdata([rsect, wsect, use]) |
135 | for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]): | 440 | |
136 | interval = sample1.time - sample2.time | 441 | disk_stat_samples.append(sample) |
137 | sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ] | 442 | |
138 | readTput = sums[0] / 2.0 * 100.0 / interval | 443 | disk_stats = [] |
139 | writeTput = sums[1] / 2.0 * 100.0 / interval | 444 | for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]): |
140 | util = float( sums[2] ) / 10 / interval / numCpu | 445 | interval = sample1.time - sample2.time |
141 | util = max(0.0, min(1.0, util)) | 446 | if interval == 0: |
142 | disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util)) | 447 | interval = 1 |
143 | 448 | sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ] | |
144 | return disk_stats | 449 | readTput = sums[0] / 2.0 * 100.0 / interval |
145 | 450 | writeTput = sums[1] / 2.0 * 100.0 / interval | |
146 | 451 | util = float( sums[2] ) / 10 / interval / numCpu | |
452 | util = max(0.0, min(1.0, util)) | ||
453 | disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util)) | ||
454 | |||
455 | return disk_stats | ||
456 | |||
457 | def _parse_proc_meminfo_log(file): | ||
458 | """ | ||
459 | Parse file for global memory statistics. | ||
460 | The format of relevant lines should be: ^key: value( unit)? | ||
461 | """ | ||
462 | used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',) | ||
463 | |||
464 | mem_stats = [] | ||
465 | meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*') | ||
466 | |||
467 | for time, lines in _parse_timed_blocks(file): | ||
468 | sample = MemSample(time) | ||
469 | |||
470 | for line in lines: | ||
471 | match = meminfo_re.match(line) | ||
472 | if not match: | ||
473 | raise ParseError("Invalid meminfo line \"%s\"" % match.groups(0)) | ||
474 | sample.add_value(match.group(1), int(match.group(2))) | ||
475 | |||
476 | if sample.valid(): | ||
477 | mem_stats.append(sample) | ||
478 | |||
479 | return mem_stats | ||
480 | |||
481 | # if we boot the kernel with: initcall_debug printk.time=1 we can | ||
482 | # get all manner of interesting data from the dmesg output | ||
483 | # We turn this into a pseudo-process tree: each event is | ||
484 | # characterised by a | ||
485 | # we don't try to detect a "kernel finished" state - since the kernel | ||
486 | # continues to do interesting things after init is called. | ||
487 | # | ||
488 | # sample input: | ||
489 | # [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013) | ||
490 | # ... | ||
491 | # [ 0.039993] calling migration_init+0x0/0x6b @ 1 | ||
492 | # [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs | ||
493 | def _parse_dmesg(writer, file): | ||
494 | timestamp_re = re.compile ("^\[\s*(\d+\.\d+)\s*]\s+(.*)$") | ||
495 | split_re = re.compile ("^(\S+)\s+([\S\+_-]+) (.*)$") | ||
496 | processMap = {} | ||
497 | idx = 0 | ||
498 | inc = 1.0 / 1000000 | ||
499 | kernel = Process(writer, idx, "k-boot", 0, 0.1) | ||
500 | processMap['k-boot'] = kernel | ||
501 | base_ts = False | ||
502 | max_ts = 0 | ||
503 | for line in file.read().decode('utf-8').split('\n'): | ||
504 | t = timestamp_re.match (line) | ||
505 | if t is None: | ||
506 | # print "duff timestamp " + line | ||
507 | continue | ||
508 | |||
509 | time_ms = float (t.group(1)) * 1000 | ||
510 | # looks like we may have a huge diff after the clock | ||
511 | # has been set up. This could lead to huge graph: | ||
512 | # so huge we will be killed by the OOM. | ||
513 | # So instead of using the plain timestamp we will | ||
514 | # use a delta to first one and skip the first one | ||
515 | # for convenience | ||
516 | if max_ts == 0 and not base_ts and time_ms > 1000: | ||
517 | base_ts = time_ms | ||
518 | continue | ||
519 | max_ts = max(time_ms, max_ts) | ||
520 | if base_ts: | ||
521 | # print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms) | ||
522 | time_ms -= base_ts | ||
523 | m = split_re.match (t.group(2)) | ||
524 | |||
525 | if m is None: | ||
526 | continue | ||
527 | # print "match: '%s'" % (m.group(1)) | ||
528 | type = m.group(1) | ||
529 | func = m.group(2) | ||
530 | rest = m.group(3) | ||
531 | |||
532 | if t.group(2).startswith ('Write protecting the') or \ | ||
533 | t.group(2).startswith ('Freeing unused kernel memory'): | ||
534 | kernel.duration = time_ms / 10 | ||
535 | continue | ||
536 | |||
537 | # print "foo: '%s' '%s' '%s'" % (type, func, rest) | ||
538 | if type == "calling": | ||
539 | ppid = kernel.pid | ||
540 | p = re.match ("\@ (\d+)", rest) | ||
541 | if p is not None: | ||
542 | ppid = float (p.group(1)) // 1000 | ||
543 | # print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms) | ||
544 | name = func.split ('+', 1) [0] | ||
545 | idx += inc | ||
546 | processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10) | ||
547 | elif type == "initcall": | ||
548 | # print "finished: '%s' at '%s'" % (func, time_ms) | ||
549 | if func in processMap: | ||
550 | process = processMap[func] | ||
551 | process.duration = (time_ms / 10) - process.start_time | ||
552 | else: | ||
553 | print("corrupted init call for %s" % (func)) | ||
554 | |||
555 | elif type == "async_waiting" or type == "async_continuing": | ||
556 | continue # ignore | ||
557 | |||
558 | return processMap.values() | ||
559 | |||
560 | # | ||
561 | # Parse binary pacct accounting file output if we have one | ||
562 | # cf. /usr/include/linux/acct.h | ||
563 | # | ||
564 | def _parse_pacct(writer, file): | ||
565 | # read LE int32 | ||
566 | def _read_le_int32(file): | ||
567 | byts = file.read(4) | ||
568 | return (ord(byts[0])) | (ord(byts[1]) << 8) | \ | ||
569 | (ord(byts[2]) << 16) | (ord(byts[3]) << 24) | ||
570 | |||
571 | parent_map = {} | ||
572 | parent_map[0] = 0 | ||
573 | while file.read(1) != "": # ignore flags | ||
574 | ver = file.read(1) | ||
575 | if ord(ver) < 3: | ||
576 | print("Invalid version 0x%x" % (ord(ver))) | ||
577 | return None | ||
578 | |||
579 | file.seek (14, 1) # user, group etc. | ||
580 | pid = _read_le_int32 (file) | ||
581 | ppid = _read_le_int32 (file) | ||
582 | # print "Parent of %d is %d" % (pid, ppid) | ||
583 | parent_map[pid] = ppid | ||
584 | file.seek (4 + 4 + 16, 1) # timings | ||
585 | file.seek (16, 1) # acct_comm | ||
586 | return parent_map | ||
587 | |||
588 | def _parse_paternity_log(writer, file): | ||
589 | parent_map = {} | ||
590 | parent_map[0] = 0 | ||
591 | for line in file.read().decode('utf-8').split('\n'): | ||
592 | if not line: | ||
593 | continue | ||
594 | elems = line.split(' ') # <Child> <Parent> | ||
595 | if len (elems) >= 2: | ||
596 | # print "paternity of %d is %d" % (int(elems[0]), int(elems[1])) | ||
597 | parent_map[int(elems[0])] = int(elems[1]) | ||
598 | else: | ||
599 | print("Odd paternity line '%s'" % (line)) | ||
600 | return parent_map | ||
601 | |||
602 | def _parse_cmdline_log(writer, file): | ||
603 | cmdLines = {} | ||
604 | for block in file.read().decode('utf-8').split('\n\n'): | ||
605 | lines = block.split('\n') | ||
606 | if len (lines) >= 3: | ||
607 | # print "Lines '%s'" % (lines[0]) | ||
608 | pid = int (lines[0]) | ||
609 | values = {} | ||
610 | values['exe'] = lines[1].lstrip(':') | ||
611 | args = lines[2].lstrip(':').split('\0') | ||
612 | args.pop() | ||
613 | values['args'] = args | ||
614 | cmdLines[pid] = values | ||
615 | return cmdLines | ||
616 | |||
147 | def get_num_cpus(headers): | 617 | def get_num_cpus(headers): |
148 | """Get the number of CPUs from the system.cpu header property. As the | 618 | """Get the number of CPUs from the system.cpu header property. As the |
149 | CPU utilization graphs are relative, the number of CPUs currently makes | 619 | CPU utilization graphs are relative, the number of CPUs currently makes |
150 | no difference.""" | 620 | no difference.""" |
151 | if headers is None: | 621 | if headers is None: |
152 | return 1 | 622 | return 1 |
623 | if headers.get("system.cpu.num"): | ||
624 | return max (int (headers.get("system.cpu.num")), 1) | ||
153 | cpu_model = headers.get("system.cpu") | 625 | cpu_model = headers.get("system.cpu") |
154 | if cpu_model is None: | 626 | if cpu_model is None: |
155 | return 1 | 627 | return 1 |
156 | mat = re.match(".*\\((\\d+)\\)", cpu_model) | 628 | mat = re.match(".*\\((\\d+)\\)", cpu_model) |
157 | if mat is None: | 629 | if mat is None: |
158 | return 1 | 630 | return 1 |
159 | return int(mat.group(1)) | 631 | return max (int(mat.group(1)), 1) |
160 | |||
161 | class ParserState: | ||
162 | def __init__(self): | ||
163 | self.processes = {} | ||
164 | self.start = {} | ||
165 | self.end = {} | ||
166 | 632 | ||
167 | def valid(self): | 633 | def _do_parse(writer, state, filename, file, mintime): |
168 | return len(self.processes) != 0 | 634 | writer.info("parsing '%s'" % filename) |
169 | 635 | t1 = clock() | |
170 | |||
171 | _relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"]) | ||
172 | |||
173 | def _do_parse(state, filename, file, mintime): | ||
174 | #print filename | ||
175 | #writer.status("parsing '%s'" % filename) | ||
176 | paths = filename.split("/") | 636 | paths = filename.split("/") |
177 | task = paths[-1] | 637 | task = paths[-1] |
178 | pn = paths[-2] | 638 | pn = paths[-2] |
@@ -194,44 +654,49 @@ def _do_parse(state, filename, file, mintime): | |||
194 | state.end[end] = [] | 654 | state.end[end] = [] |
195 | if k not in state.end[end]: | 655 | if k not in state.end[end]: |
196 | state.end[end].append(pn + ":" + task) | 656 | state.end[end].append(pn + ":" + task) |
657 | t2 = clock() | ||
658 | writer.info(" %s seconds" % str(t2-t1)) | ||
197 | return state | 659 | return state |
198 | 660 | ||
199 | def parse_file(state, filename, mintime): | 661 | def parse_file(writer, state, filename, mintime): |
662 | if state.filename is None: | ||
663 | state.filename = filename | ||
200 | basename = os.path.basename(filename) | 664 | basename = os.path.basename(filename) |
201 | with open(filename, "rb") as file: | 665 | with open(filename, "rb") as file: |
202 | return _do_parse(state, filename, file, mintime) | 666 | return _do_parse(writer, state, filename, file, mintime) |
203 | 667 | ||
204 | def parse_paths(state, paths, mintime): | 668 | def parse_paths(writer, state, paths, mintime): |
205 | for path in paths: | 669 | for path in paths: |
206 | root,extension = os.path.splitext(path) | 670 | if state.filename is None: |
671 | state.filename = path | ||
672 | root, extension = os.path.splitext(path) | ||
207 | if not(os.path.exists(path)): | 673 | if not(os.path.exists(path)): |
208 | print "warning: path '%s' does not exist, ignoring." % path | 674 | writer.warn("warning: path '%s' does not exist, ignoring." % path) |
209 | continue | 675 | continue |
676 | #state.filename = path | ||
210 | if os.path.isdir(path): | 677 | if os.path.isdir(path): |
211 | files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] ] | 678 | files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] ] |
212 | files.sort() | 679 | files.sort() |
213 | state = parse_paths(state, files, mintime) | 680 | state = parse_paths(writer, state, files, mintime) |
214 | elif extension in [".tar", ".tgz", ".tar.gz"]: | 681 | elif extension in [".tar", ".tgz", ".gz"]: |
682 | if extension == ".gz": | ||
683 | root, extension = os.path.splitext(root) | ||
684 | if extension != ".tar": | ||
685 | writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension) | ||
686 | continue | ||
215 | tf = None | 687 | tf = None |
216 | try: | 688 | try: |
689 | writer.status("parsing '%s'" % path) | ||
217 | tf = tarfile.open(path, 'r:*') | 690 | tf = tarfile.open(path, 'r:*') |
218 | for name in tf.getnames(): | 691 | for name in tf.getnames(): |
219 | state = _do_parse(state, name, tf.extractfile(name)) | 692 | state = _do_parse(writer, state, name, tf.extractfile(name)) |
220 | except tarfile.ReadError, error: | 693 | except tarfile.ReadError as error: |
221 | raise ParseError("error: could not read tarfile '%s': %s." % (path, error)) | 694 | raise ParseError("error: could not read tarfile '%s': %s." % (path, error)) |
222 | finally: | 695 | finally: |
223 | if tf != None: | 696 | if tf != None: |
224 | tf.close() | 697 | tf.close() |
225 | else: | 698 | else: |
226 | state = parse_file(state, path, mintime) | 699 | state = parse_file(writer, state, path, mintime) |
227 | return state | ||
228 | |||
229 | def parse(paths, prune, mintime): | ||
230 | state = parse_paths(ParserState(), paths, mintime) | ||
231 | if not state.valid(): | ||
232 | raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths)) | ||
233 | #monitored_app = state.headers.get("profile.process") | ||
234 | #proc_tree = ProcessTree(state.ps_stats, monitored_app, prune) | ||
235 | return state | 700 | return state |
236 | 701 | ||
237 | def split_res(res, n): | 702 | def split_res(res, n): |