diff options
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- | scripts/pybootchartgui/pybootchartgui/parsing.py | 740 |
1 file changed, 740 insertions, 0 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py new file mode 100644 index 0000000000..d423b9f77c --- /dev/null +++ b/scripts/pybootchartgui/pybootchartgui/parsing.py | |||
@@ -0,0 +1,740 @@ | |||
1 | # This file is part of pybootchartgui. | ||
2 | |||
3 | # pybootchartgui is free software: you can redistribute it and/or modify | ||
4 | # it under the terms of the GNU General Public License as published by | ||
5 | # the Free Software Foundation, either version 3 of the License, or | ||
6 | # (at your option) any later version. | ||
7 | |||
8 | # pybootchartgui is distributed in the hope that it will be useful, | ||
9 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | # GNU General Public License for more details. | ||
12 | |||
13 | # You should have received a copy of the GNU General Public License | ||
14 | # along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>. | ||
15 | |||
16 | |||
17 | from __future__ import with_statement | ||
18 | |||
import os
import string
import re
import sys
import tarfile
from collections import defaultdict
from functools import reduce

# time.clock was removed in Python 3.8; prefer its replacement when present
# so the module keeps importing on modern interpreters.
try:
    from time import process_time as clock
except ImportError:
    from time import clock

from .samples import *
from .process_tree import ProcessTree
30 | |||
# Python 3 removed the separate 'long' type; alias it to int so the
# parsing code below can keep calling long() unconditionally.
if sys.version_info >= (3, 0):
    long = int
34 | # Parsing produces as its end result a 'Trace' | ||
35 | |||
class Trace:
    """End result of parsing.

    Maps each monitored process/task to its [start, end] interval and,
    for full bootchart-style inputs, would also carry the per-sample
    disk/cpu/memory statistics and process trees.
    """

    def __init__(self, writer, paths, options):
        self.processes = {}
        self.start = {}
        self.end = {}
        self.min = None
        self.max = None
        self.headers = None
        self.disk_stats = None
        self.ps_stats = None
        self.taskstats = None
        self.cpu_stats = None
        self.cmdline = None
        self.kernel = None
        self.kernel_tree = None
        self.filename = None
        self.parent_map = None
        self.mem_stats = None

        if len(paths):
            parse_paths (writer, self, paths)
            if not self.valid():
                raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))

            if options.full_time:
                self.min = min(self.start.keys())
                self.max = max(self.end.keys())

        # NOTE: the buildstats variant of pybootchartgui only needs the
        # per-task intervals collected above, so we stop here.  The
        # bootchart-style post-processing below is deliberately kept but
        # unreachable.
        return

        # Turn that parsed information into something more useful
        # link processes into a tree of pointers, calculate statistics
        self.compile(writer)

        # Crop the chart to the end of the first idle period after the given
        # process
        if options.crop_after:
            idle = self.crop (writer, options.crop_after)
        else:
            idle = None

        # Annotate other times as the first start point of given process lists
        self.times = [ idle ]
        if options.annotate:
            for procnames in options.annotate:
                names = [x[:15] for x in procnames.split(",")]
                for proc in self.ps_stats.process_map.values():
                    if proc.cmd in names:
                        self.times.append(proc.start_time)
                        break
                else:
                    # for/else: no process in the list matched
                    self.times.append(None)

        self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats,
                                     self.ps_stats.sample_period,
                                     self.headers.get("profile.process"),
                                     options.prune, idle, self.taskstats,
                                     self.parent_map is not None)

        if self.kernel is not None:
            self.kernel_tree = ProcessTree(writer, self.kernel, None, 0,
                                           self.headers.get("profile.process"),
                                           False, None, None, True)

    def valid(self):
        """True once at least one process interval has been recorded.

        (An older header/disk/ps/cpu-stats validity check used to follow the
        return here as unreachable dead code; it has been removed.)
        """
        return len(self.processes) != 0

    def add_process(self, process, start, end):
        """Record that *process* ran over [start, end], indexing it by both
        its start and its end time.  Duplicate registrations are ignored."""
        self.processes[process] = [start, end]
        if start not in self.start:
            self.start[start] = []
        if process not in self.start[start]:
            self.start[start].append(process)
        if end not in self.end:
            self.end[end] = []
        if process not in self.end[end]:
            self.end[end].append(process)

    def compile(self, writer):
        """Link parsed processes into a tree of pointers and calculate
        per-process statistics."""

        def find_parent_id_for(pid):
            """Climb the parent map until a pid with process samples is
            found; returns 0 when the chain runs out."""
            if pid == 0:   # was 'pid is 0': identity test on an int literal
                return 0
            ppid = self.parent_map.get(pid)
            if ppid:
                # many of these double forks are so short lived
                # that we have no samples, or process info for them
                # so climb the parent hierarchy to find one
                if int (ppid * 1000) not in self.ps_stats.process_map:
                    ppid = find_parent_id_for (ppid)
            else:
                return 0
            return ppid

        # merge in the cmdline data
        if self.cmdline is not None:
            for proc in self.ps_stats.process_map.values():
                # pids are stored scaled by 1000 ("fixed point") elsewhere
                rpid = int (proc.pid // 1000)
                if rpid in self.cmdline:
                    cmd = self.cmdline[rpid]
                    proc.exe = cmd['exe']
                    proc.args = cmd['args']

        # re-parent any stray orphans if we can
        if self.parent_map is not None:
            for process in self.ps_stats.process_map.values():
                ppid = find_parent_id_for (int(process.pid // 1000))
                if ppid:
                    process.ppid = ppid * 1000

        # stitch the tree together with pointers
        for process in self.ps_stats.process_map.values():
            process.set_parent (self.ps_stats.process_map)

        # count on fingers variously
        for process in self.ps_stats.process_map.values():
            process.calc_stats (self.ps_stats.sample_period)

    def crop(self, writer, crop_after):
        """Crop all statistics to the end of the first idle period following
        the start of any process named in *crop_after* (a comma separated
        list of names).  Returns the idle time found, or None."""

        def is_idle_at(util, start, j):
            """Idle means the instantaneous load at sample j and the average
            load over the following ~300 time units both stay below 25%."""
            k = j + 1
            while k < len(util) and util[k][0] < start + 300:
                k += 1
            k = min(k, len(util)-1)

            if util[j][1] >= 0.25:
                return False

            avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1)
            return avgload < 0.25

        def is_idle(util, start):
            """Apply is_idle_at at the first sample at/after *start*."""
            for j in range(0, len(util)):
                if util[j][0] < start:
                    continue
                return is_idle_at(util, start, j)
            else:
                return False

        names = [x[:15] for x in crop_after.split(",")]
        for proc in self.ps_stats.process_map.values():
            if proc.cmd in names or proc.exe in names:
                writer.info("selected proc '%s' from list (start %d)"
                            % (proc.cmd, proc.start_time))
                break
        else:
            # for/else: only reached when no process matched.  (The previous
            # 'if proc is None' check could never fire, since the loop
            # variable keeps its last value after a full iteration.)
            writer.warn("no selected crop proc '%s' in list" % crop_after)

        cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats]
        disk_util = [(sample.time, sample.util) for sample in self.disk_stats]

        idle = None
        for i in range(0, len(cpu_util)):
            if cpu_util[i][0] < proc.start_time:
                continue
            if is_idle_at(cpu_util, cpu_util[i][0], i) \
               and is_idle(disk_util, cpu_util[i][0]):
                idle = cpu_util[i][0]
                break

        if idle is None:
            writer.warn ("not idle after proc '%s'" % crop_after)
            return None

        crop_at = idle + 300
        writer.info ("cropping at time %d" % crop_at)
        # drop trailing cpu/disk samples beyond the crop point
        while len (self.cpu_stats) \
              and self.cpu_stats[-1].time > crop_at:
            self.cpu_stats.pop()
        while len (self.disk_stats) \
              and self.disk_stats[-1].time > crop_at:
            self.disk_stats.pop()

        self.ps_stats.end_time = crop_at

        # keep only processes that started before the crop point,
        # trimming their duration and samples to fit
        cropped_map = {}
        for key, value in self.ps_stats.process_map.items():
            if (value.start_time <= crop_at):
                cropped_map[key] = value

        for proc in cropped_map.values():
            proc.duration = min (proc.duration, crop_at - proc.start_time)
            while len (proc.samples) \
                  and proc.samples[-1].time > crop_at:
                proc.samples.pop()

        self.ps_stats.process_map = cropped_map

        return idle
237 | |||
238 | |||
239 | |||
class ParseError(Exception):
    """Raised when the bootchart input cannot be parsed."""

    def __init__(self, value):
        # keep the message around for __str__
        self.value = value

    def __str__(self):
        return self.value
247 | |||
248 | def _parse_headers(file): | ||
249 | """Parses the headers of the bootchart.""" | ||
250 | def parse(acc, line): | ||
251 | (headers, last) = acc | ||
252 | if '=' in line: | ||
253 | last, value = map (lambda x: x.strip(), line.split('=', 1)) | ||
254 | else: | ||
255 | value = line.strip() | ||
256 | headers[last] += value | ||
257 | return headers, last | ||
258 | return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0] | ||
259 | |||
260 | def _parse_timed_blocks(file): | ||
261 | """Parses (ie., splits) a file into so-called timed-blocks. A | ||
262 | timed-block consists of a timestamp on a line by itself followed | ||
263 | by zero or more lines of data for that point in time.""" | ||
264 | def parse(block): | ||
265 | lines = block.split('\n') | ||
266 | if not lines: | ||
267 | raise ParseError('expected a timed-block consisting a timestamp followed by data lines') | ||
268 | try: | ||
269 | return (int(lines[0]), lines[1:]) | ||
270 | except ValueError: | ||
271 | raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) | ||
272 | blocks = file.read().decode('utf-8').split('\n\n') | ||
273 | return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')] | ||
274 | |||
def _parse_proc_ps_log(writer, file):
    """
    Parse per-process /proc/<pid>/stat snapshots into a ProcessStats.

    * See proc(5) for details.
    *
    * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
    *  cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
    *  kstkesp, kstkeip}

    Returns None when fewer than two timed blocks exist (no deltas can
    be computed).
    """
    processMap = {}
    ltime = 0  # timestamp of the previous block, for load deltas
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        for line in lines:
            if not line: continue
            tokens = line.split(' ')
            if len(tokens) < 21:
                continue

            # comm may itself contain spaces; locate the token that closes
            # the parenthesised command name so later fields index correctly
            offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            # magic fixed point-ness: pids are scaled by 1000 so synthetic
            # child pids can be interleaved between real ones
            pid *= 1000
            ppid *= 1000
            if pid in processMap:
                process = processMap[pid]
                process.cmd = cmd.strip('()') # why rename after latest name??
            else:
                process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
                processMap[pid] = process

            # only emit a sample once we have a previous reading to diff against
            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime))
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
        ltime = time

    if len (timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)

    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
323 | |||
def _parse_taskstats_log(writer, file):
    """
    Parse taskstats collector output into a ProcessStats.

    * See bootchart-collector.c for details.
    *
    * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
    *

    Returns None when fewer than two timed blocks exist.
    """
    processMap = {}
    pidRewrites = {}  # original pid -> synthetic pid after a comm change
    ltime = None
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        # we have no 'stime' from taskstats, so prep 'init'
        if ltime is None:
            process = Process(writer, 1, '[init]', 0, 0)
            processMap[1000] = process
            ltime = time
            # continue
        for line in lines:
            if not line: continue
            tokens = line.split(' ')
            if len(tokens) != 6:
                continue

            opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
            cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1]),

            # make space for trees of pids (fixed-point scaling by 1000)
            opid *= 1000
            ppid *= 1000

            # when the process name changes, we re-write the pid.
            if opid in pidRewrites:
                pid = pidRewrites[opid]
            else:
                pid = opid

            cmd = cmd.strip('(').strip(')')
            if pid in processMap:
                process = processMap[pid]
                if process.cmd != cmd:
                    # comm changed (e.g. exec): fork a synthetic process
                    # entry under a bumped pid and remember the rewrite
                    pid += 1
                    pidRewrites[opid] = pid
                    process = process.split (writer, pid, cmd, ppid, time)
                    processMap[pid] = process
                else:
                    process.cmd = cmd;
            else:
                process = Process(writer, pid, cmd, ppid, time)
                processMap[pid] = process

            # deltas against the previous reading for this process
            delta_cpu_ns = (float) (cpu_ns - process.last_cpu_ns)
            delta_blkio_delay_ns = (float) (blkio_delay_ns - process.last_blkio_delay_ns)
            delta_swapin_delay_ns = (float) (swapin_delay_ns - process.last_swapin_delay_ns)

            # make up some state data ...
            if delta_cpu_ns > 0:
                state = "R"
            elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                state = "D"
            else:
                state = "S"

            # retain the ns timing information into a CPUSample - that tries
            # with the old-style to be a %age of CPU used in this time-slice.
            if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                cpuSample = CPUSample('null', delta_cpu_ns, 0.0,
                                      delta_blkio_delay_ns,
                                      delta_swapin_delay_ns)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_cpu_ns = cpu_ns
            process.last_blkio_delay_ns = blkio_delay_ns
            process.last_swapin_delay_ns = swapin_delay_ns
        ltime = time

    if len (timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)

    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
409 | |||
def _parse_proc_stat_log(file):
    """Parse /proc/stat snapshots into a list of CPUSample.

    Only the aggregate 'cpu' line of each timed block is used; each
    sample holds the user/system/iowait fractions of the interval since
    the previous block.
    """
    samples = []
    prev = None
    for time, lines in _parse_timed_blocks(file):
        # skip empty blocks
        if not lines:
            continue
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        fields = [int(tok) for tok in lines[0].split()[1:]]
        if prev:
            user = float((fields[0] + fields[1]) - (prev[0] + prev[1]))
            system = float((fields[2] + fields[5] + fields[6]) - (prev[2] + prev[5] + prev[6]))
            idle = float(fields[3] - prev[3])
            iowait = float(fields[4] - prev[4])

            total = max(user + system + idle + iowait, 1)
            samples.append( CPUSample(time, user/total, system/total, iowait/total) )

        prev = fields
        # the remaining statistics lines are ignored
    return samples
432 | |||
def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole device, eg. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}

    Returns a list of DiskSample; utilisation is normalised per-CPU and
    clamped to [0, 1].
    """
    # raw string literal: '\d' in a plain string is an invalid escape
    # (DeprecationWarning today, an error in future Python versions)
    disk_regex_re = re.compile (r'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')

    # this gets called an awful lot.
    def is_relevant_line(linetokens):
        if len(linetokens) != 14:
            return False
        disk = linetokens[2]
        return disk_regex_re.match(disk)

    disk_stat_samples = []

    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(),lines) if is_relevant_line(linetokens)]

        for tokens in relevant_tokens:
            disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
            sample.add_diskdata([rsect, wsect, use])

        disk_stat_samples.append(sample)

    disk_stats = []
    # consecutive pairs: sample1 precedes sample2, so both 'interval' and
    # the per-field 'sums' below come out negative and the signs cancel.
    for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
        interval = sample1.time - sample2.time
        if interval == 0:
            interval = 1
        sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ]
        readTput = sums[0] / 2.0 * 100.0 / interval
        writeTput = sums[1] / 2.0 * 100.0 / interval
        util = float( sums[2] ) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))

    return disk_stats
473 | |||
def _parse_proc_meminfo_log(file):
    """
    Parse file for global memory statistics.
    The format of relevant lines should be: ^key: value( unit)?

    Returns a list of valid MemSample, one per timed block.
    Raises ParseError on a malformed meminfo line.
    """
    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)

    mem_stats = []
    meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')

    for time, lines in _parse_timed_blocks(file):
        sample = MemSample(time)

        for line in lines:
            match = meminfo_re.match(line)
            if not match:
                # bug fix: the old code formatted the message with
                # match.groups(0), but match is None here, which raised
                # AttributeError instead of the intended ParseError
                raise ParseError("Invalid meminfo line \"%s\"" % line)
            sample.add_value(match.group(1), int(match.group(2)))

        if sample.valid():
            mem_stats.append(sample)

    return mem_stats
497 | |||
# if we boot the kernel with: initcall_debug printk.time=1 we can
# get all manner of interesting data from the dmesg output
# We turn this into a pseudo-process tree: each initcall event becomes
# a synthetic Process hung off the "k-boot" kernel root.
# we don't try to detect a "kernel finished" state - since the kernel
# continues to do interesting things after init is called.
#
# sample input:
# [    0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL  Napa     00000001 MSFT 01000013)
# ...
# [    0.039993] calling  migration_init+0x0/0x6b @ 1
# [    0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
def _parse_dmesg(writer, file):
    """Parse initcall_debug dmesg output into a collection of pseudo
    Process objects keyed (internally) by initcall symbol name.

    Timestamps are converted to the chart's time units (ms / 10); a
    large initial timestamp (clock set-up) is treated as a base offset.
    Returns the process values (including the 'k-boot' root).
    """
    # raw strings: avoid invalid '\s'/'\S' escapes in plain literals
    timestamp_re = re.compile (r"^\[\s*(\d+\.\d+)\s*]\s+(.*)$")
    split_re = re.compile (r"^(\S+)\s+([\S\+_-]+) (.*)$")
    processMap = {}
    idx = 0
    inc = 1.0 / 1000000  # tiny pid increment keeps synthetic pids unique
    kernel = Process(writer, idx, "k-boot", 0, 0.1)
    processMap['k-boot'] = kernel
    base_ts = False
    max_ts = 0
    for line in file.read().decode('utf-8').split('\n'):
        t = timestamp_re.match (line)
        if t is None:
            # line without a "[ time ]" prefix
            continue

        time_ms = float (t.group(1)) * 1000
        # looks like we may have a huge diff after the clock
        # has been set up. This could lead to huge graph:
        # so huge we will be killed by the OOM.
        # So instead of using the plain timestamp we will
        # use a delta to first one and skip the first one
        # for convenience
        if max_ts == 0 and not base_ts and time_ms > 1000:
            base_ts = time_ms
            continue
        max_ts = max(time_ms, max_ts)
        if base_ts:
            time_ms -= base_ts
        m = split_re.match (t.group(2))

        if m is None:
            continue
        event = m.group(1)   # renamed from 'type': don't shadow the builtin
        func = m.group(2)
        rest = m.group(3)

        if t.group(2).startswith ('Write protecting the') or \
           t.group(2).startswith ('Freeing unused kernel memory'):
            kernel.duration = time_ms / 10
            continue

        if event == "calling":
            ppid = kernel.pid
            p = re.match (r"\@ (\d+)", rest)
            if p is not None:
                ppid = float (p.group(1)) // 1000
            name = func.split ('+', 1) [0]
            idx += inc
            processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
        elif event == "initcall":
            if func in processMap:
                process = processMap[func]
                process.duration = (time_ms / 10) - process.start_time
            else:
                print("corrupted init call for %s" % (func))

        elif event == "async_waiting" or event == "async_continuing":
            continue # ignore

    return processMap.values()
576 | |||
577 | # | ||
578 | # Parse binary pacct accounting file output if we have one | ||
579 | # cf. /usr/include/linux/acct.h | ||
580 | # | ||
581 | def _parse_pacct(writer, file): | ||
582 | # read LE int32 | ||
583 | def _read_le_int32(file): | ||
584 | byts = file.read(4) | ||
585 | return (ord(byts[0])) | (ord(byts[1]) << 8) | \ | ||
586 | (ord(byts[2]) << 16) | (ord(byts[3]) << 24) | ||
587 | |||
588 | parent_map = {} | ||
589 | parent_map[0] = 0 | ||
590 | while file.read(1) != "": # ignore flags | ||
591 | ver = file.read(1) | ||
592 | if ord(ver) < 3: | ||
593 | print("Invalid version 0x%x" % (ord(ver))) | ||
594 | return None | ||
595 | |||
596 | file.seek (14, 1) # user, group etc. | ||
597 | pid = _read_le_int32 (file) | ||
598 | ppid = _read_le_int32 (file) | ||
599 | # print "Parent of %d is %d" % (pid, ppid) | ||
600 | parent_map[pid] = ppid | ||
601 | file.seek (4 + 4 + 16, 1) # timings | ||
602 | file.seek (16, 1) # acct_comm | ||
603 | return parent_map | ||
604 | |||
605 | def _parse_paternity_log(writer, file): | ||
606 | parent_map = {} | ||
607 | parent_map[0] = 0 | ||
608 | for line in file.read().decode('utf-8').split('\n'): | ||
609 | if not line: | ||
610 | continue | ||
611 | elems = line.split(' ') # <Child> <Parent> | ||
612 | if len (elems) >= 2: | ||
613 | # print "paternity of %d is %d" % (int(elems[0]), int(elems[1])) | ||
614 | parent_map[int(elems[0])] = int(elems[1]) | ||
615 | else: | ||
616 | print("Odd paternity line '%s'" % (line)) | ||
617 | return parent_map | ||
618 | |||
619 | def _parse_cmdline_log(writer, file): | ||
620 | cmdLines = {} | ||
621 | for block in file.read().decode('utf-8').split('\n\n'): | ||
622 | lines = block.split('\n') | ||
623 | if len (lines) >= 3: | ||
624 | # print "Lines '%s'" % (lines[0]) | ||
625 | pid = int (lines[0]) | ||
626 | values = {} | ||
627 | values['exe'] = lines[1].lstrip(':') | ||
628 | args = lines[2].lstrip(':').split('\0') | ||
629 | args.pop() | ||
630 | values['args'] = args | ||
631 | cmdLines[pid] = values | ||
632 | return cmdLines | ||
633 | |||
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference.  Always returns at least 1."""
    if headers is None:
        return 1
    explicit = headers.get("system.cpu.num")
    if explicit:
        return max(int(explicit), 1)
    model = headers.get("system.cpu")
    if model is not None:
        # fall back to a trailing "(N)" in the model string
        hit = re.match(r".*\((\d+)\)", model)
        if hit is not None:
            return max(int(hit.group(1)), 1)
    return 1
649 | |||
650 | def _do_parse(writer, state, filename, file): | ||
651 | writer.info("parsing '%s'" % filename) | ||
652 | t1 = clock() | ||
653 | paths = filename.split("/") | ||
654 | task = paths[-1] | ||
655 | pn = paths[-2] | ||
656 | start = None | ||
657 | end = None | ||
658 | for line in file: | ||
659 | if line.startswith("Started:"): | ||
660 | start = int(float(line.split()[-1])) | ||
661 | elif line.startswith("Ended:"): | ||
662 | end = int(float(line.split()[-1])) | ||
663 | if start and end: | ||
664 | state.add_process(pn + ":" + task, start, end) | ||
665 | t2 = clock() | ||
666 | writer.info(" %s seconds" % str(t2-t1)) | ||
667 | return state | ||
668 | |||
def parse_file(writer, state, filename):
    """Parse a single on-disk buildstats file into *state*, remembering the
    first filename seen on the state object.  Returns the updated state."""
    if state.filename is None:
        state.filename = filename
    basename = os.path.basename(filename)  # kept for parity; unused
    with open(filename, "rb") as stream:
        return _do_parse(writer, state, filename, stream)
675 | |||
def parse_paths(writer, state, paths):
    """Recursively parse every path in *paths* into *state*.

    Directories are walked in sorted order, tarballs (.tar / .tgz /
    .tar.gz) are parsed member by member, and anything else is treated
    as a single buildstats file.  Missing paths are warned about and
    skipped.  Raises ParseError for unreadable tarfiles.
    """
    for path in paths:
        if state.filename is None:
            state.filename = path
        root, extension = os.path.splitext(path)
        if not os.path.exists(path):
            writer.warn("warning: path '%s' does not exist, ignoring." % path)
            continue
        if os.path.isdir(path):
            members = sorted([os.path.join(path, entry) for entry in os.listdir(path)])
            state = parse_paths(writer, state, members)
        elif extension in [".tar", ".tgz", ".gz"]:
            if extension == ".gz":
                # peel the .gz and insist on an inner .tar
                root, extension = os.path.splitext(root)
                if extension != ".tar":
                    writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
                    continue
            tf = None
            try:
                writer.status("parsing '%s'" % path)
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(writer, state, name, tf.extractfile(name))
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(writer, state, path)
    return state
708 | |||
def split_res(res, options):
    """ Split the res into n pieces """
    if options.num <= 1:
        return [res]

    res_list = []
    s_list = sorted(res.start.keys())
    # ceiling division so every start time lands in some fragment
    frag_size = len(s_list) / float(options.num)
    if frag_size > int(frag_size):
        frag_size = int(frag_size + 1)
    else:
        frag_size = int(frag_size)

    start = 0
    end = frag_size
    while start < end:
        state = Trace(None, [], None)
        if options.full_time:
            state.min = min(res.start.keys())
            state.max = max(res.end.keys())
        for i in range(start, end):
            for proc in res.start[s_list[i]]:
                state.add_process(proc, s_list[i], res.processes[proc][1])
        start = end
        end = min(end + frag_size, len(s_list))
        res_list.append(state)
    return res_list