summaryrefslogtreecommitdiffstats
path: root/scripts/pybootchartgui/pybootchartgui/parsing.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r--scripts/pybootchartgui/pybootchartgui/parsing.py791
1 files changed, 628 insertions, 163 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py
index 6343fd5a7b..c8a6a5721a 100644
--- a/scripts/pybootchartgui/pybootchartgui/parsing.py
+++ b/scripts/pybootchartgui/pybootchartgui/parsing.py
@@ -1,178 +1,638 @@
1# This file is part of pybootchartgui.
2
3# pybootchartgui is free software: you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation, either version 3 of the License, or
6# (at your option) any later version.
7
8# pybootchartgui is distributed in the hope that it will be useful,
9# but WITHOUT ANY WARRANTY; without even the implied warranty of
10# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11# GNU General Public License for more details.
12
13# You should have received a copy of the GNU General Public License
14# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.
15
16
1from __future__ import with_statement 17from __future__ import with_statement
2 18
3import os 19import os
4import string 20import string
5import re 21import re
22import sys
6import tarfile 23import tarfile
24from time import clock
7from collections import defaultdict 25from collections import defaultdict
26from functools import reduce
27
28from .samples import *
29from .process_tree import ProcessTree
30
# Python 3 removed the builtin 'long'; alias it to int so the taskstats
# parser can keep calling long() on either major version.
if sys.version_info[0] >= 3:
    long = int
33
34# Parsing produces as its end result a 'Trace'
35
class Trace:
    """Top-level result of parsing.

    Holds every statistic stream pulled out of a bootchart (per-task
    start/end times, process samples, CPU/disk/memory stats, headers)
    plus the process trees that can be built from them.
    """
    def __init__(self, writer, paths, options):
        self.processes = {}
        self.start = {}
        self.end = {}
        self.headers = None
        self.disk_stats = None
        self.ps_stats = None
        self.taskstats = None
        self.cpu_stats = None
        self.cmdline = None
        self.kernel = None
        self.kernel_tree = None
        self.filename = None
        self.parent_map = None
        self.mem_stats = None

        parse_paths (writer, self, paths, options.mintime)
        if not self.valid():
            raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))

        # NOTE(review): everything below this 'return' is intentionally
        # unreachable -- it is the retained upstream pybootchartgui code
        # path that builds process trees from parsed /proc samples.
        return

        # Turn that parsed information into something more useful
        # link processes into a tree of pointers, calculate statistics
        self.compile(writer)

        # Crop the chart to the end of the first idle period after the given
        # process
        if options.crop_after:
            idle = self.crop (writer, options.crop_after)
        else:
            idle = None

        # Annotate other times as the first start point of given process lists
        self.times = [ idle ]
        if options.annotate:
            for procnames in options.annotate:
                names = [x[:15] for x in procnames.split(",")]
                for proc in self.ps_stats.process_map.values():
                    if proc.cmd in names:
                        self.times.append(proc.start_time)
                        break
                else:
                    self.times.append(None)

        self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats,
                                     self.ps_stats.sample_period,
                                     self.headers.get("profile.process"),
                                     options.prune, idle, self.taskstats,
                                     self.parent_map is not None)

        if self.kernel is not None:
            self.kernel_tree = ProcessTree(writer, self.kernel, None, 0,
                                           self.headers.get("profile.process"),
                                           False, None, None, True)

    def valid(self):
        """A trace is valid as soon as any per-task file was parsed."""
        return len(self.processes) != 0
        # NOTE(review): unreachable legacy validity check, kept from upstream.
        return self.headers != None and self.disk_stats != None and \
               self.ps_stats != None and self.cpu_stats != None

    def compile(self, writer):
        """Link parsed processes into a tree and compute their statistics."""

        def find_parent_id_for(pid):
            # bug fix: was 'pid is 0' -- an identity test against an int
            # literal, which is implementation-defined behaviour.
            if pid == 0:
                return 0
            ppid = self.parent_map.get(pid)
            if ppid:
                # many of these double forks are so short lived
                # that we have no samples, or process info for them
                # so climb the parent hierarchy to find one
                if int (ppid * 1000) not in self.ps_stats.process_map:
                    ppid = find_parent_id_for (ppid)
            else:
                return 0
            return ppid

        # merge in the cmdline data
        if self.cmdline is not None:
            for proc in self.ps_stats.process_map.values():
                rpid = int (proc.pid // 1000)
                if rpid in self.cmdline:
                    cmd = self.cmdline[rpid]
                    proc.exe = cmd['exe']
                    proc.args = cmd['args']

        # re-parent any stray orphans if we can
        if self.parent_map is not None:
            for process in self.ps_stats.process_map.values():
                ppid = find_parent_id_for (int(process.pid // 1000))
                if ppid:
                    process.ppid = ppid * 1000

        # stitch the tree together with pointers
        for process in self.ps_stats.process_map.values():
            process.set_parent (self.ps_stats.process_map)

        # count on fingers variously
        for process in self.ps_stats.process_map.values():
            process.calc_stats (self.ps_stats.sample_period)

    def crop(self, writer, crop_after):
        """Crop all statistic streams at the end of the first idle period
        following the start of any process named in 'crop_after'
        (comma-separated, names truncated to 15 chars as in comm).
        Returns the idle time, or None if the system never went idle."""

        def is_idle_at(util, start, j):
            # look ahead ~300 time units and require <25% average load
            k = j + 1
            while k < len(util) and util[k][0] < start + 300:
                k += 1
            k = min(k, len(util)-1)

            if util[j][1] >= 0.25:
                return False

            avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1)
            if avgload < 0.25:
                return True
            else:
                return False

        def is_idle(util, start):
            for j in range(0, len(util)):
                if util[j][0] < start:
                    continue
                return is_idle_at(util, start, j)
            else:
                return False

        names = [x[:15] for x in crop_after.split(",")]
        # bug fix: the original tested 'if proc is None' after the loop,
        # which can never be true (the loop variable holds the last
        # process when nothing matched), so the warning never fired.
        selected = None
        for proc in self.ps_stats.process_map.values():
            if proc.cmd in names or proc.exe in names:
                writer.info("selected proc '%s' from list (start %d)"
                            % (proc.cmd, proc.start_time))
                selected = proc
                break
        if selected is None:
            writer.warn("no selected crop proc '%s' in list" % crop_after)

        cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats]
        disk_util = [(sample.time, sample.util) for sample in self.disk_stats]

        idle = None
        for i in range(0, len(cpu_util)):
            # as in the original, fall back to the last-iterated proc when
            # nothing matched the crop list
            if cpu_util[i][0] < proc.start_time:
                continue
            if is_idle_at(cpu_util, cpu_util[i][0], i) \
               and is_idle(disk_util, cpu_util[i][0]):
                idle = cpu_util[i][0]
                break

        if idle is None:
            writer.warn ("not idle after proc '%s'" % crop_after)
            return None

        crop_at = idle + 300
        writer.info ("cropping at time %d" % crop_at)
        while len (self.cpu_stats) \
              and self.cpu_stats[-1].time > crop_at:
            self.cpu_stats.pop()
        while len (self.disk_stats) \
              and self.disk_stats[-1].time > crop_at:
            self.disk_stats.pop()

        self.ps_stats.end_time = crop_at

        cropped_map = {}
        for key, value in self.ps_stats.process_map.items():
            if (value.start_time <= crop_at):
                cropped_map[key] = value

        for proc in cropped_map.values():
            proc.duration = min (proc.duration, crop_at - proc.start_time)
            while len (proc.samples) \
                  and proc.samples[-1].time > crop_at:
                proc.samples.pop()

        self.ps_stats.process_map = cropped_map

        return idle
220
8 221
9from samples import *
10from process_tree import ProcessTree
11 222
class ParseError(Exception):
    """Raised when a bootchart log cannot be parsed."""

    def __init__(self, value):
        # keep the message accessible to callers as .value
        self.value = value

    def __str__(self):
        return str(self.value)
19 230
20def _parse_headers(file): 231def _parse_headers(file):
21 """Parses the headers of the bootchart.""" 232 """Parses the headers of the bootchart."""
22 def parse((headers,last), line): 233 def parse(acc, line):
23 if '=' in line: last,value = map(string.strip, line.split('=', 1)) 234 (headers, last) = acc
24 else: value = line.strip() 235 if '=' in line:
25 headers[last] += value 236 last, value = map (lambda x: x.strip(), line.split('=', 1))
26 return headers,last 237 else:
27 return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0] 238 value = line.strip()
239 headers[last] += value
240 return headers, last
241 return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0]
28 242
29def _parse_timed_blocks(file): 243def _parse_timed_blocks(file):
30 """Parses (ie., splits) a file into so-called timed-blocks. A 244 """Parses (ie., splits) a file into so-called timed-blocks. A
31 timed-block consists of a timestamp on a line by itself followed 245 timed-block consists of a timestamp on a line by itself followed
32 by zero or more lines of data for that point in time.""" 246 by zero or more lines of data for that point in time."""
33 def parse(block): 247 def parse(block):
34 lines = block.split('\n') 248 lines = block.split('\n')
35 if not lines: 249 if not lines:
36 raise ParseError('expected a timed-block consisting a timestamp followed by data lines') 250 raise ParseError('expected a timed-block consisting a timestamp followed by data lines')
37 try: 251 try:
38 return (int(lines[0]), lines[1:]) 252 return (int(lines[0]), lines[1:])
39 except ValueError: 253 except ValueError:
40 raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) 254 raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
41 blocks = file.read().split('\n\n') 255 blocks = file.read().decode('utf-8').split('\n\n')
42 return [parse(block) for block in blocks if block.strip()] 256 return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]
43 257
def _parse_proc_ps_log(writer, file):
    """
    Parse a proc_ps.log of timed blocks of /proc/<pid>/stat lines into a
    ProcessStats, or None if fewer than two samples were collected.

    * See proc(5) for details.
    *
    * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
    * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
    * kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        for line in lines:
            if not line: continue
            tokens = line.split(' ')
            if len(tokens) < 21:
                continue

            # comm can contain spaces; find the token that closes it.
            # bug fix: 'token[-1]' raised IndexError on empty tokens
            # (produced by doubled spaces) -- endswith('') is safe.
            offsets = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')]
            if not offsets:
                continue        # malformed line: no closing ')' for comm
            offset = offsets[0]
            # bug fix: the fixed 'len(tokens) < 21' guard did not protect
            # the tokens[21+offset] access below when comm held spaces
            if len(tokens) <= 21 + offset:
                continue
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            # magic fixed point-ness ...
            pid *= 1000
            ppid *= 1000
            if pid in processMap:
                process = processMap[pid]
                process.cmd = cmd.strip('()') # why rename after latest name??
            else:
                process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
                processMap[pid] = process

            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime))
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
        ltime = time

    if len (timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)

    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
306
def _parse_taskstats_log(writer, file):
    """
    Parse a taskstats log into a ProcessStats, or None if fewer than two
    samples were collected.

    * See bootchart-collector.c for details.
    *
    * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
    *
    """
    processMap = {}
    pidRewrites = {}
    ltime = None
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        # we have no 'stime' from taskstats, so prep 'init'
        if ltime is None:
            process = Process(writer, 1, '[init]', 0, 0)
            processMap[1000] = process
            ltime = time
        for line in lines:
            if not line: continue
            tokens = line.split(' ')
            if len(tokens) != 6:
                continue

            opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
            # cleanup: dropped the trailing comma that made the RHS below a
            # redundant tuple in the original
            cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1])

            # make space for trees of pids
            opid *= 1000
            ppid *= 1000

            # when the process name changes, we re-write the pid.
            if opid in pidRewrites:
                pid = pidRewrites[opid]
            else:
                pid = opid

            cmd = cmd.strip('()')
            if pid in processMap:
                process = processMap[pid]
                if process.cmd != cmd:
                    pid += 1
                    pidRewrites[opid] = pid
                    process = process.split (writer, pid, cmd, ppid, time)
                    processMap[pid] = process
                else:
                    process.cmd = cmd
            else:
                process = Process(writer, pid, cmd, ppid, time)
                processMap[pid] = process

            # cleanup: C-style '(float) (...)' casts rewritten as calls
            delta_cpu_ns = float (cpu_ns - process.last_cpu_ns)
            delta_blkio_delay_ns = float (blkio_delay_ns - process.last_blkio_delay_ns)
            delta_swapin_delay_ns = float (swapin_delay_ns - process.last_swapin_delay_ns)

            # make up some state data ...
            if delta_cpu_ns > 0:
                state = "R"
            elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                state = "D"
            else:
                state = "S"

            # retain the ns timing information into a CPUSample - that tries
            # with the old-style to be a %age of CPU used in this time-slice.
            if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                cpuSample = CPUSample('null', delta_cpu_ns, 0.0,
                                      delta_blkio_delay_ns,
                                      delta_swapin_delay_ns)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_cpu_ns = cpu_ns
            process.last_blkio_delay_ns = blkio_delay_ns
            process.last_swapin_delay_ns = swapin_delay_ns
        ltime = time

    if len (timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)

    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)
392
def _parse_proc_stat_log(file):
    """Turn timed blocks of /proc/stat dumps into a list of CPUSamples,
    each holding the user/system/io load relative to the previous
    sample."""
    samples = []
    prev = None
    for time, lines in _parse_timed_blocks(file):
        # skip blocks that carry no data lines at all
        if not lines:
            continue
        # first line is the aggregate cpu line:
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        cur = [int(tok) for tok in lines[0].split()[1:]]
        if prev:
            user = float((cur[0] + cur[1]) - (prev[0] + prev[1]))
            system = float((cur[2] + cur[5] + cur[6]) - (prev[2] + prev[5] + prev[6]))
            idle = float(cur[3] - prev[3])
            iowait = float(cur[4] - prev[4])

            total = max(user + system + idle + iowait, 1)
            samples.append( CPUSample(time, user/total, system/total, iowait/total) )

        prev = cur
        # the remaining per-cpu / statistics lines are ignored
    return samples
415
def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole device, eg. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    # bug fix: raw string -- '\d' in a plain literal is an invalid escape
    # sequence (DeprecationWarning, and an error in future Pythons)
    disk_regex_re = re.compile (r'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')

    # this gets called an awful lot.
    def is_relevant_line(linetokens):
        if len(linetokens) != 14:
            return False
        disk = linetokens[2]
        return disk_regex_re.match(disk)

    disk_stat_samples = []

    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(), lines) if is_relevant_line(linetokens)]

        for tokens in relevant_tokens:
            disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
            sample.add_diskdata([rsect, wsect, use])

        disk_stat_samples.append(sample)

    disk_stats = []
    for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
        # sample1 precedes sample2, so interval and the sums below are both
        # negative and the signs cancel in the throughput/util divisions
        interval = sample1.time - sample2.time
        if interval == 0:
            interval = 1
        sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ]
        readTput = sums[0] / 2.0 * 100.0 / interval
        writeTput = sums[1] / 2.0 * 100.0 / interval
        util = float( sums[2] ) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))

    return disk_stats
456
def _parse_proc_meminfo_log(file):
    """
    Parse file for global memory statistics.
    The format of relevant lines should be: ^key: value( unit)?
    """
    # informational: the keys MemSample is expected to care about
    # (not referenced below; retained from the original)
    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)

    mem_stats = []
    meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')

    for time, lines in _parse_timed_blocks(file):
        sample = MemSample(time)

        for line in lines:
            match = meminfo_re.match(line)
            if not match:
                # bug fix: 'match' is None on this path, so the original
                # 'match.groups(0)' raised AttributeError instead of
                # reporting the offending line in the ParseError
                raise ParseError("Invalid meminfo line \"%s\"" % line)
            sample.add_value(match.group(1), int(match.group(2)))

        if sample.valid():
            mem_stats.append(sample)

    return mem_stats
480
481# if we boot the kernel with: initcall_debug printk.time=1 we can
482# get all manner of interesting data from the dmesg output
483# We turn this into a pseudo-process tree: each event is
484# characterised by a
485# we don't try to detect a "kernel finished" state - since the kernel
486# continues to do interesting things after init is called.
487#
488# sample input:
489# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013)
490# ...
491# [ 0.039993] calling migration_init+0x0/0x6b @ 1
492# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
def _parse_dmesg(writer, file):
    """Parse dmesg output (booted with initcall_debug printk.time=1) into
    a pseudo process map keyed by initcall function name; the values()
    view of that map is returned."""
    # bug fix: raw strings -- these patterns contain backslash escapes
    timestamp_re = re.compile (r"^\[\s*(\d+\.\d+)\s*]\s+(.*)$")
    split_re = re.compile (r"^(\S+)\s+([\S\+_-]+) (.*)$")
    processMap = {}
    idx = 0
    inc = 1.0 / 1000000
    kernel = Process(writer, idx, "k-boot", 0, 0.1)
    processMap['k-boot'] = kernel
    base_ts = False
    max_ts = 0
    for line in file.read().decode('utf-8').split('\n'):
        t = timestamp_re.match (line)
        if t is None:
            continue

        time_ms = float (t.group(1)) * 1000
        # looks like we may have a huge diff after the clock
        # has been set up. This could lead to huge graph:
        # so huge we will be killed by the OOM.
        # So instead of using the plain timestamp we will
        # use a delta to first one and skip the first one
        # for convenience
        if max_ts == 0 and not base_ts and time_ms > 1000:
            base_ts = time_ms
            continue
        max_ts = max(time_ms, max_ts)
        if base_ts:
            time_ms -= base_ts
        m = split_re.match (t.group(2))

        if m is None:
            continue
        event = m.group(1)   # renamed from 'type', which shadowed the builtin
        func = m.group(2)
        rest = m.group(3)

        if t.group(2).startswith ('Write protecting the') or \
           t.group(2).startswith ('Freeing unused kernel memory'):
            kernel.duration = time_ms / 10
            continue

        if event == "calling":
            ppid = kernel.pid
            p = re.match (r"\@ (\d+)", rest)
            if p is not None:
                ppid = float (p.group(1)) // 1000
            name = func.split ('+', 1) [0]
            idx += inc
            processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
        elif event == "initcall":
            if func in processMap:
                process = processMap[func]
                process.duration = (time_ms / 10) - process.start_time
            else:
                print("corrupted init call for %s" % (func))

        elif event == "async_waiting" or event == "async_continuing":
            continue # ignore

    return processMap.values()
559
560#
561# Parse binary pacct accounting file output if we have one
562# cf. /usr/include/linux/acct.h
563#
564def _parse_pacct(writer, file):
565 # read LE int32
566 def _read_le_int32(file):
567 byts = file.read(4)
568 return (ord(byts[0])) | (ord(byts[1]) << 8) | \
569 (ord(byts[2]) << 16) | (ord(byts[3]) << 24)
570
571 parent_map = {}
572 parent_map[0] = 0
573 while file.read(1) != "": # ignore flags
574 ver = file.read(1)
575 if ord(ver) < 3:
576 print("Invalid version 0x%x" % (ord(ver)))
577 return None
578
579 file.seek (14, 1) # user, group etc.
580 pid = _read_le_int32 (file)
581 ppid = _read_le_int32 (file)
582# print "Parent of %d is %d" % (pid, ppid)
583 parent_map[pid] = ppid
584 file.seek (4 + 4 + 16, 1) # timings
585 file.seek (16, 1) # acct_comm
586 return parent_map
587
588def _parse_paternity_log(writer, file):
589 parent_map = {}
590 parent_map[0] = 0
591 for line in file.read().decode('utf-8').split('\n'):
592 if not line:
593 continue
594 elems = line.split(' ') # <Child> <Parent>
595 if len (elems) >= 2:
596# print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
597 parent_map[int(elems[0])] = int(elems[1])
598 else:
599 print("Odd paternity line '%s'" % (line))
600 return parent_map
601
602def _parse_cmdline_log(writer, file):
603 cmdLines = {}
604 for block in file.read().decode('utf-8').split('\n\n'):
605 lines = block.split('\n')
606 if len (lines) >= 3:
607# print "Lines '%s'" % (lines[0])
608 pid = int (lines[0])
609 values = {}
610 values['exe'] = lines[1].lstrip(':')
611 args = lines[2].lstrip(':').split('\0')
612 args.pop()
613 values['args'] = args
614 cmdLines[pid] = values
615 return cmdLines
616
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
    if headers is None:
        return 1
    explicit = headers.get("system.cpu.num")
    if explicit:
        return max (int (explicit), 1)
    cpu_model = headers.get("system.cpu")
    if cpu_model is None:
        return 1
    mat = re.match(r".*\((\d+)\)", cpu_model)
    if mat is None:
        return 1
    return max (int(mat.group(1)), 1)
160
161class ParserState:
162 def __init__(self):
163 self.processes = {}
164 self.start = {}
165 self.end = {}
166 632
167 def valid(self): 633def _do_parse(writer, state, filename, file, mintime):
168 return len(self.processes) != 0 634 writer.info("parsing '%s'" % filename)
169 635 t1 = clock()
170
171_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"])
172
173def _do_parse(state, filename, file, mintime):
174 #print filename
175 #writer.status("parsing '%s'" % filename)
176 paths = filename.split("/") 636 paths = filename.split("/")
177 task = paths[-1] 637 task = paths[-1]
178 pn = paths[-2] 638 pn = paths[-2]
@@ -194,44 +654,49 @@ def _do_parse(state, filename, file, mintime):
194 state.end[end] = [] 654 state.end[end] = []
195 if k not in state.end[end]: 655 if k not in state.end[end]:
196 state.end[end].append(pn + ":" + task) 656 state.end[end].append(pn + ":" + task)
657 t2 = clock()
658 writer.info(" %s seconds" % str(t2-t1))
197 return state 659 return state
198 660
def parse_file(writer, state, filename, mintime):
    """Parse a single bootchart log file into 'state' and return it.
    Also records the first filename seen on the state.  (The unused
    'basename' local from the original was dropped.)"""
    if state.filename is None:
        state.filename = filename
    with open(filename, "rb") as file:
        return _do_parse(writer, state, filename, file, mintime)
203 667
def parse_paths(writer, state, paths, mintime):
    """Parse every path in 'paths' -- a log file, a directory of log
    files, or a (possibly gzipped) tarball -- into 'state' and return it.
    Nonexistent paths are warned about and skipped."""
    for path in paths:
        if state.filename is None:
            state.filename = path
        root, extension = os.path.splitext(path)
        if not(os.path.exists(path)):
            writer.warn("warning: path '%s' does not exist, ignoring." % path)
            continue
        if os.path.isdir(path):
            files = sorted(os.path.join(path, f) for f in os.listdir(path))
            state = parse_paths(writer, state, files, mintime)
        elif extension in [".tar", ".tgz", ".gz"]:
            if extension == ".gz":
                root, extension = os.path.splitext(root)
                if extension != ".tar":
                    writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
                    continue
            tf = None
            try:
                writer.status("parsing '%s'" % path)
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    member = tf.extractfile(name)
                    # extractfile() returns None for directories and
                    # special members; skip those
                    if member is None:
                        continue
                    # bug fix: the original call dropped the 'mintime'
                    # argument that _do_parse requires
                    state = _do_parse(writer, state, name, member, mintime)
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(writer, state, path, mintime)
    return state
236 701
237def split_res(res, n): 702def split_res(res, n):