diff options
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- | scripts/pybootchartgui/pybootchartgui/parsing.py | 223 |
1 file changed, 223 insertions, 0 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py new file mode 100644 index 0000000000..a350a3eb08 --- /dev/null +++ b/scripts/pybootchartgui/pybootchartgui/parsing.py | |||
@@ -0,0 +1,223 @@ | |||
1 | from __future__ import with_statement | ||
2 | |||
3 | import os | ||
4 | import string | ||
5 | import re | ||
6 | import tarfile | ||
7 | from collections import defaultdict | ||
8 | |||
9 | from samples import * | ||
10 | from process_tree import ProcessTree | ||
11 | |||
class ParseError(Exception):
    """Represents errors during parse of the bootchart."""
    def __init__(self, value):
        # Forward the message to Exception so that e.args, repr() and
        # pickling behave like any other exception, while keeping the
        # historical .value attribute for existing callers.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return self.value
19 | |||
20 | def _parse_headers(file): | ||
21 | """Parses the headers of the bootchart.""" | ||
22 | def parse((headers,last), line): | ||
23 | if '=' in line: last,value = map(string.strip, line.split('=', 1)) | ||
24 | else: value = line.strip() | ||
25 | headers[last] += value | ||
26 | return headers,last | ||
27 | return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0] | ||
28 | |||
29 | def _parse_timed_blocks(file): | ||
30 | """Parses (ie., splits) a file into so-called timed-blocks. A | ||
31 | timed-block consists of a timestamp on a line by itself followed | ||
32 | by zero or more lines of data for that point in time.""" | ||
33 | def parse(block): | ||
34 | lines = block.split('\n') | ||
35 | if not lines: | ||
36 | raise ParseError('expected a timed-block consisting a timestamp followed by data lines') | ||
37 | try: | ||
38 | return (int(lines[0]), lines[1:]) | ||
39 | except ValueError: | ||
40 | raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) | ||
41 | blocks = file.read().split('\n\n') | ||
42 | return [parse(block) for block in blocks if block.strip()] | ||
43 | |||
def _parse_proc_ps_log(file):
    """
    Parse proc_ps.log (timed blocks of /proc/[pid]/stat lines) into a
    ProcessStats object.

    * See proc(5) for details.
    *
    * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
    * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
    * kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    if not timed_blocks:
        # an empty log would otherwise crash with IndexError further down
        raise ParseError('process sample log contains no data')
    for time, lines in timed_blocks:
        for line in lines:
            tokens = line.split(' ')

            # comm (the parenthesised process name) may itself contain spaces;
            # find the token that closes it to re-align the fixed fields.
            offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            if pid in processMap:  # 'in' replaces Python-2-only dict.has_key()
                process = processMap[pid]
                process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name??
            else:
                process = Process(pid, cmd, ppid, min(time, stime))
                processMap[pid] = process

            # only emit a CPU sample once there is a previous reading to diff against
            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime)
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
            ltime = time

    startTime = timed_blocks[0][0]
    # max(..., 1) guards against ZeroDivisionError when only one block was sampled
    avgSampleLength = (ltime - startTime)/max(len(timed_blocks)-1, 1)

    for process in processMap.values():
        process.set_parent(processMap)

    for process in processMap.values():
        process.calc_stats(avgSampleLength)

    return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime)
89 | |||
def _parse_proc_stat_log(file):
    """Parse proc_stat.log into a list of CPUSample objects, one per
    interval between consecutive timed blocks.  Each sample holds the
    user, system and io-wait shares of the elapsed jiffies."""
    samples = []
    prev = None
    for time, lines in _parse_timed_blocks(file):
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        cur = [int(token) for token in lines[0].split()[1:]]
        if prev:
            user = float((cur[0] + cur[1]) - (prev[0] + prev[1]))
            system = float((cur[2] + cur[5] + cur[6]) - (prev[2] + prev[5] + prev[6]))
            idle = float(cur[3] - prev[3])
            iowait = float(cur[4] - prev[4])

            # never divide by zero on an idle interval
            total = max(user + system + idle + iowait, 1)
            samples.append(CPUSample(time, user/total, system/total, iowait/total))

        prev = cur
        # skip the rest of statistics lines
    return samples
109 | |||
110 | |||
def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole disks, eg. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    DISK_REGEX = 'hd.$|sd.$'

    def is_whole_disk(line):
        fields = line.split()
        return len(fields) == 14 and re.match(DISK_REGEX, fields[2])

    # One accumulated DiskStatSample per timed block.
    accumulated = []
    for timestamp, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(timestamp)
        for fields in [line.split() for line in lines if is_whole_disk(line)]:
            rsect, wsect, use = int(fields[5]), int(fields[9]), int(fields[12])
            sample.add_diskdata([rsect, wsect, use])
        accumulated.append(sample)

    # Turn consecutive sample pairs into throughput / utilization deltas.
    disk_stats = []
    for earlier, later in zip(accumulated[:-1], accumulated[1:]):
        interval = later.time - earlier.time
        deltas = [b - a for a, b in zip(earlier.diskdata, later.diskdata)]
        # sectors are 512 bytes; the scaling matches the original units
        readTput = deltas[0] / 2.0 * 100.0 / interval
        writeTput = deltas[1] / 2.0 * 100.0 / interval
        util = float(deltas[2]) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(later.time, readTput, writeTput, util))

    return disk_stats
145 | |||
146 | |||
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
    if headers is None:
        return 1
    model = headers.get("system.cpu")
    if model is None:
        return 1
    # the CPU count is encoded in trailing parentheses, e.g. "... (4)"
    found = re.match(".*\\((\\d+)\\)", model)
    return 1 if found is None else int(found.group(1))
160 | |||
class ParserState:
    """Mutable accumulator for the parsed pieces of a bootchart; each
    field stays None until the corresponding log file has been parsed
    by _do_parse()."""
    def __init__(self):
        self.headers = None      # header name -> value mapping
        self.disk_stats = None   # disk throughput/utilization samples
        self.ps_stats = None     # per-process statistics
        self.cpu_stats = None    # overall CPU usage samples

    def valid(self):
        """Return True once all four inputs have been parsed.

        Uses 'is not None' rather than the original '!= None': identity
        is the correct test for None and avoids invoking custom __eq__."""
        return (self.headers is not None and self.disk_stats is not None
                and self.ps_stats is not None and self.cpu_stats is not None)
170 | |||
171 | |||
172 | _relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"]) | ||
173 | |||
def _do_parse(state, name, file):
    """Dispatch one named bootchart log to its parser and store the
    result on state; unknown names leave state untouched.  Returns state."""
    if name == "header":
        state.headers = _parse_headers(file)
    elif name == "proc_ps.log":
        state.ps_stats = _parse_proc_ps_log(file)
    elif name == "proc_stat.log":
        state.cpu_stats = _parse_proc_stat_log(file)
    elif name == "proc_diskstats.log":
        # the disk parser needs the CPU count, read from the headers (if parsed)
        state.disk_stats = _parse_proc_disk_stat_log(file, get_num_cpus(state.headers))
    return state
184 | |||
def parse_file(state, filename):
    """Parse a single on-disk bootchart log file into state; files whose
    basename is not a recognised bootchart log are silently skipped."""
    basename = os.path.basename(filename)
    if basename not in _relevant_files:
        return state
    with open(filename, "rb") as log:
        return _do_parse(state, basename, log)
191 | |||
def parse_paths(state, paths):
    """Parse every given path into state and return it.

    A path may be a directory (recursed into, entries sorted), a tar
    archive (each member parsed by name), or a plain file.  Paths that
    do not exist are skipped with a warning.

    Fixes: the original tested os.path.splitext() output against
    '.tar.gz' — splitext('x.tar.gz') yields '.gz', so gzipped tarballs
    were silently ignored; endswith() handles the two-part extension.
    Also uses 'except ... as' and print() for Python 3 compatibility.
    """
    for path in paths:
        if not os.path.exists(path):
            print("warning: path '%s' does not exist, ignoring." % path)
            continue
        if os.path.isdir(path):
            files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] if os.path.isfile(f) ]
            files.sort()
            state = parse_paths(state, files)
        elif path.endswith((".tar", ".tgz", ".tar.gz")):
            tf = None
            try:
                # 'r:*' lets tarfile pick the right (de)compression transparently
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(state, name, tf.extractfile(name))
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(state, path)
    return state
216 | |||
def parse(paths, prune):
    """Parse all given paths and return the tuple (headers, cpu_stats,
    disk_stats, proc_tree).  Raises ParseError when the paths do not
    add up to a complete bootchart."""
    state = parse_paths(ParserState(), paths)
    if state.valid():
        proc_tree = ProcessTree(state.ps_stats, state.headers.get("profile.process"), prune)
        return (state.headers, state.cpu_stats, state.disk_stats, proc_tree)
    raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))