Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r--  scripts/pybootchartgui/pybootchartgui/parsing.py  223
1 files changed, 223 insertions, 0 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py
new file mode 100644
index 0000000000..a350a3eb08
--- /dev/null
+++ b/scripts/pybootchartgui/pybootchartgui/parsing.py
@@ -0,0 +1,223 @@
from __future__ import with_statement

import os
import string
import re
import tarfile
from collections import defaultdict

from samples import *
from process_tree import ProcessTree

class ParseError(Exception):
    """Represents errors during parse of the bootchart."""
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return self.value

def _parse_headers(file):
    """Parses the headers of the bootchart."""
    def parse((headers,last), line):
        if '=' in line: last,value = map(string.strip, line.split('=', 1))
        else: value = line.strip()
        headers[last] += value
        return headers,last
    return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0]

def _parse_timed_blocks(file):
    """Parses (i.e., splits) a file into so-called timed-blocks. A
    timed-block consists of a timestamp on a line by itself followed
    by zero or more lines of data for that point in time."""
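    # Illustrative raw input (numbers invented): blocks are separated by a
    # blank line and each block starts with an integer timestamp, e.g.
    #   6354
    #   cpu  5136 0 1077 10283 139 11 67 0
    #
    #   6404
    #   cpu  5193 0 1087 10374 141 11 68 0
    # yielding a list of (timestamp, data_lines) pairs.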
    def parse(block):
        lines = block.split('\n')
        if not lines:
            raise ParseError('expected a timed-block consisting of a timestamp followed by data lines')
        try:
            return (int(lines[0]), lines[1:])
        except ValueError:
            raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
    blocks = file.read().split('\n\n')
    return [parse(block) for block in blocks if block.strip()]

def _parse_proc_ps_log(file):
    """
     * See proc(5) for details.
     *
     * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
     *  cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
     *  kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        for line in lines:
            tokens = line.split(' ')

            offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            if processMap.has_key(pid):
                process = processMap[pid]
                process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name??
            else:
                process = Process(pid, cmd, ppid, min(time, stime))
                processMap[pid] = process

            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime)
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
        ltime = time

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)

    for process in processMap.values():
        process.set_parent(processMap)

    for process in processMap.values():
        process.calc_stats(avgSampleLength)

    return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime)

def _parse_proc_stat_log(file):
    samples = []
    ltimes = None
    for time, lines in _parse_timed_blocks(file):
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
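        # e.g. an invented first data line: "cpu  6094 249 1853 14284 739 24 116"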
        tokens = lines[0].split()
        times = [ int(token) for token in tokens[1:] ]
        if ltimes:
            user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
            system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
            idle = float(times[3] - ltimes[3])
            iowait = float(times[4] - ltimes[4])

            aSum = max(user + system + idle + iowait, 1)
            samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) )

        ltimes = times
        # skip the rest of statistics lines
    return samples


def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole disks, e.g. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    DISK_REGEX = 'hd.$|sd.$'

    def is_relevant_line(line):
        return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2])

    disk_stat_samples = []

    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        relevant_tokens = [line.split() for line in lines if is_relevant_line(line)]

        for tokens in relevant_tokens:
            disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
            sample.add_diskdata([rsect, wsect, use])

        disk_stat_samples.append(sample)

    disk_stats = []
    for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
        interval = sample1.time - sample2.time
        sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ]
        readTput = sums[0] / 2.0 * 100.0 / interval
        writeTput = sums[1] / 2.0 * 100.0 / interval
        util = float( sums[2] ) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))

    return disk_stats


def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
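    # e.g. an invented header value of "Intel(R) Celeron(R) M processor (2)"
    # yields 2; any value without a trailing "(n)" falls back to 1.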
    if headers is None:
        return 1
    cpu_model = headers.get("system.cpu")
    if cpu_model is None:
        return 1
    mat = re.match(".*\\((\\d+)\\)", cpu_model)
    if mat is None:
        return 1
    return int(mat.group(1))

class ParserState:
    def __init__(self):
        self.headers = None
        self.disk_stats = None
        self.ps_stats = None
        self.cpu_stats = None

    def valid(self):
        return self.headers != None and self.disk_stats != None and self.ps_stats != None and self.cpu_stats != None


_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"])

def _do_parse(state, name, file):
    if name == "header":
        state.headers = _parse_headers(file)
    elif name == "proc_diskstats.log":
        state.disk_stats = _parse_proc_disk_stat_log(file, get_num_cpus(state.headers))
    elif name == "proc_ps.log":
        state.ps_stats = _parse_proc_ps_log(file)
    elif name == "proc_stat.log":
        state.cpu_stats = _parse_proc_stat_log(file)
    return state

def parse_file(state, filename):
    basename = os.path.basename(filename)
    if not(basename in _relevant_files):
        return state
    with open(filename, "rb") as file:
        return _do_parse(state, basename, file)

def parse_paths(state, paths):
    for path in paths:
        root,extension = os.path.splitext(path)
        if not(os.path.exists(path)):
            print "warning: path '%s' does not exist, ignoring." % path
            continue
        if os.path.isdir(path):
            files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] if os.path.isfile(f) ]
            files.sort()
            state = parse_paths(state, files)
        elif extension in [".tar", ".tgz", ".tar.gz"]:
            tf = None
            try:
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(state, name, tf.extractfile(name))
            except tarfile.ReadError, error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf != None:
                    tf.close()
        else:
            state = parse_file(state, path)
    return state

def parse(paths, prune):
    state = parse_paths(ParserState(), paths)
    if not state.valid():
        raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))
    monitored_app = state.headers.get("profile.process")
    proc_tree = ProcessTree(state.ps_stats, monitored_app, prune)
    return (state.headers, state.cpu_stats, state.disk_stats, proc_tree)
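A rough usage sketch of the module's entry point follows; the log path, the plain "import parsing" (which assumes the package directory is on sys.path, as the flat "from samples import *" above suggests), and the printed fields are illustrative assumptions, not part of this commit:

    import parsing

    # Parse a collected bootchart, either an unpacked directory of logs or a
    # tarball (.tar/.tgz/.tar.gz); parse() raises ParseError if the required
    # header and proc_*.log files are not all present.
    headers, cpu_stats, disk_stats, proc_tree = parsing.parse(["bootchart.tgz"], prune=False)

    print "uname:  ", headers.get("system.uname")
    print "samples: %d cpu, %d disk" % (len(cpu_stats), len(disk_stats))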