diff options
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- | scripts/pybootchartgui/pybootchartgui/parsing.py | 223 |
1 file changed, 223 insertions, 0 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py new file mode 100644 index 0000000000..a350a3eb08 --- /dev/null +++ b/scripts/pybootchartgui/pybootchartgui/parsing.py | |||
@@ -0,0 +1,223 @@ | |||
1 | from __future__ import with_statement | ||
2 | |||
3 | import os | ||
4 | import string | ||
5 | import re | ||
6 | import tarfile | ||
7 | from collections import defaultdict | ||
8 | |||
9 | from samples import * | ||
10 | from process_tree import ProcessTree | ||
11 | |||
class ParseError(Exception):
    """Represents errors during parse of the bootchart."""
    def __init__(self, value):
        # Forward the message to Exception so that e.args, repr() and
        # pickling behave like any other exception, while keeping the
        # historical .value attribute for existing callers.
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return self.value
19 | |||
20 | def _parse_headers(file): | ||
21 | """Parses the headers of the bootchart.""" | ||
22 | def parse((headers,last), line): | ||
23 | if '=' in line: last,value = map(string.strip, line.split('=', 1)) | ||
24 | else: value = line.strip() | ||
25 | headers[last] += value | ||
26 | return headers,last | ||
27 | return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0] | ||
28 | |||
29 | def _parse_timed_blocks(file): | ||
30 | """Parses (ie., splits) a file into so-called timed-blocks. A | ||
31 | timed-block consists of a timestamp on a line by itself followed | ||
32 | by zero or more lines of data for that point in time.""" | ||
33 | def parse(block): | ||
34 | lines = block.split('\n') | ||
35 | if not lines: | ||
36 | raise ParseError('expected a timed-block consisting a timestamp followed by data lines') | ||
37 | try: | ||
38 | return (int(lines[0]), lines[1:]) | ||
39 | except ValueError: | ||
40 | raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) | ||
41 | blocks = file.read().split('\n\n') | ||
42 | return [parse(block) for block in blocks if block.strip()] | ||
43 | |||
def _parse_proc_ps_log(file):
    """
    Parse proc_ps.log (timed blocks of /proc/[pid]/stat lines) into a
    ProcessStats object.

    * See proc(5) for details.
    *
    * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
    * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
    * kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    if not timed_blocks:
        # an empty log would otherwise crash with IndexError further down
        raise ParseError('process sample log contains no data')
    for time, lines in timed_blocks:
        for line in lines:
            tokens = line.split(' ')

            # comm (the parenthesised process name) may itself contain spaces;
            # find the token that closes it to re-align the fixed fields.
            offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            if pid in processMap:  # 'in' replaces Python-2-only dict.has_key()
                process = processMap[pid]
                process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name??
            else:
                process = Process(pid, cmd, ppid, min(time, stime))
                processMap[pid] = process

            # only emit a CPU sample once there is a previous reading to diff against
            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime)
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
            ltime = time

    startTime = timed_blocks[0][0]
    # max(..., 1) guards against ZeroDivisionError when only one block was sampled
    avgSampleLength = (ltime - startTime)/max(len(timed_blocks)-1, 1)

    for process in processMap.values():
        process.set_parent(processMap)

    for process in processMap.values():
        process.calc_stats(avgSampleLength)

    return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime)
89 | |||
def _parse_proc_stat_log(file):
    """Parse proc_stat.log into a list of CPUSample objects, one per
    interval between consecutive timed blocks.  Each sample holds the
    user, system and io-wait shares of the elapsed jiffies."""
    samples = []
    prev = None
    for time, lines in _parse_timed_blocks(file):
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        cur = [int(token) for token in lines[0].split()[1:]]
        if prev:
            user = float((cur[0] + cur[1]) - (prev[0] + prev[1]))
            system = float((cur[2] + cur[5] + cur[6]) - (prev[2] + prev[5] + prev[6]))
            idle = float(cur[3] - prev[3])
            iowait = float(cur[4] - prev[4])

            # never divide by zero on an idle interval
            total = max(user + system + idle + iowait, 1)
            samples.append(CPUSample(time, user/total, system/total, iowait/total))

        prev = cur
        # skip the rest of statistics lines
    return samples
109 | |||
110 | |||
def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole disks, eg. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    DISK_REGEX = 'hd.$|sd.$'

    def is_whole_disk(line):
        fields = line.split()
        return len(fields) == 14 and re.match(DISK_REGEX, fields[2])

    # One accumulated DiskStatSample per timed block.
    accumulated = []
    for timestamp, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(timestamp)
        for fields in [line.split() for line in lines if is_whole_disk(line)]:
            rsect, wsect, use = int(fields[5]), int(fields[9]), int(fields[12])
            sample.add_diskdata([rsect, wsect, use])
        accumulated.append(sample)

    # Turn consecutive sample pairs into throughput / utilization deltas.
    disk_stats = []
    for earlier, later in zip(accumulated[:-1], accumulated[1:]):
        interval = later.time - earlier.time
        deltas = [b - a for a, b in zip(earlier.diskdata, later.diskdata)]
        # sectors are 512 bytes; the scaling matches the original units
        readTput = deltas[0] / 2.0 * 100.0 / interval
        writeTput = deltas[1] / 2.0 * 100.0 / interval
        util = float(deltas[2]) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(later.time, readTput, writeTput, util))

    return disk_stats
145 | |||
146 | |||
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
    if headers is None:
        return 1
    model = headers.get("system.cpu")
    if model is None:
        return 1
    # the CPU count is encoded in trailing parentheses, e.g. "... (4)"
    found = re.match(".*\\((\\d+)\\)", model)
    return 1 if found is None else int(found.group(1))
160 | |||
class ParserState:
    """Mutable accumulator for the parsed pieces of a bootchart; each
    field stays None until the corresponding log file has been parsed
    by _do_parse()."""
    def __init__(self):
        self.headers = None      # header name -> value mapping
        self.disk_stats = None   # disk throughput/utilization samples
        self.ps_stats = None     # per-process statistics
        self.cpu_stats = None    # overall CPU usage samples

    def valid(self):
        """Return True once all four inputs have been parsed.

        Uses 'is not None' rather than the original '!= None': identity
        is the correct test for None and avoids invoking custom __eq__."""
        return (self.headers is not None and self.disk_stats is not None
                and self.ps_stats is not None and self.cpu_stats is not None)
170 | |||
171 | |||
172 | _relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"]) | ||
173 | |||
def _do_parse(state, name, file):
    """Dispatch one named bootchart log to its parser and store the
    result on state; unknown names leave state untouched.  Returns state."""
    if name == "header":
        state.headers = _parse_headers(file)
    elif name == "proc_ps.log":
        state.ps_stats = _parse_proc_ps_log(file)
    elif name == "proc_stat.log":
        state.cpu_stats = _parse_proc_stat_log(file)
    elif name == "proc_diskstats.log":
        # the disk parser needs the CPU count, read from the headers (if parsed)
        state.disk_stats = _parse_proc_disk_stat_log(file, get_num_cpus(state.headers))
    return state
184 | |||
def parse_file(state, filename):
    """Parse a single on-disk bootchart log file into state; files whose
    basename is not a recognised bootchart log are silently skipped."""
    basename = os.path.basename(filename)
    if basename not in _relevant_files:
        return state
    with open(filename, "rb") as log:
        return _do_parse(state, basename, log)
191 | |||
def parse_paths(state, paths):
    """Parse every given path into state and return it.

    A path may be a directory (recursed into, entries sorted), a tar
    archive (each member parsed by name), or a plain file.  Paths that
    do not exist are skipped with a warning.

    Fixes: the original tested os.path.splitext() output against
    '.tar.gz' — splitext('x.tar.gz') yields '.gz', so gzipped tarballs
    were silently ignored; endswith() handles the two-part extension.
    Also uses 'except ... as' and print() for Python 3 compatibility.
    """
    for path in paths:
        if not os.path.exists(path):
            print("warning: path '%s' does not exist, ignoring." % path)
            continue
        if os.path.isdir(path):
            files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] if os.path.isfile(f) ]
            files.sort()
            state = parse_paths(state, files)
        elif path.endswith((".tar", ".tgz", ".tar.gz")):
            tf = None
            try:
                # 'r:*' lets tarfile pick the right (de)compression transparently
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(state, name, tf.extractfile(name))
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(state, path)
    return state
216 | |||
def parse(paths, prune):
    """Parse all given paths and return the tuple (headers, cpu_stats,
    disk_stats, proc_tree).  Raises ParseError when the paths do not
    add up to a complete bootchart."""
    state = parse_paths(ParserState(), paths)
    if state.valid():
        proc_tree = ProcessTree(state.ps_stats, state.headers.get("profile.process"), prune)
        return (state.headers, state.cpu_stats, state.disk_stats, proc_tree)
    raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))