From d0e513d4371789d59bcb902497ca7c537a94c1f0 Mon Sep 17 00:00:00 2001 From: Peter Kjellerstedt Date: Fri, 15 Nov 2013 18:08:50 +0100 Subject: pybootchartgui: Import pybootchartgui 0.14.5 This update the pybootchartgui code to the latest release from its new location at "https://github.com/mmeeks/bootchart". This only imports the relevant parts, and not all of bootchart2. (From OE-Core rev: 6f1568e54a7808b2ab568618fc5bb244249579f1) Signed-off-by: Peter Kjellerstedt Signed-off-by: Richard Purdie --- scripts/pybootchartgui/pybootchartgui/parsing.py | 791 ++++++++++++++++++----- 1 file changed, 628 insertions(+), 163 deletions(-) (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py') diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py index 6343fd5a7b..c8a6a5721a 100644 --- a/scripts/pybootchartgui/pybootchartgui/parsing.py +++ b/scripts/pybootchartgui/pybootchartgui/parsing.py @@ -1,178 +1,638 @@ +# This file is part of pybootchartgui. + +# pybootchartgui is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# pybootchartgui is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with pybootchartgui. If not, see . + + from __future__ import with_statement import os import string import re +import sys import tarfile +from time import clock from collections import defaultdict +from functools import reduce + +from .samples import * +from .process_tree import ProcessTree + +if sys.version_info >= (3, 0): + long = int + +# Parsing produces as its end result a 'Trace' + +class Trace: + def __init__(self, writer, paths, options): + self.processes = {} + self.start = {} + self.end = {} + self.headers = None + self.disk_stats = None + self.ps_stats = None + self.taskstats = None + self.cpu_stats = None + self.cmdline = None + self.kernel = None + self.kernel_tree = None + self.filename = None + self.parent_map = None + self.mem_stats = None + + parse_paths (writer, self, paths, options.mintime) + if not self.valid(): + raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths)) + + return + + # Turn that parsed information into something more useful + # link processes into a tree of pointers, calculate statistics + self.compile(writer) + + # Crop the chart to the end of the first idle period after the given + # process + if options.crop_after: + idle = self.crop (writer, options.crop_after) + else: + idle = None + + # Annotate other times as the first start point of given process lists + self.times = [ idle ] + if options.annotate: + for procnames in options.annotate: + names = [x[:15] for x in procnames.split(",")] + for proc in self.ps_stats.process_map.values(): + if proc.cmd in names: + self.times.append(proc.start_time) + break + else: + self.times.append(None) + + self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats, + self.ps_stats.sample_period, + self.headers.get("profile.process"), + options.prune, idle, self.taskstats, + self.parent_map is not None) + + if self.kernel is not None: + self.kernel_tree = ProcessTree(writer, self.kernel, None, 0, + self.headers.get("profile.process"), + False, None, None, True) + + def valid(self): + return len(self.processes) != 0 + return self.headers != None and self.disk_stats != None and \ + self.ps_stats != None and self.cpu_stats != None + + + def compile(self, writer): + + def find_parent_id_for(pid): + if pid is 0: + return 0 + ppid = self.parent_map.get(pid) + if ppid: + # many of these double forks are so short lived + # that we have no samples, or process info for them + # so climb the parent hierarcy to find one + if int (ppid * 1000) not in self.ps_stats.process_map: +# print "Pid '%d' short lived with no process" % ppid + ppid = find_parent_id_for (ppid) +# else: +# print "Pid '%d' has an entry" % ppid + else: +# print "Pid '%d' missing from pid map" % pid + return 0 + return ppid + + # merge in the cmdline data + if self.cmdline is not None: + for proc in self.ps_stats.process_map.values(): + rpid = int (proc.pid // 1000) + if rpid in self.cmdline: + cmd = self.cmdline[rpid] + proc.exe = cmd['exe'] + proc.args = cmd['args'] +# else: +# print "proc %d '%s' not in cmdline" % (rpid, proc.exe) + + # re-parent any stray orphans if we can + if self.parent_map is not None: + for process in self.ps_stats.process_map.values(): + ppid = find_parent_id_for (int(process.pid // 1000)) + if ppid: + process.ppid = ppid * 1000 + + # stitch the tree together with pointers + for process in self.ps_stats.process_map.values(): + process.set_parent (self.ps_stats.process_map) + + # count on fingers variously + for process in self.ps_stats.process_map.values(): + process.calc_stats (self.ps_stats.sample_period) + + def crop(self, writer, crop_after): + + def is_idle_at(util, start, j): + k = j + 1 + while k < len(util) and util[k][0] < start + 300: + k += 1 + k = min(k, len(util)-1) + + if util[j][1] >= 0.25: + return False + + avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1) + if avgload < 0.25: + return True + else: + return False + def is_idle(util, start): + for j in range(0, len(util)): + if util[j][0] < start: + continue + return is_idle_at(util, start, j) + else: + return False + + names = [x[:15] for x in crop_after.split(",")] + for proc in self.ps_stats.process_map.values(): + if proc.cmd in names or proc.exe in names: + writer.info("selected proc '%s' from list (start %d)" + % (proc.cmd, proc.start_time)) + break + if proc is None: + writer.warn("no selected crop proc '%s' in list" % crop_after) + + + cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats] + disk_util = [(sample.time, sample.util) for sample in self.disk_stats] + + idle = None + for i in range(0, len(cpu_util)): + if cpu_util[i][0] < proc.start_time: + continue + if is_idle_at(cpu_util, cpu_util[i][0], i) \ + and is_idle(disk_util, cpu_util[i][0]): + idle = cpu_util[i][0] + break + + if idle is None: + writer.warn ("not idle after proc '%s'" % crop_after) + return None + + crop_at = idle + 300 + writer.info ("cropping at time %d" % crop_at) + while len (self.cpu_stats) \ + and self.cpu_stats[-1].time > crop_at: + self.cpu_stats.pop() + while len (self.disk_stats) \ + and self.disk_stats[-1].time > crop_at: + self.disk_stats.pop() + + self.ps_stats.end_time = crop_at + + cropped_map = {} + for key, value in self.ps_stats.process_map.items(): + if (value.start_time <= crop_at): + cropped_map[key] = value + + for proc in cropped_map.values(): + proc.duration = min (proc.duration, crop_at - proc.start_time) + while len (proc.samples) \ + and proc.samples[-1].time > crop_at: + proc.samples.pop() + + self.ps_stats.process_map = cropped_map + + return idle + -from samples import * -from process_tree import ProcessTree class ParseError(Exception): - """Represents errors during parse of the bootchart.""" - def __init__(self, value): - self.value = value + """Represents errors during parse of the bootchart.""" + def __init__(self, value): + self.value = value - def __str__(self): - return self.value + def __str__(self): + return self.value def _parse_headers(file): - """Parses the headers of the bootchart.""" - def parse((headers,last), line): - if '=' in line: last,value = map(string.strip, line.split('=', 1)) - else: value = line.strip() - headers[last] += value - return headers,last - return reduce(parse, file.read().split('\n'), (defaultdict(str),''))[0] + """Parses the headers of the bootchart.""" + def parse(acc, line): + (headers, last) = acc + if '=' in line: + last, value = map (lambda x: x.strip(), line.split('=', 1)) + else: + value = line.strip() + headers[last] += value + return headers, last + return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str),''))[0] def _parse_timed_blocks(file): - """Parses (ie., splits) a file into so-called timed-blocks. A - timed-block consists of a timestamp on a line by itself followed - by zero or more lines of data for that point in time.""" - def parse(block): - lines = block.split('\n') - if not lines: - raise ParseError('expected a timed-block consisting a timestamp followed by data lines') - try: - return (int(lines[0]), lines[1:]) - except ValueError: - raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) - blocks = file.read().split('\n\n') - return [parse(block) for block in blocks if block.strip()] - -def _parse_proc_ps_log(file): - """ - * See proc(5) for details. - * - * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime, - * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, - * kstkesp, kstkeip} - """ - processMap = {} - ltime = 0 - timed_blocks = _parse_timed_blocks(file) - for time, lines in timed_blocks: - for line in lines: - tokens = line.split(' ') - - offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0] - pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset]) - userCpu, sysCpu, stime= int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset]) - - if processMap.has_key(pid): - process = processMap[pid] - process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name?? - else: - process = Process(pid, cmd, ppid, min(time, stime)) - processMap[pid] = process - - if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None: - userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime) - cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0) - process.samples.append(ProcessSample(time, state, cpuSample)) - - process.last_user_cpu_time = userCpu - process.last_sys_cpu_time = sysCpu - ltime = time - - startTime = timed_blocks[0][0] - avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1) - - for process in processMap.values(): - process.set_parent(processMap) - - for process in processMap.values(): - process.calc_stats(avgSampleLength) - - return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime) - + """Parses (ie., splits) a file into so-called timed-blocks. A + timed-block consists of a timestamp on a line by itself followed + by zero or more lines of data for that point in time.""" + def parse(block): + lines = block.split('\n') + if not lines: + raise ParseError('expected a timed-block consisting a timestamp followed by data lines') + try: + return (int(lines[0]), lines[1:]) + except ValueError: + raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0]) + blocks = file.read().decode('utf-8').split('\n\n') + return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')] + +def _parse_proc_ps_log(writer, file): + """ + * See proc(5) for details. + * + * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime, + * cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack, + * kstkesp, kstkeip} + """ + processMap = {} + ltime = 0 + timed_blocks = _parse_timed_blocks(file) + for time, lines in timed_blocks: + for line in lines: + if not line: continue + tokens = line.split(' ') + if len(tokens) < 21: + continue + + offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0] + pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset]) + userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset]) + + # magic fixed point-ness ... + pid *= 1000 + ppid *= 1000 + if pid in processMap: + process = processMap[pid] + process.cmd = cmd.strip('()') # why rename after latest name?? + else: + process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime)) + processMap[pid] = process + + if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None: + userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime)) + cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0) + process.samples.append(ProcessSample(time, state, cpuSample)) + + process.last_user_cpu_time = userCpu + process.last_sys_cpu_time = sysCpu + ltime = time + + if len (timed_blocks) < 2: + return None + + startTime = timed_blocks[0][0] + avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1) + + return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime) + +def _parse_taskstats_log(writer, file): + """ + * See bootchart-collector.c for details. + * + * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total } + * + """ + processMap = {} + pidRewrites = {} + ltime = None + timed_blocks = _parse_timed_blocks(file) + for time, lines in timed_blocks: + # we have no 'stime' from taskstats, so prep 'init' + if ltime is None: + process = Process(writer, 1, '[init]', 0, 0) + processMap[1000] = process + ltime = time +# continue + for line in lines: + if not line: continue + tokens = line.split(' ') + if len(tokens) != 6: + continue + + opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2] + cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1]), + + # make space for trees of pids + opid *= 1000 + ppid *= 1000 + + # when the process name changes, we re-write the pid. + if opid in pidRewrites: + pid = pidRewrites[opid] + else: + pid = opid + + cmd = cmd.strip('(').strip(')') + if pid in processMap: + process = processMap[pid] + if process.cmd != cmd: + pid += 1 + pidRewrites[opid] = pid +# print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid) + process = process.split (writer, pid, cmd, ppid, time) + processMap[pid] = process + else: + process.cmd = cmd; + else: + process = Process(writer, pid, cmd, ppid, time) + processMap[pid] = process + + delta_cpu_ns = (float) (cpu_ns - process.last_cpu_ns) + delta_blkio_delay_ns = (float) (blkio_delay_ns - process.last_blkio_delay_ns) + delta_swapin_delay_ns = (float) (swapin_delay_ns - process.last_swapin_delay_ns) + + # make up some state data ... + if delta_cpu_ns > 0: + state = "R" + elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0: + state = "D" + else: + state = "S" + + # retain the ns timing information into a CPUSample - that tries + # with the old-style to be a %age of CPU used in this time-slice. + if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0: +# print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns) + cpuSample = CPUSample('null', delta_cpu_ns, 0.0, + delta_blkio_delay_ns, + delta_swapin_delay_ns) + process.samples.append(ProcessSample(time, state, cpuSample)) + + process.last_cpu_ns = cpu_ns + process.last_blkio_delay_ns = blkio_delay_ns + process.last_swapin_delay_ns = swapin_delay_ns + ltime = time + + if len (timed_blocks) < 2: + return None + + startTime = timed_blocks[0][0] + avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1) + + return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime) + def _parse_proc_stat_log(file): - samples = [] - ltimes = None - for time, lines in _parse_timed_blocks(file): - # CPU times {user, nice, system, idle, io_wait, irq, softirq} - tokens = lines[0].split(); - times = [ int(token) for token in tokens[1:] ] - if ltimes: - user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1])) - system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6])) - idle = float(times[3] - ltimes[3]) - iowait = float(times[4] - ltimes[4]) - - aSum = max(user + system + idle + iowait, 1) - samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) ) - - ltimes = times - # skip the rest of statistics lines - return samples - - + samples = [] + ltimes = None + for time, lines in _parse_timed_blocks(file): + # skip emtpy lines + if not lines: + continue + # CPU times {user, nice, system, idle, io_wait, irq, softirq} + tokens = lines[0].split() + times = [ int(token) for token in tokens[1:] ] + if ltimes: + user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1])) + system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6])) + idle = float(times[3] - ltimes[3]) + iowait = float(times[4] - ltimes[4]) + + aSum = max(user + system + idle + iowait, 1) + samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) ) + + ltimes = times + # skip the rest of statistics lines + return samples + def _parse_proc_disk_stat_log(file, numCpu): - """ - Parse file for disk stats, but only look at the whole disks, eg. sda, - not sda1, sda2 etc. The format of relevant lines should be: - {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq} - """ - DISK_REGEX = 'hd.$|sd.$' - - def is_relevant_line(line): - return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2]) - - disk_stat_samples = [] - - for time, lines in _parse_timed_blocks(file): - sample = DiskStatSample(time) - relevant_tokens = [line.split() for line in lines if is_relevant_line(line)] - - for tokens in relevant_tokens: - disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12]) - sample.add_diskdata([rsect, wsect, use]) - - disk_stat_samples.append(sample) - - disk_stats = [] - for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]): - interval = sample1.time - sample2.time - sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ] - readTput = sums[0] / 2.0 * 100.0 / interval - writeTput = sums[1] / 2.0 * 100.0 / interval - util = float( sums[2] ) / 10 / interval / numCpu - util = max(0.0, min(1.0, util)) - disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util)) - - return disk_stats - - + """ + Parse file for disk stats, but only look at the whole device, eg. sda, + not sda1, sda2 etc. The format of relevant lines should be: + {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq} + """ + disk_regex_re = re.compile ('^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$') + + # this gets called an awful lot. + def is_relevant_line(linetokens): + if len(linetokens) != 14: + return False + disk = linetokens[2] + return disk_regex_re.match(disk) + + disk_stat_samples = [] + + for time, lines in _parse_timed_blocks(file): + sample = DiskStatSample(time) + relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(),lines) if is_relevant_line(linetokens)] + + for tokens in relevant_tokens: + disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12]) + sample.add_diskdata([rsect, wsect, use]) + + disk_stat_samples.append(sample) + + disk_stats = [] + for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]): + interval = sample1.time - sample2.time + if interval == 0: + interval = 1 + sums = [ a - b for a, b in zip(sample1.diskdata, sample2.diskdata) ] + readTput = sums[0] / 2.0 * 100.0 / interval + writeTput = sums[1] / 2.0 * 100.0 / interval + util = float( sums[2] ) / 10 / interval / numCpu + util = max(0.0, min(1.0, util)) + disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util)) + + return disk_stats + +def _parse_proc_meminfo_log(file): + """ + Parse file for global memory statistics. + The format of relevant lines should be: ^key: value( unit)? + """ + used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',) + + mem_stats = [] + meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*') + + for time, lines in _parse_timed_blocks(file): + sample = MemSample(time) + + for line in lines: + match = meminfo_re.match(line) + if not match: + raise ParseError("Invalid meminfo line \"%s\"" % match.groups(0)) + sample.add_value(match.group(1), int(match.group(2))) + + if sample.valid(): + mem_stats.append(sample) + + return mem_stats + +# if we boot the kernel with: initcall_debug printk.time=1 we can +# get all manner of interesting data from the dmesg output +# We turn this into a pseudo-process tree: each event is +# characterised by a +# we don't try to detect a "kernel finished" state - since the kernel +# continues to do interesting things after init is called. +# +# sample input: +# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013) +# ... +# [ 0.039993] calling migration_init+0x0/0x6b @ 1 +# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs +def _parse_dmesg(writer, file): + timestamp_re = re.compile ("^\[\s*(\d+\.\d+)\s*]\s+(.*)$") + split_re = re.compile ("^(\S+)\s+([\S\+_-]+) (.*)$") + processMap = {} + idx = 0 + inc = 1.0 / 1000000 + kernel = Process(writer, idx, "k-boot", 0, 0.1) + processMap['k-boot'] = kernel + base_ts = False + max_ts = 0 + for line in file.read().decode('utf-8').split('\n'): + t = timestamp_re.match (line) + if t is None: +# print "duff timestamp " + line + continue + + time_ms = float (t.group(1)) * 1000 + # looks like we may have a huge diff after the clock + # has been set up. This could lead to huge graph: + # so huge we will be killed by the OOM. + # So instead of using the plain timestamp we will + # use a delta to first one and skip the first one + # for convenience + if max_ts == 0 and not base_ts and time_ms > 1000: + base_ts = time_ms + continue + max_ts = max(time_ms, max_ts) + if base_ts: +# print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms) + time_ms -= base_ts + m = split_re.match (t.group(2)) + + if m is None: + continue +# print "match: '%s'" % (m.group(1)) + type = m.group(1) + func = m.group(2) + rest = m.group(3) + + if t.group(2).startswith ('Write protecting the') or \ + t.group(2).startswith ('Freeing unused kernel memory'): + kernel.duration = time_ms / 10 + continue + +# print "foo: '%s' '%s' '%s'" % (type, func, rest) + if type == "calling": + ppid = kernel.pid + p = re.match ("\@ (\d+)", rest) + if p is not None: + ppid = float (p.group(1)) // 1000 +# print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms) + name = func.split ('+', 1) [0] + idx += inc + processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10) + elif type == "initcall": +# print "finished: '%s' at '%s'" % (func, time_ms) + if func in processMap: + process = processMap[func] + process.duration = (time_ms / 10) - process.start_time + else: + print("corrupted init call for %s" % (func)) + + elif type == "async_waiting" or type == "async_continuing": + continue # ignore + + return processMap.values() + +# +# Parse binary pacct accounting file output if we have one +# cf. /usr/include/linux/acct.h +# +def _parse_pacct(writer, file): + # read LE int32 + def _read_le_int32(file): + byts = file.read(4) + return (ord(byts[0])) | (ord(byts[1]) << 8) | \ + (ord(byts[2]) << 16) | (ord(byts[3]) << 24) + + parent_map = {} + parent_map[0] = 0 + while file.read(1) != "": # ignore flags + ver = file.read(1) + if ord(ver) < 3: + print("Invalid version 0x%x" % (ord(ver))) + return None + + file.seek (14, 1) # user, group etc. + pid = _read_le_int32 (file) + ppid = _read_le_int32 (file) +# print "Parent of %d is %d" % (pid, ppid) + parent_map[pid] = ppid + file.seek (4 + 4 + 16, 1) # timings + file.seek (16, 1) # acct_comm + return parent_map + +def _parse_paternity_log(writer, file): + parent_map = {} + parent_map[0] = 0 + for line in file.read().decode('utf-8').split('\n'): + if not line: + continue + elems = line.split(' ') # + if len (elems) >= 2: +# print "paternity of %d is %d" % (int(elems[0]), int(elems[1])) + parent_map[int(elems[0])] = int(elems[1]) + else: + print("Odd paternity line '%s'" % (line)) + return parent_map + +def _parse_cmdline_log(writer, file): + cmdLines = {} + for block in file.read().decode('utf-8').split('\n\n'): + lines = block.split('\n') + if len (lines) >= 3: +# print "Lines '%s'" % (lines[0]) + pid = int (lines[0]) + values = {} + values['exe'] = lines[1].lstrip(':') + args = lines[2].lstrip(':').split('\0') + args.pop() + values['args'] = args + cmdLines[pid] = values + return cmdLines + def get_num_cpus(headers): """Get the number of CPUs from the system.cpu header property. As the CPU utilization graphs are relative, the number of CPUs currently makes no difference.""" if headers is None: return 1 + if headers.get("system.cpu.num"): + return max (int (headers.get("system.cpu.num")), 1) cpu_model = headers.get("system.cpu") if cpu_model is None: return 1 mat = re.match(".*\\((\\d+)\\)", cpu_model) if mat is None: return 1 - return int(mat.group(1)) - -class ParserState: - def __init__(self): - self.processes = {} - self.start = {} - self.end = {} + return max (int(mat.group(1)), 1) - def valid(self): - return len(self.processes) != 0 - - -_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"]) - -def _do_parse(state, filename, file, mintime): - #print filename - #writer.status("parsing '%s'" % filename) +def _do_parse(writer, state, filename, file, mintime): + writer.info("parsing '%s'" % filename) + t1 = clock() paths = filename.split("/") task = paths[-1] pn = paths[-2] @@ -194,44 +654,49 @@ def _do_parse(state, filename, file, mintime): state.end[end] = [] if k not in state.end[end]: state.end[end].append(pn + ":" + task) + t2 = clock() + writer.info(" %s seconds" % str(t2-t1)) return state -def parse_file(state, filename, mintime): +def parse_file(writer, state, filename, mintime): + if state.filename is None: + state.filename = filename basename = os.path.basename(filename) with open(filename, "rb") as file: - return _do_parse(state, filename, file, mintime) + return _do_parse(writer, state, filename, file, mintime) -def parse_paths(state, paths, mintime): +def parse_paths(writer, state, paths, mintime): for path in paths: - root,extension = os.path.splitext(path) + if state.filename is None: + state.filename = path + root, extension = os.path.splitext(path) if not(os.path.exists(path)): - print "warning: path '%s' does not exist, ignoring." % path + writer.warn("warning: path '%s' does not exist, ignoring." % path) continue + #state.filename = path if os.path.isdir(path): files = [ f for f in [os.path.join(path, f) for f in os.listdir(path)] ] files.sort() - state = parse_paths(state, files, mintime) - elif extension in [".tar", ".tgz", ".tar.gz"]: + state = parse_paths(writer, state, files, mintime) + elif extension in [".tar", ".tgz", ".gz"]: + if extension == ".gz": + root, extension = os.path.splitext(root) + if extension != ".tar": + writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension) + continue tf = None try: + writer.status("parsing '%s'" % path) tf = tarfile.open(path, 'r:*') for name in tf.getnames(): - state = _do_parse(state, name, tf.extractfile(name)) - except tarfile.ReadError, error: + state = _do_parse(writer, state, name, tf.extractfile(name)) + except tarfile.ReadError as error: raise ParseError("error: could not read tarfile '%s': %s." % (path, error)) finally: if tf != None: tf.close() else: - state = parse_file(state, path, mintime) - return state - -def parse(paths, prune, mintime): - state = parse_paths(ParserState(), paths, mintime) - if not state.valid(): - raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths)) - #monitored_app = state.headers.get("profile.process") - #proc_tree = ProcessTree(state.ps_stats, monitored_app, prune) + state = parse_file(writer, state, path, mintime) return state def split_res(res, n): -- cgit v1.2.3-54-g00ecf