path: root/scripts/pybootchartgui/pybootchartgui/parsing.py
Diffstat (limited to 'scripts/pybootchartgui/pybootchartgui/parsing.py')
-rw-r--r-- scripts/pybootchartgui/pybootchartgui/parsing.py | 740
1 file changed, 740 insertions, 0 deletions
diff --git a/scripts/pybootchartgui/pybootchartgui/parsing.py b/scripts/pybootchartgui/pybootchartgui/parsing.py
new file mode 100644
index 0000000000..d423b9f77c
--- /dev/null
+++ b/scripts/pybootchartgui/pybootchartgui/parsing.py
@@ -0,0 +1,740 @@
# This file is part of pybootchartgui.

# pybootchartgui is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.

# pybootchartgui is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with pybootchartgui. If not, see <http://www.gnu.org/licenses/>.


from __future__ import with_statement

import os
import string
import re
import sys
import tarfile
from time import clock
from collections import defaultdict
from functools import reduce

from .samples import *
from .process_tree import ProcessTree

if sys.version_info >= (3, 0):
    long = int

# Parsing produces as its end result a 'Trace'

class Trace:
    def __init__(self, writer, paths, options):
        self.processes = {}
        self.start = {}
        self.end = {}
        self.min = None
        self.max = None
        self.headers = None
        self.disk_stats = None
        self.ps_stats = None
        self.taskstats = None
        self.cpu_stats = None
        self.cmdline = None
        self.kernel = None
        self.kernel_tree = None
        self.filename = None
        self.parent_map = None
        self.mem_stats = None

        if len(paths):
            parse_paths (writer, self, paths)
            if not self.valid():
                raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))

            if options.full_time:
                self.min = min(self.start.keys())
                self.max = max(self.end.keys())

        return

        # NOTE: the remainder of this constructor is retained from upstream
        # pybootchartgui but is unreachable after the early return above.

        # Turn that parsed information into something more useful
        # link processes into a tree of pointers, calculate statistics
        self.compile(writer)

        # Crop the chart to the end of the first idle period after the given
        # process
        if options.crop_after:
            idle = self.crop (writer, options.crop_after)
        else:
            idle = None

        # Annotate other times as the first start point of given process lists
        self.times = [ idle ]
        if options.annotate:
            for procnames in options.annotate:
                names = [x[:15] for x in procnames.split(",")]
                for proc in self.ps_stats.process_map.values():
                    if proc.cmd in names:
                        self.times.append(proc.start_time)
                        break
                else:
                    self.times.append(None)

        self.proc_tree = ProcessTree(writer, self.kernel, self.ps_stats,
                                     self.ps_stats.sample_period,
                                     self.headers.get("profile.process"),
                                     options.prune, idle, self.taskstats,
                                     self.parent_map is not None)

        if self.kernel is not None:
            self.kernel_tree = ProcessTree(writer, self.kernel, None, 0,
                                           self.headers.get("profile.process"),
                                           False, None, None, True)
    def valid(self):
        # upstream also required headers and disk/ps/cpu stats to be
        # present; here any successfully parsed process suffices
        return len(self.processes) != 0

    def add_process(self, process, start, end):
        self.processes[process] = [start, end]
        if start not in self.start:
            self.start[start] = []
        if process not in self.start[start]:
            self.start[start].append(process)
        if end not in self.end:
            self.end[end] = []
        if process not in self.end[end]:
            self.end[end].append(process)

    def compile(self, writer):

        def find_parent_id_for(pid):
            if pid == 0:
                return 0
            ppid = self.parent_map.get(pid)
            if ppid:
                # many of these double forks are so short lived
                # that we have no samples, or process info for them
                # so climb the parent hierarchy to find one
                if int (ppid * 1000) not in self.ps_stats.process_map:
#                    print "Pid '%d' short lived with no process" % ppid
                    ppid = find_parent_id_for (ppid)
#                else:
#                    print "Pid '%d' has an entry" % ppid
            else:
#                print "Pid '%d' missing from pid map" % pid
                return 0
            return ppid

        # merge in the cmdline data
        if self.cmdline is not None:
            for proc in self.ps_stats.process_map.values():
                rpid = int (proc.pid // 1000)
                if rpid in self.cmdline:
                    cmd = self.cmdline[rpid]
                    proc.exe = cmd['exe']
                    proc.args = cmd['args']
#                else:
#                    print "proc %d '%s' not in cmdline" % (rpid, proc.exe)

        # re-parent any stray orphans if we can
        if self.parent_map is not None:
            for process in self.ps_stats.process_map.values():
                ppid = find_parent_id_for (int(process.pid // 1000))
                if ppid:
                    process.ppid = ppid * 1000

        # stitch the tree together with pointers
        for process in self.ps_stats.process_map.values():
            process.set_parent (self.ps_stats.process_map)

        # count on fingers variously
        for process in self.ps_stats.process_map.values():
            process.calc_stats (self.ps_stats.sample_period)

    def crop(self, writer, crop_after):

        def is_idle_at(util, start, j):
            k = j + 1
            while k < len(util) and util[k][0] < start + 300:
                k += 1
            k = min(k, len(util)-1)

            if util[j][1] >= 0.25:
                return False

            avgload = sum(u[1] for u in util[j:k+1]) / (k-j+1)
            return avgload < 0.25

        def is_idle(util, start):
            for j in range(0, len(util)):
                if util[j][0] < start:
                    continue
                return is_idle_at(util, start, j)
            else:
                return False

        names = [x[:15] for x in crop_after.split(",")]
        proc = None
        for p in self.ps_stats.process_map.values():
            if p.cmd in names or p.exe in names:
                proc = p
                writer.info("selected proc '%s' from list (start %d)"
                            % (proc.cmd, proc.start_time))
                break
        if proc is None:
            writer.warn("no selected crop proc '%s' in list" % crop_after)
            return None

        cpu_util = [(sample.time, sample.user + sample.sys + sample.io) for sample in self.cpu_stats]
        disk_util = [(sample.time, sample.util) for sample in self.disk_stats]

        idle = None
        for i in range(0, len(cpu_util)):
            if cpu_util[i][0] < proc.start_time:
                continue
            if is_idle_at(cpu_util, cpu_util[i][0], i) \
               and is_idle(disk_util, cpu_util[i][0]):
                idle = cpu_util[i][0]
                break

        if idle is None:
            writer.warn ("not idle after proc '%s'" % crop_after)
            return None

        crop_at = idle + 300
        writer.info ("cropping at time %d" % crop_at)
        while len (self.cpu_stats) \
              and self.cpu_stats[-1].time > crop_at:
            self.cpu_stats.pop()
        while len (self.disk_stats) \
              and self.disk_stats[-1].time > crop_at:
            self.disk_stats.pop()

        self.ps_stats.end_time = crop_at

        cropped_map = {}
        for key, value in self.ps_stats.process_map.items():
            if (value.start_time <= crop_at):
                cropped_map[key] = value

        for proc in cropped_map.values():
            proc.duration = min (proc.duration, crop_at - proc.start_time)
            while len (proc.samples) \
                  and proc.samples[-1].time > crop_at:
                proc.samples.pop()

        self.ps_stats.process_map = cropped_map

        return idle


class ParseError(Exception):
    """Represents errors during parse of the bootchart."""
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return self.value

def _parse_headers(file):
    """Parses the headers of the bootchart."""
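    # Header lines look like 'key = value'; a line without '=' continues
    # the value of the most recent key, which is why the (headers, last)
    # accumulator is threaded through reduce() below.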
    def parse(acc, line):
        (headers, last) = acc
        if '=' in line:
            last, value = map (lambda x: x.strip(), line.split('=', 1))
        else:
            value = line.strip()
        headers[last] += value
        return headers, last
    return reduce(parse, file.read().decode('utf-8').split('\n'), (defaultdict(str), ''))[0]

def _parse_timed_blocks(file):
    """Parses (i.e., splits) a file into so-called timed-blocks. A
    timed-block consists of a timestamp on a line by itself followed
    by zero or more lines of data for that point in time."""
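    # For example (blocks are separated from each other by blank lines):
    #   100
    #   <data line>
    #   <data line>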
    def parse(block):
        lines = block.split('\n')
        if not lines:
            raise ParseError('expected a timed-block consisting of a timestamp followed by data lines')
        try:
            return (int(lines[0]), lines[1:])
        except ValueError:
            raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
    blocks = file.read().decode('utf-8').split('\n\n')
    return [parse(block) for block in blocks if block.strip() and not block.endswith(' not running\n')]

def _parse_proc_ps_log(writer, file):
    """
     * See proc(5) for details.
     *
     * {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt, cminflt, majflt, cmajflt, utime, stime,
     *  cutime, cstime, priority, nice, 0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode, startstack,
     *  kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        for line in lines:
            if not line: continue
            tokens = line.split(' ')
            if len(tokens) < 21:
                continue

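            # comm is printed inside parentheses and may itself contain
            # spaces, so locate the token ending in ')' to find where the
            # fixed-position /proc/<pid>/stat fields resume.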
            offset = [index for index, token in enumerate(tokens[1:]) if token[-1] == ')'][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, stime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            # magic fixed point-ness ...
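            # pids are scaled by 1000 so that derived processes (e.g. the
            # renamed ones split off in _parse_taskstats_log) can be given
            # unique pseudo-pids in the gap; the real pid is pid // 1000.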
            pid *= 1000
            ppid *= 1000
            if pid in processMap:
                process = processMap[pid]
                process.cmd = cmd.strip('()') # why rename after latest name??
            else:
                process = Process(writer, pid, cmd.strip('()'), ppid, min(time, stime))
                processMap[pid] = process

            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, max(1, time - ltime))
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
        ltime = time

    if len (timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len (timed_blocks) - 1)

    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)

def _parse_taskstats_log(writer, file):
    """
     * See bootchart-collector.c for details.
     *
     * { pid, ppid, comm, cpu_run_real_total, blkio_delay_total, swapin_delay_total }
     *
    """
    processMap = {}
    pidRewrites = {}
    ltime = None
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        # we have no 'stime' from taskstats, so prep 'init'
        if ltime is None:
            process = Process(writer, 1, '[init]', 0, 0)
            processMap[1000] = process
            ltime = time
#            continue
        for line in lines:
            if not line: continue
            tokens = line.split(' ')
            if len(tokens) != 6:
                continue

            opid, ppid, cmd = int(tokens[0]), int(tokens[1]), tokens[2]
            cpu_ns, blkio_delay_ns, swapin_delay_ns = long(tokens[-3]), long(tokens[-2]), long(tokens[-1])

            # make space for trees of pids
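            # (the factor of 1000 leaves room to allocate pid+1, pid+2, ...
            # below whenever a process changes its name)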
            opid *= 1000
            ppid *= 1000

            # when the process name changes, we re-write the pid.
            if opid in pidRewrites:
                pid = pidRewrites[opid]
            else:
                pid = opid

            cmd = cmd.strip('(').strip(')')
            if pid in processMap:
                process = processMap[pid]
                if process.cmd != cmd:
                    pid += 1
                    pidRewrites[opid] = pid
#                    print "process mutation ! '%s' vs '%s' pid %s -> pid %s\n" % (process.cmd, cmd, opid, pid)
                    process = process.split (writer, pid, cmd, ppid, time)
                    processMap[pid] = process
                else:
                    process.cmd = cmd
            else:
                process = Process(writer, pid, cmd, ppid, time)
                processMap[pid] = process

            delta_cpu_ns = float(cpu_ns - process.last_cpu_ns)
            delta_blkio_delay_ns = float(blkio_delay_ns - process.last_blkio_delay_ns)
            delta_swapin_delay_ns = float(swapin_delay_ns - process.last_swapin_delay_ns)

            # make up some state data ...
            if delta_cpu_ns > 0:
                state = "R"
            elif delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
                state = "D"
            else:
                state = "S"

            # retain the ns timing information in a CPUSample - which, like
            # the old-style samples, aims to be a percentage of the CPU used
            # in this time-slice.
            if delta_cpu_ns + delta_blkio_delay_ns + delta_swapin_delay_ns > 0:
#                print "proc %s cpu_ns %g delta_cpu %g" % (cmd, cpu_ns, delta_cpu_ns)
                cpuSample = CPUSample('null', delta_cpu_ns, 0.0,
                                      delta_blkio_delay_ns,
                                      delta_swapin_delay_ns)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_cpu_ns = cpu_ns
            process.last_blkio_delay_ns = blkio_delay_ns
            process.last_swapin_delay_ns = swapin_delay_ns
        ltime = time

    if len (timed_blocks) < 2:
        return None

    startTime = timed_blocks[0][0]
    avgSampleLength = (ltime - startTime)/(len(timed_blocks)-1)

    return ProcessStats (writer, processMap, len (timed_blocks), avgSampleLength, startTime, ltime)

def _parse_proc_stat_log(file):
    samples = []
    ltimes = None
    for time, lines in _parse_timed_blocks(file):
        # skip empty lines
        if not lines:
            continue
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
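        # the /proc/stat counters are cumulative jiffies since boot, so
        # utilization comes from the deltas between consecutive samples,
        # normalised by the total jiffies elapsed in the interval.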
        tokens = lines[0].split()
        times = [ int(token) for token in tokens[1:] ]
        if ltimes:
            user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
            system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
            idle = float(times[3] - ltimes[3])
            iowait = float(times[4] - ltimes[4])

            aSum = max(user + system + idle + iowait, 1)
            samples.append( CPUSample(time, user/aSum, system/aSum, iowait/aSum) )

        ltimes = times
        # skip the rest of statistics lines
    return samples

def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole device, e.g. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    disk_regex_re = re.compile (r'^([hsv]d.|mtdblock\d|mmcblk\d|cciss/c\d+d\d+.*)$')

    # this gets called an awful lot.
    def is_relevant_line(linetokens):
        if len(linetokens) != 14:
            return False
        disk = linetokens[2]
        return disk_regex_re.match(disk)

    disk_stat_samples = []

    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        relevant_tokens = [linetokens for linetokens in map (lambda x: x.split(), lines) if is_relevant_line(linetokens)]

        for tokens in relevant_tokens:
            disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
            sample.add_diskdata([rsect, wsect, use])

        disk_stat_samples.append(sample)

    disk_stats = []
    for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
        interval = sample2.time - sample1.time
        if interval == 0:
            interval = 1
        sums = [ b - a for a, b in zip(sample1.diskdata, sample2.diskdata) ]
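        # rsect/wsect count 512-byte sectors, so dividing by 2.0 yields KiB;
        # the timestamps appear to be in 10ms units (an assumption inferred
        # from the *100.0 scaling below), giving a KiB-per-second throughput,
        # while the ms-based 'use' field becomes a 0..1 utilization fraction.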
        readTput = sums[0] / 2.0 * 100.0 / interval
        writeTput = sums[1] / 2.0 * 100.0 / interval
        util = float( sums[2] ) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))

    return disk_stats

def _parse_proc_meminfo_log(file):
    """
    Parse file for global memory statistics.
    The format of relevant lines should be: ^key: value( unit)?
    """
    used_values = ('MemTotal', 'MemFree', 'Buffers', 'Cached', 'SwapTotal', 'SwapFree',)

    mem_stats = []
    meminfo_re = re.compile(r'([^ \t:]+):\s*(\d+).*')
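    # matches e.g. 'MemTotal:        2048 kB' as ('MemTotal', '2048')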

    for time, lines in _parse_timed_blocks(file):
        sample = MemSample(time)

        for line in lines:
            match = meminfo_re.match(line)
            if not match:
                raise ParseError("Invalid meminfo line \"%s\"" % line)
            sample.add_value(match.group(1), int(match.group(2)))

        if sample.valid():
            mem_stats.append(sample)

    return mem_stats

# if we boot the kernel with: initcall_debug printk.time=1 we can
# get all manner of interesting data from the dmesg output.
# We turn this into a pseudo-process tree: each initcall becomes a
# pseudo-process whose start time and duration come from the paired
# 'calling' / 'initcall ... returned' lines.
# We don't try to detect a "kernel finished" state - since the kernel
# continues to do interesting things after init is called.
#
# sample input:
# [ 0.000000] ACPI: FACP 3f4fc000 000F4 (v04 INTEL Napa 00000001 MSFT 01000013)
# ...
# [ 0.039993] calling migration_init+0x0/0x6b @ 1
# [ 0.039993] initcall migration_init+0x0/0x6b returned 1 after 0 usecs
def _parse_dmesg(writer, file):
    timestamp_re = re.compile (r"^\[\s*(\d+\.\d+)\s*]\s+(.*)$")
    split_re = re.compile (r"^(\S+)\s+([\S\+_-]+) (.*)$")
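    # timestamp_re captures '[ 0.039993]'-style stamps as seconds plus the
    # remainder of the line; split_re then breaks that remainder into
    # (type, function, rest), e.g. ('calling', 'migration_init+0x0/0x6b', '@ 1').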
    processMap = {}
    idx = 0
    inc = 1.0 / 1000000
    kernel = Process(writer, idx, "k-boot", 0, 0.1)
    processMap['k-boot'] = kernel
    base_ts = False
    max_ts = 0
    for line in file.read().decode('utf-8').split('\n'):
        t = timestamp_re.match (line)
        if t is None:
#            print "duff timestamp " + line
            continue

        time_ms = float (t.group(1)) * 1000
        # We may see a huge jump in timestamps once the hardware clock
        # has been set up; a graph that wide could get us killed by the
        # OOM killer. So instead of using the raw timestamp we use a
        # delta against the first large one, and skip that first one
        # for convenience.
        if max_ts == 0 and not base_ts and time_ms > 1000:
            base_ts = time_ms
            continue
        max_ts = max(time_ms, max_ts)
        if base_ts:
#            print "fscked clock: used %f instead of %f" % (time_ms - base_ts, time_ms)
            time_ms -= base_ts
        m = split_re.match (t.group(2))

        if m is None:
            continue
#        print "match: '%s'" % (m.group(1))
        type = m.group(1)
        func = m.group(2)
        rest = m.group(3)

        if t.group(2).startswith ('Write protecting the') or \
           t.group(2).startswith ('Freeing unused kernel memory'):
            kernel.duration = time_ms / 10
            continue

#        print "foo: '%s' '%s' '%s'" % (type, func, rest)
        if type == "calling":
            ppid = kernel.pid
            p = re.match (r"\@ (\d+)", rest)
            if p is not None:
                ppid = float (p.group(1)) // 1000
#                print "match: '%s' ('%g') at '%s'" % (func, ppid, time_ms)
            name = func.split ('+', 1) [0]
            idx += inc
            processMap[func] = Process(writer, ppid + idx, name, ppid, time_ms / 10)
        elif type == "initcall":
#            print "finished: '%s' at '%s'" % (func, time_ms)
            if func in processMap:
                process = processMap[func]
                process.duration = (time_ms / 10) - process.start_time
            else:
                print("corrupted init call for %s" % (func))

        elif type == "async_waiting" or type == "async_continuing":
            continue # ignore

    return processMap.values()

#
# Parse binary pacct accounting file output if we have one
# cf. /usr/include/linux/acct.h
#
def _parse_pacct(writer, file):
    # read a little-endian int32 (bytearray indexing yields ints on
    # both Python 2 and 3)
    def _read_le_int32(file):
        byts = bytearray(file.read(4))
        return byts[0] | (byts[1] << 8) | \
               (byts[2] << 16) | (byts[3] << 24)

    parent_map = {}
    parent_map[0] = 0
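    # Record layout (assuming acct_v3 from linux/acct.h): one byte of flags,
    # one byte of version, then 14 bytes of tty/exitcode/uid/gid, then the
    # little-endian pid and ppid we care about; the trailing seeks skip the
    # timing fields and the 16-byte acct_comm name.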
    while file.read(1) != b"": # ignore flags
        ver = file.read(1)
        if ord(ver) < 3:
            print("Invalid version 0x%x" % (ord(ver)))
            return None

        file.seek (14, 1) # user, group etc.
        pid = _read_le_int32 (file)
        ppid = _read_le_int32 (file)
#        print "Parent of %d is %d" % (pid, ppid)
        parent_map[pid] = ppid
        file.seek (4 + 4 + 16, 1) # timings
        file.seek (16, 1) # acct_comm
    return parent_map

def _parse_paternity_log(writer, file):
    parent_map = {}
    parent_map[0] = 0
    for line in file.read().decode('utf-8').split('\n'):
        if not line:
            continue
        elems = line.split(' ') # <Child> <Parent>
        if len (elems) >= 2:
#            print "paternity of %d is %d" % (int(elems[0]), int(elems[1]))
            parent_map[int(elems[0])] = int(elems[1])
        else:
            print("Odd paternity line '%s'" % (line))
    return parent_map

def _parse_cmdline_log(writer, file):
    cmdLines = {}
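    # each blank-line-separated block is expected to look like:
    #   <pid>
    #   :<path to exe>
    #   :<arg0>\0<arg1>\0...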
    for block in file.read().decode('utf-8').split('\n\n'):
        lines = block.split('\n')
        if len (lines) >= 3:
#            print "Lines '%s'" % (lines[0])
            pid = int (lines[0])
            values = {}
            values['exe'] = lines[1].lstrip(':')
            args = lines[2].lstrip(':').split('\0')
            args.pop()
            values['args'] = args
            cmdLines[pid] = values
    return cmdLines

def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
    if headers is None:
        return 1
    if headers.get("system.cpu.num"):
        return max (int (headers.get("system.cpu.num")), 1)
    cpu_model = headers.get("system.cpu")
    if cpu_model is None:
        return 1
    mat = re.match(".*\\((\\d+)\\)", cpu_model)
    if mat is None:
        return 1
    return max (int(mat.group(1)), 1)

def _do_parse(writer, state, filename, file):
    writer.info("parsing '%s'" % filename)
    t1 = clock()
    paths = filename.split("/")
    task = paths[-1]
    pn = paths[-2]
    start = None
    end = None
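    # each buildstats task file carries lines such as (illustrative values):
    #   Started: 1234.56
    #   Ended: 1240.12
    # where the last whitespace-separated token is the timestamp.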
    for line in file:
        if line.startswith("Started:"):
            start = int(float(line.split()[-1]))
        elif line.startswith("Ended:"):
            end = int(float(line.split()[-1]))
    if start and end:
        state.add_process(pn + ":" + task, start, end)
    t2 = clock()
    writer.info(" %s seconds" % str(t2-t1))
    return state

def parse_file(writer, state, filename):
    if state.filename is None:
        state.filename = filename
    basename = os.path.basename(filename)
    with open(filename, "rb") as file:
        return _do_parse(writer, state, filename, file)

def parse_paths(writer, state, paths):
    for path in paths:
        if state.filename is None:
            state.filename = path
        root, extension = os.path.splitext(path)
        if not(os.path.exists(path)):
            writer.warn("warning: path '%s' does not exist, ignoring." % path)
            continue
        #state.filename = path
        if os.path.isdir(path):
            files = sorted([os.path.join(path, f) for f in os.listdir(path)])
            state = parse_paths(writer, state, files)
        elif extension in [".tar", ".tgz", ".gz"]:
            if extension == ".gz":
                root, extension = os.path.splitext(root)
                if extension != ".tar":
                    writer.warn("warning: can only handle zipped tar files, not zipped '%s'-files; ignoring" % extension)
                    continue
            tf = None
            try:
                writer.status("parsing '%s'" % path)
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(writer, state, name, tf.extractfile(name))
            except tarfile.ReadError as error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(writer, state, path)
    return state

def split_res(res, options):
    """ Split the parsed trace into options.num pieces """
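    # processes are bucketed by sorted start time into ceil(len/num)-sized
    # chunks, and each chunk becomes its own Trace so it can be rendered as
    # a separate chart.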
    res_list = []
    if options.num > 1:
        s_list = sorted(res.start.keys())
        frag_size = len(s_list) / float(options.num)
        # Need the top value
        if frag_size > int(frag_size):
            frag_size = int(frag_size + 1)
        else:
            frag_size = int(frag_size)

        start = 0
        end = frag_size
        while start < end:
            state = Trace(None, [], None)
            if options.full_time:
                state.min = min(res.start.keys())
                state.max = max(res.end.keys())
            for i in range(start, end):
                # Add this line for reference
                #state.add_process(pn + ":" + task, start, end)
                for p in res.start[s_list[i]]:
                    state.add_process(p, s_list[i], res.processes[p][1])
            start = end
            end = end + frag_size
            if end > len(s_list):
                end = len(s_list)
            res_list.append(state)
    else:
        res_list.append(res)
    return res_list