summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkus Lehtonen <markus.lehtonen@linux.intel.com>2016-09-14 14:24:42 +0300
committerRichard Purdie <richard.purdie@linuxfoundation.org>2016-09-16 15:24:03 +0100
commit12cd9c01fc8127069386da0d1728bf1ff17f8db1 (patch)
tree500ca034922f3d30178f434c92c0caa0fcc112d2
parent06e56abbd25fe66b287e0bcd78787588939a3028 (diff)
downloadpoky-12cd9c01fc8127069386da0d1728bf1ff17f8db1.tar.gz
scripts: introduce buildstats-diff
New script for comparing buildstats from two separate builds. The script has two modes: normally it prints the differences in task execution (cpu) times but using --ver-diff option makes it just print the recipe version differences without any cpu time data. Other command line options are provided to alter the sort criteria of the data and to filter out insignificant differences and/or short tasks. (From OE-Core rev: e707718374ce1c95769a5f99aa3bfdfc0be685b2) Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com> Signed-off-by: Ross Burton <ross.burton@intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rwxr-xr-xscripts/buildstats-diff411
1 files changed, 411 insertions, 0 deletions
diff --git a/scripts/buildstats-diff b/scripts/buildstats-diff
new file mode 100755
index 0000000000..8ee2aaf626
--- /dev/null
+++ b/scripts/buildstats-diff
@@ -0,0 +1,411 @@
1#!/usr/bin/python3
2#
3# Script for comparing buildstats from two different builds
4#
5# Copyright (c) 2016, Intel Corporation.
6#
7# This program is free software; you can redistribute it and/or modify it
8# under the terms and conditions of the GNU General Public License,
9# version 2, as published by the Free Software Foundation.
10#
11# This program is distributed in the hope it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14# more details.
15#
16import argparse
17import glob
18import json
19import logging
20import os
21import re
22import sys
23from collections import namedtuple
24from datetime import datetime, timedelta, tzinfo
25from operator import attrgetter
26
27# Setup logging
28logging.basicConfig(level=logging.INFO)
29log = logging.getLogger()
30
31
32class TimeZone(tzinfo):
33 """Simple fixed-offset tzinfo"""
34 def __init__(self, seconds, name):
35 self._offset = timedelta(seconds=seconds)
36 self._name = name
37
38 def utcoffset(self, dt):
39 return self._offset
40
41 def tzname(self, dt):
42 return self._name
43
44 def dst(self, dt):
45 return None
46
47TIMEZONES = {'UTC': TimeZone(0, 'UTC'),
48 'EET': TimeZone(7200, 'EET'),
49 'EEST': TimeZone(10800, 'EEST')}
50
51
52taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'cputime1', 'cputime2',
53 'absdiff', 'reldiff')
54TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))
55
56
57def to_datetime_obj(obj):
58 """Helper for getting timestamps in datetime format"""
59 if isinstance(obj, datetime):
60 return obj
61 else:
62 return datetime.utcfromtimestamp(obj).replace(tzinfo=TIMEZONES['UTC'])
63
64
65def read_buildstats_file(buildstat_file):
66 """Convert buildstat text file into dict/json"""
67 bs_json = {'iostat': {},
68 'rusage': {},
69 'child_rusage': {}}
70 log.debug("Reading task buildstats from %s", buildstat_file)
71 with open(buildstat_file) as fobj:
72 for line in fobj.readlines():
73 key, val = line.split(':', 1)
74 val = val.strip()
75 if key == 'Started':
76 start_time = to_datetime_obj(float(val))
77 bs_json['start_time'] = start_time
78 elif key == 'Ended':
79 end_time = to_datetime_obj(float(val))
80 elif key.startswith('IO '):
81 split = key.split()
82 bs_json['iostat'][split[1]] = int(val)
83 elif key.find('rusage') >= 0:
84 split = key.split()
85 ru_key = split[-1]
86 if ru_key in ('ru_stime', 'ru_utime'):
87 val = float(val)
88 else:
89 val = int(val)
90 ru_type = 'rusage' if split[0] == 'rusage' else \
91 'child_rusage'
92 bs_json[ru_type][ru_key] = val
93 elif key == 'Status':
94 bs_json['status'] = val
95 bs_json['elapsed_time'] = end_time - start_time
96 return bs_json
97
98
99def read_buildstats_dir(bs_dir):
100 """Read buildstats directory"""
101 def split_nevr(nevr):
102 """Split name and version information from recipe "nevr" string"""
103 n_e_v, revision = nevr.rsplit('-', 1)
104 match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
105 n_e_v)
106 if not match:
107 # If we're not able to parse a version starting with a number, just
108 # take the part after last dash
109 match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
110 n_e_v)
111 name = match.group('name')
112 version = match.group('version')
113 epoch = match.group('epoch')
114 return name, epoch, version, revision
115
116 if os.path.isfile(os.path.join(bs_dir, 'build_stats')):
117 top_dir = bs_dir
118 else:
119 subdirs = sorted(glob.glob(bs_dir + '/*'))
120 if len(subdirs) > 1:
121 log.warning("Multiple buildstats found, using the first one")
122 top_dir = subdirs[0]
123 log.debug("Reading buildstats directory %s", top_dir)
124 subdirs = os.listdir(top_dir)
125
126 # Handle "old" style directory structure
127 if len(subdirs) == 1 and re.match('^20[0-9]{12}$', subdirs[0]):
128 top_dir = os.path.join(top_dir, subdirs[0])
129 subdirs = os.listdir(top_dir)
130
131 buildstats = {}
132 for dirname in subdirs:
133 recipe_dir = os.path.join(top_dir, dirname)
134 if not os.path.isdir(recipe_dir):
135 continue
136 name, epoch, version, revision = split_nevr(dirname)
137 recipe_bs = {'nevr': dirname,
138 'name': name,
139 'epoch': epoch,
140 'version': version,
141 'revision': revision,
142 'tasks': {}}
143 for task in os.listdir(recipe_dir):
144 recipe_bs['tasks'][task] = read_buildstats_file(
145 os.path.join(recipe_dir, task))
146 if name in buildstats:
147 log.error("Cannot handle multiple versions of the same package (%s)",
148 name)
149 sys.exit(1)
150 buildstats[name] = recipe_bs
151
152 return buildstats
153
154
155def read_buildstats_json(path):
156 """Read buildstats from JSON file"""
157 buildstats = {}
158 with open(path) as fobj:
159 bs_json = json.load(fobj)
160 for recipe_bs in bs_json:
161 if recipe_bs['name'] in buildstats:
162 log.error("Cannot handle multiple versions of the same package (%s)",
163 recipe_bs['name'])
164 sys.exit(1)
165
166 if recipe_bs['epoch'] is None:
167 recipe_bs['nevr'] = "{}-{}-{}".format(recipe_bs['name'], recipe_bs['version'], recipe_bs['revision'])
168 else:
169 recipe_bs['nevr'] = "{}-{}_{}-{}".format(recipe_bs['name'], recipe_bs['epoch'], recipe_bs['version'], recipe_bs['revision'])
170 buildstats[recipe_bs['name']] = recipe_bs
171
172 return buildstats
173
174
175def read_buildstats(path):
176 """Read buildstats"""
177 if os.path.isfile(path):
178 return read_buildstats_json(path)
179 else:
180 return read_buildstats_dir(path)
181
182
183def print_ver_diff(bs1, bs2):
184 """Print package version differences"""
185 pkgs1 = set(bs1.keys())
186 pkgs2 = set(bs2.keys())
187 new_pkgs = pkgs2 - pkgs1
188 deleted_pkgs = pkgs1 - pkgs2
189
190 echanged = []
191 vchanged = []
192 rchanged = []
193 unchanged = []
194 common_pkgs = pkgs2.intersection(pkgs1)
195 if common_pkgs:
196 for pkg in common_pkgs:
197 if bs1[pkg]['epoch'] != bs2[pkg]['epoch']:
198 echanged.append(pkg)
199 elif bs1[pkg]['version'] != bs2[pkg]['version']:
200 vchanged.append(pkg)
201 elif bs1[pkg]['revision'] != bs2[pkg]['revision']:
202 rchanged.append(pkg)
203 else:
204 unchanged.append(pkg)
205
206 maxlen = max([len(pkg) for pkg in pkgs1.union(pkgs2)])
207 fmt_str = " {:{maxlen}} ({})"
208# if unchanged:
209# print("\nUNCHANGED PACKAGES:")
210# print("-------------------")
211# maxlen = max([len(pkg) for pkg in unchanged])
212# for pkg in sorted(unchanged):
213# print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))
214
215 if new_pkgs:
216 print("\nNEW PACKAGES:")
217 print("-------------")
218 for pkg in sorted(new_pkgs):
219 print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))
220
221 if deleted_pkgs:
222 print("\nDELETED PACKAGES:")
223 print("-----------------")
224 for pkg in sorted(deleted_pkgs):
225 print(fmt_str.format(pkg, bs1[pkg]['nevr'], maxlen=maxlen))
226
227 fmt_str = " {0:{maxlen}} {1:<20} ({2})"
228 if rchanged:
229 print("\nREVISION CHANGED:")
230 print("-----------------")
231 for pkg in sorted(rchanged):
232 field1 = "{} -> {}".format(pkg, bs1[pkg]['revision'], bs2[pkg]['revision'])
233 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
234 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
235
236 if vchanged:
237 print("\nVERSION CHANGED:")
238 print("----------------")
239 for pkg in sorted(vchanged):
240 field1 = "{} -> {}".format(bs1[pkg]['version'], bs2[pkg]['version'])
241 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
242 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
243
244 if echanged:
245 print("\nEPOCH CHANGED:")
246 print("--------------")
247 for pkg in sorted(echanged):
248 field1 = "{} -> {}".format(pkg, bs1[pkg]['epoch'], bs2[pkg]['epoch'])
249 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
250 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
251
252
253def task_time(task):
254 """Calculate sum of user and system time taken by a task"""
255 cputime = task['rusage']['ru_stime'] + task['rusage']['ru_utime'] + \
256 task['child_rusage']['ru_stime'] + task['child_rusage']['ru_utime']
257 return cputime
258
259
260def print_task_diff(bs1, bs2, min_cputime=0, min_timediff=0, sort_by=('absdiff',)):
261 """Diff task execution times"""
262 tasks_diff = []
263 pkg_maxlen = 0
264 task_maxlen = 0
265
266 pkgs = set(bs1.keys()).union(set(bs2.keys()))
267 for pkg in pkgs:
268 if len(pkg) > pkg_maxlen:
269 pkg_maxlen = len(pkg)
270 tasks1 = bs1[pkg]['tasks'] if pkg in bs1 else {}
271 tasks2 = bs2[pkg]['tasks'] if pkg in bs2 else {}
272 if not tasks1:
273 pkg_op = '+ '
274 elif not tasks2:
275 pkg_op = '- '
276 else:
277 pkg_op = ' '
278
279 for task in set(tasks1.keys()).union(set(tasks2.keys())):
280 if len(task) > task_maxlen:
281 task_maxlen = len(task)
282
283 t1 = task_time(bs1[pkg]['tasks'][task]) if task in tasks1 else 0
284 t2 = task_time(bs2[pkg]['tasks'][task]) if task in tasks2 else 0
285 task_op = ' '
286 if t1 == 0:
287 reldiff = float('inf')
288 task_op = '+ '
289 else:
290 reldiff = 100 * (t2 - t1) / t1
291 if t2 == 0:
292 task_op = '- '
293
294 tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, t1, t2, t2-t1, reldiff))
295
296 if min_cputime:
297 print("Ignoring tasks shorter than {}s".format(min_cputime))
298 if min_timediff:
299 print("Ignoring time differences shorter than {}s".format(min_timediff))
300
301 print()
302 print(" {:{pkg_maxlen}} {:{task_maxlen}} {:>8} {:>10} {:>10} {}".format(
303 'PKG', 'TASK', 'ABSDIFF', 'RELDIFF', 'CPUTIME1', 'CPUTIME2',
304 pkg_maxlen=pkg_maxlen, task_maxlen=task_maxlen))
305
306 # Sort our list
307 for field in reversed(sort_by):
308 if field.startswith('-'):
309 field = field[1:]
310 reverse = True
311 else:
312 reverse = False
313 tasks_diff = sorted(tasks_diff, key=attrgetter(field), reverse=reverse)
314
315 for diff in tasks_diff:
316 cputime = max(diff.cputime1, diff.cputime2)
317 if cputime > min_cputime:
318 if abs(diff.absdiff) > min_timediff:
319 task_prefix = diff.task_op if diff.pkg_op == ' ' else ' '
320 print("{}{:{pkg_maxlen}} {}{:{task_maxlen}} {:+7.1f}s {:+9.1f}% {:9.1f}s -> {:.1f}s".format(
321 diff.pkg_op, diff.pkg, task_prefix, diff.task, diff.absdiff, diff.reldiff, diff.cputime1, diff.cputime2,
322 pkg_maxlen=pkg_maxlen, task_maxlen=task_maxlen))
323 else:
324 log.debug("Filtering out %s (difference of %0.1fs)", task, diff.absdiff)
325 else:
326 log.debug("Filtering out %s (%0.1fs)", task, cputime)
327
328
329def print_timediff_summary(bs1, bs2):
330 """Print summary of the timediffs"""
331 def total_cputime(buildstats):
332 sum = 0.0
333 for recipe_data in buildstats.values():
334 for task_data in recipe_data['tasks'].values():
335 sum += task_time(task_data)
336 return sum
337
338 def hms_time(secs):
339 """Get time in human-readable HH:MM:SS format"""
340 h = int(secs / 3600)
341 m = int((secs % 3600) / 60)
342 s = secs % 60
343 if h == 0:
344 return "{:02d}:{:04.1f}".format(m, s)
345 else:
346 return "{:d}:{:02d}:{:04.1f}".format(h, m, s)
347
348 total1 = total_cputime(bs1)
349 total2 = total_cputime(bs2)
350 print("\nCumulative CPU Time:")
351 print (" {:+.1f}s {:+.1f}% {} ({:.1f}s) -> {} ({:.1f}s)".format(
352 total2 - total1, 100 * (total2-total1) / total1,
353 hms_time(total1), total1, hms_time(total2), total2))
354
355
356
357def parse_args(argv):
358 """Parse cmdline arguments"""
359 description="""
360Script for comparing buildstats of two separate builds."""
361 parser = argparse.ArgumentParser(
362 formatter_class=argparse.ArgumentDefaultsHelpFormatter,
363 description=description)
364
365 parser.add_argument('--debug', '-d', action='store_true',
366 help="Verbose logging")
367 parser.add_argument('--ver-diff', action='store_true',
368 help="Show package version differences and exit")
369 parser.add_argument('--min-time', default=3.0, type=float,
370 help="Filter out tasks shorter than MIN_TIME seconds")
371 parser.add_argument('--min-timediff', default=1.0, type=float,
372 help="Filter out tasks whose difference in cputime is "
373 "less that MIN_TIMEDIFF seconds")
374 parser.add_argument('--sort-by', default='absdiff',
375 help="Comma-separated list of field sort order. "
376 "Prepend the field name with '-' for reversed sort. "
377 "Available fields are: {}".format(', '.join(taskdiff_fields)))
378 parser.add_argument('buildstats1', metavar='BUILDSTATS1', help="'Left' buildstat")
379 parser.add_argument('buildstats2', metavar='BUILDSTATS2', help="'Right' buildstat")
380
381 return parser.parse_args(argv)
382
383def main(argv=None):
384 """Script entry point"""
385 args = parse_args(argv)
386 if args.debug:
387 log.setLevel(logging.DEBUG)
388
389 # Validate sort fields
390 sort_by = []
391 for field in args.sort_by.split(','):
392 if field.lstrip('-') not in taskdiff_fields:
393 log.error("Invalid sort field '%s' (must be one of: %s)" %
394 (field, ', '.join(taskdiff_fields)))
395 sys.exit(1)
396 sort_by.append(field)
397
398
399 bs1 = read_buildstats(args.buildstats1)
400 bs2 = read_buildstats(args.buildstats2)
401
402 if args.ver_diff:
403 print_ver_diff(bs1, bs2)
404 else:
405 print_task_diff(bs1, bs2, args.min_time, args.min_timediff, sort_by)
406 print_timediff_summary(bs1, bs2)
407
408 return 0
409
410if __name__ == "__main__":
411 sys.exit(main())