summaryrefslogtreecommitdiffstats
path: root/scripts/buildstats-diff
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/buildstats-diff')
-rwxr-xr-xscripts/buildstats-diff411
1 files changed, 411 insertions, 0 deletions
diff --git a/scripts/buildstats-diff b/scripts/buildstats-diff
new file mode 100755
index 0000000000..8ee2aaf626
--- /dev/null
+++ b/scripts/buildstats-diff
@@ -0,0 +1,411 @@
1#!/usr/bin/python3
2#
3# Script for comparing buildstats from two different builds
4#
5# Copyright (c) 2016, Intel Corporation.
6#
7# This program is free software; you can redistribute it and/or modify it
8# under the terms and conditions of the GNU General Public License,
9# version 2, as published by the Free Software Foundation.
10#
11# This program is distributed in the hope it will be useful, but WITHOUT
12# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14# more details.
15#
16import argparse
17import glob
18import json
19import logging
20import os
21import re
22import sys
23from collections import namedtuple
24from datetime import datetime, timedelta, tzinfo
25from operator import attrgetter
26
27# Setup logging
28logging.basicConfig(level=logging.INFO)
29log = logging.getLogger()
30
31
32class TimeZone(tzinfo):
33 """Simple fixed-offset tzinfo"""
34 def __init__(self, seconds, name):
35 self._offset = timedelta(seconds=seconds)
36 self._name = name
37
38 def utcoffset(self, dt):
39 return self._offset
40
41 def tzname(self, dt):
42 return self._name
43
44 def dst(self, dt):
45 return None
46
47TIMEZONES = {'UTC': TimeZone(0, 'UTC'),
48 'EET': TimeZone(7200, 'EET'),
49 'EEST': TimeZone(10800, 'EEST')}
50
51
52taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'cputime1', 'cputime2',
53 'absdiff', 'reldiff')
54TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))
55
56
57def to_datetime_obj(obj):
58 """Helper for getting timestamps in datetime format"""
59 if isinstance(obj, datetime):
60 return obj
61 else:
62 return datetime.utcfromtimestamp(obj).replace(tzinfo=TIMEZONES['UTC'])
63
64
65def read_buildstats_file(buildstat_file):
66 """Convert buildstat text file into dict/json"""
67 bs_json = {'iostat': {},
68 'rusage': {},
69 'child_rusage': {}}
70 log.debug("Reading task buildstats from %s", buildstat_file)
71 with open(buildstat_file) as fobj:
72 for line in fobj.readlines():
73 key, val = line.split(':', 1)
74 val = val.strip()
75 if key == 'Started':
76 start_time = to_datetime_obj(float(val))
77 bs_json['start_time'] = start_time
78 elif key == 'Ended':
79 end_time = to_datetime_obj(float(val))
80 elif key.startswith('IO '):
81 split = key.split()
82 bs_json['iostat'][split[1]] = int(val)
83 elif key.find('rusage') >= 0:
84 split = key.split()
85 ru_key = split[-1]
86 if ru_key in ('ru_stime', 'ru_utime'):
87 val = float(val)
88 else:
89 val = int(val)
90 ru_type = 'rusage' if split[0] == 'rusage' else \
91 'child_rusage'
92 bs_json[ru_type][ru_key] = val
93 elif key == 'Status':
94 bs_json['status'] = val
95 bs_json['elapsed_time'] = end_time - start_time
96 return bs_json
97
98
99def read_buildstats_dir(bs_dir):
100 """Read buildstats directory"""
101 def split_nevr(nevr):
102 """Split name and version information from recipe "nevr" string"""
103 n_e_v, revision = nevr.rsplit('-', 1)
104 match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
105 n_e_v)
106 if not match:
107 # If we're not able to parse a version starting with a number, just
108 # take the part after last dash
109 match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
110 n_e_v)
111 name = match.group('name')
112 version = match.group('version')
113 epoch = match.group('epoch')
114 return name, epoch, version, revision
115
116 if os.path.isfile(os.path.join(bs_dir, 'build_stats')):
117 top_dir = bs_dir
118 else:
119 subdirs = sorted(glob.glob(bs_dir + '/*'))
120 if len(subdirs) > 1:
121 log.warning("Multiple buildstats found, using the first one")
122 top_dir = subdirs[0]
123 log.debug("Reading buildstats directory %s", top_dir)
124 subdirs = os.listdir(top_dir)
125
126 # Handle "old" style directory structure
127 if len(subdirs) == 1 and re.match('^20[0-9]{12}$', subdirs[0]):
128 top_dir = os.path.join(top_dir, subdirs[0])
129 subdirs = os.listdir(top_dir)
130
131 buildstats = {}
132 for dirname in subdirs:
133 recipe_dir = os.path.join(top_dir, dirname)
134 if not os.path.isdir(recipe_dir):
135 continue
136 name, epoch, version, revision = split_nevr(dirname)
137 recipe_bs = {'nevr': dirname,
138 'name': name,
139 'epoch': epoch,
140 'version': version,
141 'revision': revision,
142 'tasks': {}}
143 for task in os.listdir(recipe_dir):
144 recipe_bs['tasks'][task] = read_buildstats_file(
145 os.path.join(recipe_dir, task))
146 if name in buildstats:
147 log.error("Cannot handle multiple versions of the same package (%s)",
148 name)
149 sys.exit(1)
150 buildstats[name] = recipe_bs
151
152 return buildstats
153
154
155def read_buildstats_json(path):
156 """Read buildstats from JSON file"""
157 buildstats = {}
158 with open(path) as fobj:
159 bs_json = json.load(fobj)
160 for recipe_bs in bs_json:
161 if recipe_bs['name'] in buildstats:
162 log.error("Cannot handle multiple versions of the same package (%s)",
163 recipe_bs['name'])
164 sys.exit(1)
165
166 if recipe_bs['epoch'] is None:
167 recipe_bs['nevr'] = "{}-{}-{}".format(recipe_bs['name'], recipe_bs['version'], recipe_bs['revision'])
168 else:
169 recipe_bs['nevr'] = "{}-{}_{}-{}".format(recipe_bs['name'], recipe_bs['epoch'], recipe_bs['version'], recipe_bs['revision'])
170 buildstats[recipe_bs['name']] = recipe_bs
171
172 return buildstats
173
174
175def read_buildstats(path):
176 """Read buildstats"""
177 if os.path.isfile(path):
178 return read_buildstats_json(path)
179 else:
180 return read_buildstats_dir(path)
181
182
183def print_ver_diff(bs1, bs2):
184 """Print package version differences"""
185 pkgs1 = set(bs1.keys())
186 pkgs2 = set(bs2.keys())
187 new_pkgs = pkgs2 - pkgs1
188 deleted_pkgs = pkgs1 - pkgs2
189
190 echanged = []
191 vchanged = []
192 rchanged = []
193 unchanged = []
194 common_pkgs = pkgs2.intersection(pkgs1)
195 if common_pkgs:
196 for pkg in common_pkgs:
197 if bs1[pkg]['epoch'] != bs2[pkg]['epoch']:
198 echanged.append(pkg)
199 elif bs1[pkg]['version'] != bs2[pkg]['version']:
200 vchanged.append(pkg)
201 elif bs1[pkg]['revision'] != bs2[pkg]['revision']:
202 rchanged.append(pkg)
203 else:
204 unchanged.append(pkg)
205
206 maxlen = max([len(pkg) for pkg in pkgs1.union(pkgs2)])
207 fmt_str = " {:{maxlen}} ({})"
208# if unchanged:
209# print("\nUNCHANGED PACKAGES:")
210# print("-------------------")
211# maxlen = max([len(pkg) for pkg in unchanged])
212# for pkg in sorted(unchanged):
213# print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))
214
215 if new_pkgs:
216 print("\nNEW PACKAGES:")
217 print("-------------")
218 for pkg in sorted(new_pkgs):
219 print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen))
220
221 if deleted_pkgs:
222 print("\nDELETED PACKAGES:")
223 print("-----------------")
224 for pkg in sorted(deleted_pkgs):
225 print(fmt_str.format(pkg, bs1[pkg]['nevr'], maxlen=maxlen))
226
227 fmt_str = " {0:{maxlen}} {1:<20} ({2})"
228 if rchanged:
229 print("\nREVISION CHANGED:")
230 print("-----------------")
231 for pkg in sorted(rchanged):
232 field1 = "{} -> {}".format(pkg, bs1[pkg]['revision'], bs2[pkg]['revision'])
233 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
234 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
235
236 if vchanged:
237 print("\nVERSION CHANGED:")
238 print("----------------")
239 for pkg in sorted(vchanged):
240 field1 = "{} -> {}".format(bs1[pkg]['version'], bs2[pkg]['version'])
241 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
242 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
243
244 if echanged:
245 print("\nEPOCH CHANGED:")
246 print("--------------")
247 for pkg in sorted(echanged):
248 field1 = "{} -> {}".format(pkg, bs1[pkg]['epoch'], bs2[pkg]['epoch'])
249 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr'])
250 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
251
252
253def task_time(task):
254 """Calculate sum of user and system time taken by a task"""
255 cputime = task['rusage']['ru_stime'] + task['rusage']['ru_utime'] + \
256 task['child_rusage']['ru_stime'] + task['child_rusage']['ru_utime']
257 return cputime
258
259
260def print_task_diff(bs1, bs2, min_cputime=0, min_timediff=0, sort_by=('absdiff',)):
261 """Diff task execution times"""
262 tasks_diff = []
263 pkg_maxlen = 0
264 task_maxlen = 0
265
266 pkgs = set(bs1.keys()).union(set(bs2.keys()))
267 for pkg in pkgs:
268 if len(pkg) > pkg_maxlen:
269 pkg_maxlen = len(pkg)
270 tasks1 = bs1[pkg]['tasks'] if pkg in bs1 else {}
271 tasks2 = bs2[pkg]['tasks'] if pkg in bs2 else {}
272 if not tasks1:
273 pkg_op = '+ '
274 elif not tasks2:
275 pkg_op = '- '
276 else:
277 pkg_op = ' '
278
279 for task in set(tasks1.keys()).union(set(tasks2.keys())):
280 if len(task) > task_maxlen:
281 task_maxlen = len(task)
282
283 t1 = task_time(bs1[pkg]['tasks'][task]) if task in tasks1 else 0
284 t2 = task_time(bs2[pkg]['tasks'][task]) if task in tasks2 else 0
285 task_op = ' '
286 if t1 == 0:
287 reldiff = float('inf')
288 task_op = '+ '
289 else:
290 reldiff = 100 * (t2 - t1) / t1
291 if t2 == 0:
292 task_op = '- '
293
294 tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, t1, t2, t2-t1, reldiff))
295
296 if min_cputime:
297 print("Ignoring tasks shorter than {}s".format(min_cputime))
298 if min_timediff:
299 print("Ignoring time differences shorter than {}s".format(min_timediff))
300
301 print()
302 print(" {:{pkg_maxlen}} {:{task_maxlen}} {:>8} {:>10} {:>10} {}".format(
303 'PKG', 'TASK', 'ABSDIFF', 'RELDIFF', 'CPUTIME1', 'CPUTIME2',
304 pkg_maxlen=pkg_maxlen, task_maxlen=task_maxlen))
305
306 # Sort our list
307 for field in reversed(sort_by):
308 if field.startswith('-'):
309 field = field[1:]
310 reverse = True
311 else:
312 reverse = False
313 tasks_diff = sorted(tasks_diff, key=attrgetter(field), reverse=reverse)
314
315 for diff in tasks_diff:
316 cputime = max(diff.cputime1, diff.cputime2)
317 if cputime > min_cputime:
318 if abs(diff.absdiff) > min_timediff:
319 task_prefix = diff.task_op if diff.pkg_op == ' ' else ' '
320 print("{}{:{pkg_maxlen}} {}{:{task_maxlen}} {:+7.1f}s {:+9.1f}% {:9.1f}s -> {:.1f}s".format(
321 diff.pkg_op, diff.pkg, task_prefix, diff.task, diff.absdiff, diff.reldiff, diff.cputime1, diff.cputime2,
322 pkg_maxlen=pkg_maxlen, task_maxlen=task_maxlen))
323 else:
324 log.debug("Filtering out %s (difference of %0.1fs)", task, diff.absdiff)
325 else:
326 log.debug("Filtering out %s (%0.1fs)", task, cputime)
327
328
329def print_timediff_summary(bs1, bs2):
330 """Print summary of the timediffs"""
331 def total_cputime(buildstats):
332 sum = 0.0
333 for recipe_data in buildstats.values():
334 for task_data in recipe_data['tasks'].values():
335 sum += task_time(task_data)
336 return sum
337
338 def hms_time(secs):
339 """Get time in human-readable HH:MM:SS format"""
340 h = int(secs / 3600)
341 m = int((secs % 3600) / 60)
342 s = secs % 60
343 if h == 0:
344 return "{:02d}:{:04.1f}".format(m, s)
345 else:
346 return "{:d}:{:02d}:{:04.1f}".format(h, m, s)
347
348 total1 = total_cputime(bs1)
349 total2 = total_cputime(bs2)
350 print("\nCumulative CPU Time:")
351 print (" {:+.1f}s {:+.1f}% {} ({:.1f}s) -> {} ({:.1f}s)".format(
352 total2 - total1, 100 * (total2-total1) / total1,
353 hms_time(total1), total1, hms_time(total2), total2))
354
355
356
357def parse_args(argv):
358 """Parse cmdline arguments"""
359 description="""
360Script for comparing buildstats of two separate builds."""
361 parser = argparse.ArgumentParser(
362 formatter_class=argparse.ArgumentDefaultsHelpFormatter,
363 description=description)
364
365 parser.add_argument('--debug', '-d', action='store_true',
366 help="Verbose logging")
367 parser.add_argument('--ver-diff', action='store_true',
368 help="Show package version differences and exit")
369 parser.add_argument('--min-time', default=3.0, type=float,
370 help="Filter out tasks shorter than MIN_TIME seconds")
371 parser.add_argument('--min-timediff', default=1.0, type=float,
372 help="Filter out tasks whose difference in cputime is "
373 "less that MIN_TIMEDIFF seconds")
374 parser.add_argument('--sort-by', default='absdiff',
375 help="Comma-separated list of field sort order. "
376 "Prepend the field name with '-' for reversed sort. "
377 "Available fields are: {}".format(', '.join(taskdiff_fields)))
378 parser.add_argument('buildstats1', metavar='BUILDSTATS1', help="'Left' buildstat")
379 parser.add_argument('buildstats2', metavar='BUILDSTATS2', help="'Right' buildstat")
380
381 return parser.parse_args(argv)
382
383def main(argv=None):
384 """Script entry point"""
385 args = parse_args(argv)
386 if args.debug:
387 log.setLevel(logging.DEBUG)
388
389 # Validate sort fields
390 sort_by = []
391 for field in args.sort_by.split(','):
392 if field.lstrip('-') not in taskdiff_fields:
393 log.error("Invalid sort field '%s' (must be one of: %s)" %
394 (field, ', '.join(taskdiff_fields)))
395 sys.exit(1)
396 sort_by.append(field)
397
398
399 bs1 = read_buildstats(args.buildstats1)
400 bs2 = read_buildstats(args.buildstats2)
401
402 if args.ver_diff:
403 print_ver_diff(bs1, bs2)
404 else:
405 print_task_diff(bs1, bs2, args.min_time, args.min_timediff, sort_by)
406 print_timediff_summary(bs1, bs2)
407
408 return 0
409
410if __name__ == "__main__":
411 sys.exit(main())