author     Markus Lehtonen <markus.lehtonen@linux.intel.com>    2017-03-31 17:07:29 +0300
committer  Richard Purdie <richard.purdie@linuxfoundation.org>  2017-04-01 23:28:20 +0100
commit     9f299876f716f253b0a3d70eb4473a023c593fc5
tree       057d934e96df36ac3e28113b11f5b1ce70c7b614 /scripts/oe-build-perf-report
parent     5a85d39c9d5502aabc2dde20f2a16bf7ac9f2d22
scripts: add oe-build-perf-report script
A new tool for pretty-printing build perf test results stored in a Git
repository. The script is able to produce either a simple plaintext report
showing the difference between two commits, or an HTML report that also
displays trend charts of the test results. The script uses Jinja2 templates
for generating HTML reports, so python3-jinja2 needs to be installed on the
system.

[YOCTO #10931]

(From OE-Core rev: 3b25404f0f99b72f222bdca815929be1cf1cee35)

Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
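A usage sketch (options as defined in the script's parse_args() below; the
repository path and hostname are made-up placeholders):

    oe-build-perf-report --repo ~/build-perf-results --list
    oe-build-perf-report --repo ~/build-perf-results --hostname buildhost --branch master
    oe-build-perf-report --repo ~/build-perf-results --html > report.html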
Diffstat (limited to 'scripts/oe-build-perf-report')
-rwxr-xr-x  scripts/oe-build-perf-report  531
1 file changed, 531 insertions(+), 0 deletions(-)
diff --git a/scripts/oe-build-perf-report b/scripts/oe-build-perf-report
new file mode 100755
index 0000000000..39766135c6
--- /dev/null
+++ b/scripts/oe-build-perf-report
@@ -0,0 +1,531 @@
#!/usr/bin/python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats)
from build_perf import html

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo


# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')


# Container class for tested revisions
TestedRev = namedtuple('TestedRev', 'commit commit_number tags')


def get_test_runs(repo, tag_name, **kwargs):
    """Get a sorted list of test runs, matching given pattern"""
    # First, get field names from the tag name pattern
    field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
    undef_fields = [f for f in field_names if f not in kwargs.keys()]

    # Fields for formatting tag name pattern
    str_fields = dict([(f, '*') for f in field_names])
    str_fields.update(kwargs)

    # Get a list of all matching tags
    tag_pattern = tag_name.format(**str_fields)
    tags = repo.run_cmd(['tag', '-l', tag_pattern]).splitlines()
    log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)

    # Parse undefined fields from tag names
    str_fields = dict([(f, r'(?P<{}>[\w\-.]+)'.format(f)) for f in field_names])
    str_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
    str_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
    str_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
    str_fields.update(kwargs)
    tag_re = re.compile(tag_name.format(**str_fields))

    # Parse fields from tags
    revs = []
    for tag in tags:
        m = tag_re.match(tag)
        groups = m.groupdict()
        revs.append([groups[f] for f in undef_fields] + [tag])

    # Return field names and a sorted list of revs
    return undef_fields, sorted(revs)

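# Illustration (not part of the original script): with the default tag name
# pattern from parse_args() below,
#     {hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}
# and a call like get_test_runs(repo, tag_name, hostname='buildhost') (the
# hostname is a made-up value), the glob passed to 'git tag -l' becomes
# 'buildhost/*/*/*-g*/*'. The returned field names are the pattern fields not
# fixed by kwargs, here ['branch', 'machine', 'commit_number', 'commit',
# 'tag_number'], and each returned rev holds those parsed values plus the
# full tag name.
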
def list_test_revs(repo, tag_name, **kwargs):
    """Get list of all tested revisions"""
    fields, revs = get_test_runs(repo, tag_name, **kwargs)
    ignore_fields = ['tag_number']
    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Sort revs
    rows = [[fields[i].upper() for i in print_fields] + ['TEST RUNS']]
    prev = [''] * len(revs)
    for rev in revs:
        # Only use fields that we want to print
        rev = [rev[i] for i in print_fields]

        if rev != prev:
            new_row = [''] * len(print_fields) + [1]
            for i in print_fields:
                if rev[i] != prev[i]:
                    break
            new_row[i:-1] = rev[i:]
            rows.append(new_row)
        else:
            rows[-1][-1] += 1
        prev = rev

    print_table(rows)

def get_test_revs(repo, tag_name, **kwargs):
    """Get list of all tested revisions"""
    fields, runs = get_test_runs(repo, tag_name, **kwargs)

    revs = {}
    commit_i = fields.index('commit')
    commit_num_i = fields.index('commit_number')
    for run in runs:
        commit = run[commit_i]
        commit_num = run[commit_num_i]
        tag = run[-1]
        if commit not in revs:
            revs[commit] = TestedRev(commit, commit_num, [tag])
        else:
            assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
            revs[commit].tags.append(tag)

    # Return in sorted table
    revs = sorted(revs.values(), key=attrgetter('commit_number'))
    log.debug("Found %d tested revisions:\n %s", len(revs),
              "\n ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
    return revs

def rev_find(revs, attr, val):
    """Search from a list of TestedRev"""
    for i, rev in enumerate(revs):
        if getattr(rev, attr) == val:
            return i
    raise ValueError("Unable to find '{}' value '{}'".format(attr, val))

def is_xml_format(repo, commit):
    """Check if the commit contains xml (or json) data"""
    if repo.rev_parse(commit + ':results.xml'):
        log.debug("Detected report in xml format in %s", commit)
        return True
    else:
        log.debug("No xml report in %s, assuming json formatted results", commit)
        return False

def read_results(repo, tags, xml=True):
    """Read result files from repo"""

    def parse_xml_stream(data):
        """Parse multiple concatenated XML objects"""
        objs = []
        xml_d = ""
        for line in data.splitlines():
            if xml_d and line.startswith('<?xml version='):
                objs.append(ET.fromstring(xml_d))
                xml_d = line
            else:
                xml_d += line
        objs.append(ET.fromstring(xml_d))
        return objs

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
        return objs

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if xml:
        git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
        data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
                [results_xml_to_json(e) for e in data[num_revs:]])
    else:
        git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
        data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return data[0:num_revs], data[num_revs:]

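# Illustration (not part of the original script): read_results() fetches all
# blobs with a single 'git show', which prints them back to back. Because the
# stored result files evidently end without a trailing newline, the boundary
# between two JSON documents shows up as a '}{' line, which is exactly what
# parse_json_stream() splits on; parse_xml_stream() instead splits on the
# '<?xml version=' declaration that starts each new document.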

def get_data_item(data, key):
    """Nested getitem lookup"""
    for k in key.split('.'):
        data = data[k]
    return data

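# Illustration (not part of the original script): get_data_item() resolves
# dot-separated keys against nested dicts, so
#     get_data_item(metadata, 'layers.meta.commit')
# is equivalent to metadata['layers']['meta']['commit'] and raises KeyError
# if any component is missing.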

def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing"""
    keys = [('Hostname', 'hostname', 'hostname'),
            ('Branch', 'branch', 'layers.meta.branch'),
            ('Commit number', 'commit_num', 'layers.meta.commit_count'),
            ('Commit', 'commit', 'layers.meta.commit'),
            ('Number of test runs', 'testrun_count', 'testrun_count')
           ]

    def _metadata_diff(key):
        """Diff metadata from two test reports"""
        try:
            val1 = get_data_item(metadata_l, key)
        except KeyError:
            val1 = '(N/A)'
        try:
            val2 = get_data_item(metadata_r, key)
        except KeyError:
            val2 = '(N/A)'
        return val1, val2

    metadata = OrderedDict()
    for title, key, key_json in keys:
        value_l, value_r = _metadata_diff(key_json)
        metadata[key] = {'title': title,
                         'value_old': value_l,
                         'value': value_r}
    return metadata

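# Illustration (not part of the original script): metadata_diff() returns an
# OrderedDict keyed by the short names above, e.g. (all values made up):
#     {'branch': {'title': 'Branch', 'value_old': 'master', 'value': 'master'},
#      'commit': {'title': 'Commit', 'value_old': 'abc1234', 'value': 'def5678'},
#      ...}
# where 'value_old' comes from the left-hand report and 'value' from the
# right-hand (current) one.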

def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print differences between two data sets"""

    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_diff = metadata_diff(metadata_l, metadata_r)
    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, val in meta_diff.items():
        # Shorten commit hashes
        if key == 'commit':
            rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
        else:
            rows.append([val['title'] + ':', val['value'], val['value_old']])
    print_table(rows, row_fmt)


    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests = list(data_l['tests'].keys())
    # Append tests that are only present in 'right' set
    tests += [t for t in list(data_r['tests'].keys()) if t not in tests]

    # Prepare data to be printed
    rows = []
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    for test in tests:
        test_l = data_l['tests'][test] if test in data_l['tests'] else None
        test_r = data_r['tests'][test] if test in data_r['tests'] else None
        pref = ' '
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        descr = test_l['description'] if test_l else test_r['description']
        heading = "{} {}: {}".format(pref, test, descr)

        rows.append([heading])

        # Generate the list of measurements
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in list(meas_r.keys()) if m not in measurements]

        for meas in measurements:
            m_pref = ' '
            if meas in meas_l:
                stats_l = measurement_stats(meas_l[meas], 'l.')
            else:
                stats_l = measurement_stats(None, 'l.')
                m_pref = '+'
            if meas in meas_r:
                stats_r = measurement_stats(meas_r[meas], 'r.')
            else:
                stats_r = measurement_stats(None, 'r.')
                m_pref = '-'
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            if stats['r.mean'] > stats['l.mean']:
                absdiff = '+' + str(absdiff)
            else:
                absdiff = str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()

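# Illustration (not part of the original script): with the row formats above,
# the plaintext report looks roughly like this (all values made up):
#
#     TEST METADATA:
#     ==============
#                     CURRENT COMMIT  COMPARING WITH
#     Hostname:       buildhost       buildhost
#     Commit:         def5678         abc1234
#
#     TEST RESULTS:
#     =============
#       test1: Example test description
#          sysres time    100.0 s  ->  102.5 s  +2.5 s  +2.5 %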

def print_html_report(data, id_comp):
    """Print report in html format"""
    # Handle metadata
    metadata = metadata_diff(data[id_comp][0], data[-1][0])


    # Generate list of tests
    tests = []
    for test in data[-1][1]['tests'].keys():
        test_r = data[-1][1]['tests'][test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                   }
        # Limit length of err output shown
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']


        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                       }
            samples = []

            # Run through all revisions in our data
            for meta, test_data in data:
                if (test not in test_data['tests'] or
                        meas not in test_data['tests'][test]['measurements']):
                    samples.append(measurement_stats(None))
                    continue
                test_i = test_data['tests'][test]
                meas_i = test_i['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                samples.append(measurement_stats(meas_i))
                samples[-1]['commit_num'] = commit_num

            absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = "{:+.1f} %".format(absdiff * 100 / samples[id_comp]['mean'])
            new_meas['samples'] = samples
            new_meas['value'] = samples[-1]
            new_meas['value_type'] = samples[-1]['val_cls']

            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options
    chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
                 }

    print(html.template.render(metadata=metadata, test_data=tests, chart_opts=chart_opts))


def auto_args(repo, args):
    """Guess arguments, if not defined by the user"""
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--all', '--format=%b'])
    for line in msg.splitlines():
        split = line.split(':', 1)
        if len(split) != 2:
            continue

        key = split[0]
        val = split[1].strip()
        if key == 'hostname':
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch':
            log.debug("Using branch %s", val)
            args.branch = val

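# Illustration (not part of the original script): auto_args() expects the
# body of the latest commit in the results repo to carry 'key: value' lines,
# e.g. (made-up values):
#     hostname: buildhost
#     branch: master
# from which the missing --hostname and --branch arguments are filled in.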

def parse_args(argv):
    """Parse command line arguments"""
    description = """
Examine build performance test results from a Git repository"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='store_true',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")
    group = parser.add_argument_group('Tag and revision')
    group.add_argument('--tag-name', '-t',
                       default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                       help="Tag name (pattern) for finding results")
    group.add_argument('--hostname', '-H')
    group.add_argument('--branch', '-B', default='master')
    group.add_argument('--machine', default='qemux86')
    group.add_argument('--history-length', default=25, type=int,
                       help="Number of tested revisions to plot in html report")
    group.add_argument('--commit',
                       help="Revision to search for")
    group.add_argument('--commit-number',
                       help="Revision number to search for, redundant if "
                            "--commit is specified")
    group.add_argument('--commit2',
                       help="Revision to compare with")
    group.add_argument('--commit-number2',
                       help="Revision number to compare with, redundant if "
                            "--commit2 is specified")

    return parser.parse_args(argv)


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name)
        return 0

    # Determine which hostname to use
    if not args.hostname:
        auto_args(repo, args)

    revs = get_test_revs(repo, args.tag_name, hostname=args.hostname,
                         branch=args.branch, machine=args.machine)
    if len(revs) < 2:
        log.error("%d tested revisions found, unable to generate report",
                  len(revs))
        return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        index_0 = max(0, index_r - args.history_length)
        rev_range = range(index_0, index_r + 1)
    else:
        # We don't need a range of commits for the text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append((aggregate_metadata(raw_m), aggregate_data(raw_d)))

    # Re-map list indexes to the new table starting from index 0
    index_r = index_r - index_0
    index_l = index_l - index_0

    # Print report
    if not args.html:
        print_diff_report(data[index_l][0], data[index_l][1],
                          data[index_r][0], data[index_r][1])
    else:
        print_html_report(data, index_l)

    return 0

if __name__ == "__main__":
    sys.exit(main())