author     Markus Lehtonen <markus.lehtonen@linux.intel.com>    2017-03-31 17:07:29 +0300
committer  Richard Purdie <richard.purdie@linuxfoundation.org>  2017-04-01 23:28:20 +0100
commit     9f299876f716f253b0a3d70eb4473a023c593fc5
tree       057d934e96df36ac3e28113b11f5b1ce70c7b614 /scripts/oe-build-perf-report
parent     5a85d39c9d5502aabc2dde20f2a16bf7ac9f2d22
scripts: add oe-build-perf-report script
A new tool for pretty-printing build perf test results stored in a Git
repository. The script is able to produce either a simple plaintext report
showing the difference between two commits, or an HTML report that also
displays trend charts of the test results. The script uses Jinja2 templates
for generating HTML reports, so python3-jinja2 needs to be installed on the
system.

[YOCTO #10931]

(From OE-Core rev: 3b25404f0f99b72f222bdca815929be1cf1cee35)

Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
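A usage sketch (options as defined in the script's parse_args() below; the
repository path and hostname are made-up placeholders):

    oe-build-perf-report --repo ~/build-perf-results --list
    oe-build-perf-report --repo ~/build-perf-results --hostname buildhost --branch master
    oe-build-perf-report --repo ~/build-perf-results --html > report.html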
Diffstat (limited to 'scripts/oe-build-perf-report')
-rwxr-xr-x  scripts/oe-build-perf-report  531
1 file changed, 531 insertions(+), 0 deletions(-)
diff --git a/scripts/oe-build-perf-report b/scripts/oe-build-perf-report
new file mode 100755
index 0000000000..39766135c6
--- /dev/null
+++ b/scripts/oe-build-perf-report
@@ -0,0 +1,531 @@
#!/usr/bin/python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats)
from build_perf import html

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo


# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')


# Container class for tested revisions
TestedRev = namedtuple('TestedRev', 'commit commit_number tags')


def get_test_runs(repo, tag_name, **kwargs):
    """Get a sorted list of test runs, matching given pattern"""
    # First, get field names from the tag name pattern
    field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
    undef_fields = [f for f in field_names if f not in kwargs.keys()]

    # Fields for formatting tag name pattern
    str_fields = dict([(f, '*') for f in field_names])
    str_fields.update(kwargs)

    # Get a list of all matching tags
    tag_pattern = tag_name.format(**str_fields)
    tags = repo.run_cmd(['tag', '-l', tag_pattern]).splitlines()
    log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)

    # Parse undefined fields from tag names
    str_fields = dict([(f, r'(?P<{}>[\w\-.]+)'.format(f)) for f in field_names])
    str_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
    str_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
    str_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
    str_fields.update(kwargs)
    tag_re = re.compile(tag_name.format(**str_fields))

    # Parse fields from tags
    revs = []
    for tag in tags:
        m = tag_re.match(tag)
        groups = m.groupdict()
        revs.append([groups[f] for f in undef_fields] + [tag])

    # Return field names and a sorted list of revs
    return undef_fields, sorted(revs)

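# Illustration (not part of the original script): with the default tag name
# pattern from parse_args() below,
#     {hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}
# and a call like get_test_runs(repo, tag_name, hostname='buildhost') (the
# hostname is a made-up value), the glob passed to 'git tag -l' becomes
# 'buildhost/*/*/*-g*/*'. The returned field names are the pattern fields not
# fixed by kwargs, here ['branch', 'machine', 'commit_number', 'commit',
# 'tag_number'], and each returned rev holds those parsed values plus the
# full tag name.
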
def list_test_revs(repo, tag_name, **kwargs):
    """Get list of all tested revisions"""
    fields, revs = get_test_runs(repo, tag_name, **kwargs)
    ignore_fields = ['tag_number']
    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    # Sort revs
    rows = [[fields[i].upper() for i in print_fields] + ['TEST RUNS']]
    prev = [''] * len(revs)
    for rev in revs:
        # Only use fields that we want to print
        rev = [rev[i] for i in print_fields]

        if rev != prev:
            new_row = [''] * len(print_fields) + [1]
            for i in print_fields:
                if rev[i] != prev[i]:
                    break
            new_row[i:-1] = rev[i:]
            rows.append(new_row)
        else:
            rows[-1][-1] += 1
        prev = rev

    print_table(rows)

def get_test_revs(repo, tag_name, **kwargs):
    """Get list of all tested revisions"""
    fields, runs = get_test_runs(repo, tag_name, **kwargs)

    revs = {}
    commit_i = fields.index('commit')
    commit_num_i = fields.index('commit_number')
    for run in runs:
        commit = run[commit_i]
        commit_num = run[commit_num_i]
        tag = run[-1]
        if commit not in revs:
            revs[commit] = TestedRev(commit, commit_num, [tag])
        else:
            assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
            revs[commit].tags.append(tag)

    # Return in sorted table
    revs = sorted(revs.values(), key=attrgetter('commit_number'))
    log.debug("Found %d tested revisions:\n %s", len(revs),
              "\n ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
    return revs

def rev_find(revs, attr, val):
    """Search from a list of TestedRev"""
    for i, rev in enumerate(revs):
        if getattr(rev, attr) == val:
            return i
    raise ValueError("Unable to find '{}' value '{}'".format(attr, val))

def is_xml_format(repo, commit):
    """Check if the commit contains xml (or json) data"""
    if repo.rev_parse(commit + ':results.xml'):
        log.debug("Detected report in xml format in %s", commit)
        return True
    else:
        log.debug("No xml report in %s, assuming json formatted results", commit)
        return False

def read_results(repo, tags, xml=True):
    """Read result files from repo"""

    def parse_xml_stream(data):
        """Parse multiple concatenated XML objects"""
        objs = []
        xml_d = ""
        for line in data.splitlines():
            if xml_d and line.startswith('<?xml version='):
                objs.append(ET.fromstring(xml_d))
                xml_d = line
            else:
                xml_d += line
        objs.append(ET.fromstring(xml_d))
        return objs

    def parse_json_stream(data):
        """Parse multiple concatenated JSON objects"""
        objs = []
        json_d = ""
        for line in data.splitlines():
            if line == '}{':
                json_d += '}'
                objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
                json_d = '{'
            else:
                json_d += line
        objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
        return objs

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if xml:
        git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
        data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
                [results_xml_to_json(e) for e in data[num_revs:]])
    else:
        git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
        data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
        return data[0:num_revs], data[num_revs:]

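# Illustration (not part of the original script): read_results() fetches all
# blobs with a single 'git show', which prints them back to back. Because the
# stored result files evidently end without a trailing newline, the boundary
# between two JSON documents shows up as a '}{' line, which is exactly what
# parse_json_stream() splits on; parse_xml_stream() instead splits on the
# '<?xml version=' declaration that starts each new document.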

def get_data_item(data, key):
    """Nested getitem lookup"""
    for k in key.split('.'):
        data = data[k]
    return data

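# Illustration (not part of the original script): get_data_item() resolves
# dot-separated keys against nested dicts, so
#     get_data_item(metadata, 'layers.meta.commit')
# is equivalent to metadata['layers']['meta']['commit'] and raises KeyError
# if any component is missing.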

def metadata_diff(metadata_l, metadata_r):
    """Prepare a metadata diff for printing"""
    keys = [('Hostname', 'hostname', 'hostname'),
            ('Branch', 'branch', 'layers.meta.branch'),
            ('Commit number', 'commit_num', 'layers.meta.commit_count'),
            ('Commit', 'commit', 'layers.meta.commit'),
            ('Number of test runs', 'testrun_count', 'testrun_count')
           ]

    def _metadata_diff(key):
        """Diff metadata from two test reports"""
        try:
            val1 = get_data_item(metadata_l, key)
        except KeyError:
            val1 = '(N/A)'
        try:
            val2 = get_data_item(metadata_r, key)
        except KeyError:
            val2 = '(N/A)'
        return val1, val2

    metadata = OrderedDict()
    for title, key, key_json in keys:
        value_l, value_r = _metadata_diff(key_json)
        metadata[key] = {'title': title,
                         'value_old': value_l,
                         'value': value_r}
    return metadata

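# Illustration (not part of the original script): metadata_diff() returns an
# OrderedDict keyed by the short names above, e.g. (all values made up):
#     {'branch': {'title': 'Branch', 'value_old': 'master', 'value': 'master'},
#      'commit': {'title': 'Commit', 'value_old': 'abc1234', 'value': 'def5678'},
#      ...}
# where 'value_old' comes from the left-hand report and 'value' from the
# right-hand (current) one.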

def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print differences between two data sets"""

    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_diff = metadata_diff(metadata_l, metadata_r)
    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, val in meta_diff.items():
        # Shorten commit hashes
        if key == 'commit':
            rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
        else:
            rows.append([val['title'] + ':', val['value'], val['value_old']])
    print_table(rows, row_fmt)


    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests = list(data_l['tests'].keys())
    # Append tests that are only present in 'right' set
    tests += [t for t in list(data_r['tests'].keys()) if t not in tests]

    # Prepare data to be printed
    rows = []
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               ' {:>{wid}}', ' {:>{wid}}']
    num_cols = len(row_fmt)
    for test in tests:
        test_l = data_l['tests'][test] if test in data_l['tests'] else None
        test_r = data_r['tests'][test] if test in data_r['tests'] else None
        pref = ' '
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        descr = test_l['description'] if test_l else test_r['description']
        heading = "{} {}: {}".format(pref, test, descr)

        rows.append([heading])

        # Generate the list of measurements
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements += [m for m in list(meas_r.keys()) if m not in measurements]

        for meas in measurements:
            m_pref = ' '
            if meas in meas_l:
                stats_l = measurement_stats(meas_l[meas], 'l.')
            else:
                stats_l = measurement_stats(None, 'l.')
                m_pref = '+'
            if meas in meas_r:
                stats_r = measurement_stats(meas_r[meas], 'r.')
            else:
                stats_r = measurement_stats(None, 'r.')
                m_pref = '-'
            stats = stats_l.copy()
            stats.update(stats_r)

            absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
            reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
            if stats['r.mean'] > stats['l.mean']:
                absdiff = '+' + str(absdiff)
            else:
                absdiff = str(absdiff)
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(stats['l.mean']), '->', str(stats['r.mean']),
                         absdiff, reldiff])
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()

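# Illustration (not part of the original script): with the row formats above,
# the plaintext report looks roughly like this (all values made up):
#
#     TEST METADATA:
#     ==============
#                     CURRENT COMMIT  COMPARING WITH
#     Hostname:       buildhost       buildhost
#     Commit:         def5678         abc1234
#
#     TEST RESULTS:
#     =============
#       test1: Example test description
#          sysres time    100.0 s  ->  102.5 s  +2.5 s  +2.5 %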

def print_html_report(data, id_comp):
    """Print report in html format"""
    # Handle metadata
    metadata = metadata_diff(data[id_comp][0], data[-1][0])


    # Generate list of tests
    tests = []
    for test in data[-1][1]['tests'].keys():
        test_r = data[-1][1]['tests'][test]
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                   }
        # Limit length of err output shown
        if 'message' in test_r:
            lines = test_r['message'].splitlines()
            if len(lines) > 20:
                new_test['message'] = '...\n' + '\n'.join(lines[-20:])
            else:
                new_test['message'] = test_r['message']


        # Generate the list of measurements
        for meas in test_r['measurements'].keys():
            meas_r = test_r['measurements'][meas]
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                       }
            samples = []

            # Run through all revisions in our data
            for meta, test_data in data:
                if (test not in test_data['tests'] or
                        meas not in test_data['tests'][test]['measurements']):
                    samples.append(measurement_stats(None))
                    continue
                test_i = test_data['tests'][test]
                meas_i = test_i['measurements'][meas]
                commit_num = get_data_item(meta, 'layers.meta.commit_count')
                samples.append(measurement_stats(meas_i))
                samples[-1]['commit_num'] = commit_num

            absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = "{:+.1f} %".format(absdiff * 100 / samples[id_comp]['mean'])
            new_meas['samples'] = samples
            new_meas['value'] = samples[-1]
            new_meas['value_type'] = samples[-1]['val_cls']

            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options
    chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
                 }

    print(html.template.render(metadata=metadata, test_data=tests, chart_opts=chart_opts))


def auto_args(repo, args):
    """Guess arguments, if not defined by the user"""
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    msg = repo.run_cmd(['log', '-1', '--all', '--format=%b'])
    for line in msg.splitlines():
        split = line.split(':', 1)
        if len(split) != 2:
            continue

        key = split[0]
        val = split[1].strip()
        if key == 'hostname':
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch':
            log.debug("Using branch %s", val)
            args.branch = val

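# Illustration (not part of the original script): auto_args() expects the
# body of the latest commit in the results repo to carry 'key: value' lines,
# e.g. (made-up values):
#     hostname: buildhost
#     branch: master
# from which the missing --hostname and --branch arguments are filled in.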

def parse_args(argv):
    """Parse command line arguments"""
    description = """
Examine build performance test results from a Git repository"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description=description)

    parser.add_argument('--debug', '-d', action='store_true',
                        help="Verbose logging")
    parser.add_argument('--repo', '-r', required=True,
                        help="Results repository (local git clone)")
    parser.add_argument('--list', '-l', action='store_true',
                        help="List available test runs")
    parser.add_argument('--html', action='store_true',
                        help="Generate report in html format")
    group = parser.add_argument_group('Tag and revision')
    group.add_argument('--tag-name', '-t',
                       default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
                       help="Tag name (pattern) for finding results")
    group.add_argument('--hostname', '-H')
    group.add_argument('--branch', '-B', default='master')
    group.add_argument('--machine', default='qemux86')
    group.add_argument('--history-length', default=25, type=int,
                       help="Number of tested revisions to plot in html report")
    group.add_argument('--commit',
                       help="Revision to search for")
    group.add_argument('--commit-number',
                       help="Revision number to search for, redundant if "
                            "--commit is specified")
    group.add_argument('--commit2',
                       help="Revision to compare with")
    group.add_argument('--commit-number2',
                       help="Revision number to compare with, redundant if "
                            "--commit2 is specified")

    return parser.parse_args(argv)


def main(argv=None):
    """Script entry point"""
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name)
        return 0

    # Determine which hostname to use
    if not args.hostname:
        auto_args(repo, args)

    revs = get_test_revs(repo, args.tag_name, hostname=args.hostname,
                         branch=args.branch, machine=args.machine)
    if len(revs) < 2:
        log.error("%d tested revisions found, unable to generate report",
                  len(revs))
        return 1

    # Pick revisions
    if args.commit:
        if args.commit_number:
            log.warning("Ignoring --commit-number as --commit was specified")
        index1 = rev_find(revs, 'commit', args.commit)
    elif args.commit_number:
        index1 = rev_find(revs, 'commit_number', args.commit_number)
    else:
        index1 = len(revs) - 1

    if args.commit2:
        if args.commit_number2:
            log.warning("Ignoring --commit-number2 as --commit2 was specified")
        index2 = rev_find(revs, 'commit', args.commit2)
    elif args.commit_number2:
        index2 = rev_find(revs, 'commit_number', args.commit_number2)
    else:
        if index1 > 0:
            index2 = index1 - 1
        else:
            log.error("Unable to determine the other commit, use "
                      "--commit2 or --commit-number2 to specify it")
            return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        index_0 = max(0, index_r - args.history_length)
        rev_range = range(index_0, index_r + 1)
    else:
        # We don't need a range of commits for the text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append((aggregate_metadata(raw_m), aggregate_data(raw_d)))

    # Re-map list indexes to the new table starting from index 0
    index_r = index_r - index_0
    index_l = index_l - index_0

    # Print report
    if not args.html:
        print_diff_report(data[index_l][0], data[index_l][1],
                          data[index_r][0], data[index_r][1])
    else:
        print_html_report(data, index_l)

    return 0

if __name__ == "__main__":
    sys.exit(main())