scripts/lib/build_perf/report.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345

#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
"""Handling of build perf test reports"""
from collections import OrderedDict, namedtuple
from collections.abc import Mapping
from datetime import datetime, timezone
from numbers import Number
from statistics import mean, stdev, variance


# Two-field record: `metadata` and `results`.
# NOTE(review): presumably pairs the outputs of aggregate_metadata() and
# aggregate_data() -- confirm against the callers.
AggregateTestData = namedtuple('AggregateTestData', ('metadata', 'results'))


def isofmt_to_timestamp(string):
    """Parse an ISO 8601 style timestamp into seconds since the Unix epoch

    Two layouts are understood: 'YYYY-MM-DDTHH:MM:SS' and the same layout
    followed by a fractional seconds part ('.ffffff'). The parsed time is
    interpreted as UTC.

    Returns the timestamp as a float. datetime.strptime() raises ValueError
    if the string matches neither layout.
    """
    # A dot only shows up in the variant carrying fractional seconds
    layout = '%Y-%m-%dT%H:%M:%S.%f' if '.' in string else '%Y-%m-%dT%H:%M:%S'
    parsed = datetime.strptime(string, layout)
    as_utc = parsed.replace(tzinfo=timezone.utc)
    return as_utc.timestamp()


def metadata_xml_to_json(elem):
    """Convert metadata xml into JSON format

    Arguments:
        elem: root XML element, its tag must be 'metadata'

    Returns an OrderedDict mirroring the XML tree. Every child element
    becomes one entry, keyed by its 'name' attribute when present and by its
    tag otherwise. Elements having children of their own are converted
    recursively into nested OrderedDicts, leaf elements map to their text
    (None for an empty element).

    Raises AssertionError if the root element is not 'metadata'.
    """
    assert elem.tag == 'metadata', "Invalid metadata file format"

    def _xml_to_json(elem):
        """Convert xml element to JSON object"""
        out = OrderedDict()
        # Iterate over the element itself: Element.getchildren() is gone
        # from xml.etree.ElementTree since Python 3.9
        for child in elem:
            key = child.attrib.get('name', child.tag)
            if len(child):
                out[key] = _xml_to_json(child)
            else:
                out[key] = child.text
        return out
    return _xml_to_json(elem)


def results_xml_to_json(elem):
    """Convert results xml into JSON format

    Arguments:
        elem: root XML element, must be a 'testsuites' element holding
              exactly one 'testsuite' child

    Returns an OrderedDict describing the test suite: 'tester_host' (only
    when the suite has a 'hostname' attribute), 'start_time', 'elapsed_time'
    and 'tests'. 'tests' maps test case names to their converted data, each
    of which carries a 'measurements' mapping keyed by measurement name.

    Raises AssertionError on unexpected document structure, TypeError on an
    unknown element inside a 'sysres' measurement and Exception on an
    unknown measurement tag.
    """
    rusage_fields = ('ru_utime', 'ru_stime', 'ru_maxrss', 'ru_minflt',
                     'ru_majflt', 'ru_inblock', 'ru_oublock', 'ru_nvcsw',
                     'ru_nivcsw')
    iostat_fields = ('rchar', 'wchar', 'syscr', 'syscw', 'read_bytes',
                     'write_bytes', 'cancelled_write_bytes')

    def _read_measurement(elem):
        """Convert measurement to JSON"""
        data = OrderedDict()
        data['type'] = elem.tag
        data['name'] = elem.attrib['name']
        data['legend'] = elem.attrib['legend']
        values = OrderedDict()

        # SYSRES measurement
        if elem.tag == 'sysres':
            for subel in elem:
                if subel.tag == 'time':
                    values['start_time'] = isofmt_to_timestamp(subel.attrib['timestamp'])
                    values['elapsed_time'] = float(subel.text)
                elif subel.tag == 'rusage':
                    rusage = OrderedDict()
                    for field in rusage_fields:
                        # The time fields (ru_utime, ru_stime) are floats,
                        # all other rusage fields are integers
                        if 'time' in field:
                            rusage[field] = float(subel.attrib[field])
                        else:
                            rusage[field] = int(subel.attrib[field])
                    values['rusage'] = rusage
                elif subel.tag == 'iostat':
                    values['iostat'] = OrderedDict(
                        (field, int(subel.attrib[field]))
                        for field in iostat_fields)
                elif subel.tag == 'buildstats_file':
                    values['buildstats_file'] = subel.text
                else:
                    raise TypeError("Unknown sysres value element '{}'".format(subel.tag))
        # DISKUSAGE measurement
        elif elem.tag == 'diskusage':
            values['size'] = int(elem.find('size').text)
        else:
            raise Exception("Unknown measurement tag '{}'".format(elem.tag))
        data['values'] = values
        return data

    def _read_testcase(elem):
        """Convert testcase into JSON"""
        assert elem.tag == 'testcase', "Expecting 'testcase' element instead of {}".format(elem.tag)

        data = OrderedDict()
        data['name'] = elem.attrib['name']
        data['description'] = elem.attrib['description']
        # Status stays SUCCESS unless an error/failure/skipped child is found
        data['status'] = 'SUCCESS'
        data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
        data['elapsed_time'] = float(elem.attrib['time'])
        measurements = OrderedDict()

        # Iterate over the element itself: Element.getchildren() is gone
        # from xml.etree.ElementTree since Python 3.9
        for subel in elem:
            if subel.tag == 'error' or subel.tag == 'failure':
                data['status'] = subel.tag.upper()
                data['message'] = subel.attrib['message']
                data['err_type'] = subel.attrib['type']
                data['err_output'] = subel.text
            elif subel.tag == 'skipped':
                data['status'] = 'SKIPPED'
                data['message'] = subel.text
            else:
                # Every other child is treated as a measurement
                measurements[subel.attrib['name']] = _read_measurement(subel)
        data['measurements'] = measurements
        return data

    def _read_testsuite(elem):
        """Convert suite to JSON"""
        assert elem.tag == 'testsuite', \
                "Expecting 'testsuite' element instead of {}".format(elem.tag)

        data = OrderedDict()
        if 'hostname' in elem.attrib:
            data['tester_host'] = elem.attrib['hostname']
        data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
        data['elapsed_time'] = float(elem.attrib['time'])
        tests = OrderedDict()

        for case in elem:
            tests[case.attrib['name']] = _read_testcase(case)
        data['tests'] = tests
        return data

    # Main function
    assert elem.tag == 'testsuites', "Invalid test report format"
    assert len(elem) == 1, "Too many testsuites"

    # Index instead of calling the removed getchildren()
    return _read_testsuite(elem[0])


def aggregate_metadata(metadata):
    """Merge the metadata of several test runs into one, verifying it matches

    Arguments:
        metadata: sequence of metadata objects, one per test run

    Returns a shallow copy of the first metadata object with an additional
    'testrun_count' entry holding the number of test runs, or an empty dict
    if no metadata was given.

    Raises AssertionError if the test runs disagree in structure or in
    content. String values stored under the 'pretty_name' and 'version_id'
    keys are exempt from the content comparison.
    """
    mutable_keys = ('pretty_name', 'version_id')

    def _verify(reference, candidate, strict_str=True):
        """Recursively check that candidate matches reference"""
        assert type(reference) is type(candidate), \
                "Type mismatch: {} != {}".format(type(reference), type(candidate))
        if isinstance(candidate, Mapping):
            assert set(reference.keys()) == set(candidate.keys())
            for name, value in candidate.items():
                _verify(reference[name], value, name not in mutable_keys)
            return
        if isinstance(candidate, list):
            assert len(reference) == len(candidate)
            for expected, value in zip(reference, candidate):
                _verify(expected, value)
            return
        # Strings are only compared when the caller asked for it, all other
        # leaf values are always compared
        if isinstance(candidate, str) and not strict_str:
            return
        assert reference == candidate, "Data mismatch {} != {}".format(reference, candidate)

    if not metadata:
        return {}

    # Do the aggregation
    combined = metadata[0].copy()
    for run_metadata in metadata[1:]:
        _verify(combined, run_metadata)
    combined['testrun_count'] = len(metadata)
    return combined


def aggregate_data(data):
    """Aggregate multiple test results JSON structures into one

    Arguments:
        data: sequence of test result objects, one per test run

    Returns an object of the same type as the first test run, holding the
    merged content of all runs, or an empty dict if no data was given. In
    the merged object every number is replaced by a SampleList (a list
    subclass) collecting the values of that number from all runs. Mappings
    and lists are merged recursively, strings are taken from the first run
    in which they appear.

    Raises AssertionError if the runs disagree in structure or in string
    content (strings stored under the 'status', 'message', 'err_type' and
    'err_output' keys are not compared) and Exception on values of an
    unsupported type.
    """

    mutable_keys = ('status', 'message', 'err_type', 'err_output')

    class SampleList(list):
        """Container for numerical samples"""
        pass

    def new_aggregate_obj(obj):
        """Create new object for aggregate"""
        if isinstance(obj, Number):
            new_obj = SampleList()
            new_obj.append(obj)
        elif isinstance(obj, str):
            new_obj = obj
        else:
            # Lists and dicts are kept as is: start from an empty container
            # of the same type and merge the content into it
            new_obj = obj.__class__()
            aggregate_obj(new_obj, obj)
        return new_obj

    def aggregate_obj(aggregate, obj, assert_str=True):
        """Recursive "aggregation" of JSON objects"""
        if isinstance(obj, Number):
            assert isinstance(aggregate, SampleList)
            aggregate.append(obj)
            return

        assert type(aggregate) is type(obj), \
                "Type mismatch: {} != {}".format(type(aggregate), type(obj))
        if isinstance(obj, Mapping):
            for key, val in obj.items():
                if key not in aggregate:
                    aggregate[key] = new_aggregate_obj(val)
                else:
                    aggregate_obj(aggregate[key], val, key not in mutable_keys)
        elif isinstance(obj, list):
            for i, val in enumerate(obj):
                if i >= len(aggregate):
                    # Item beyond the end of the aggregate: grow the list.
                    # (Assigning through a dict-style key here was a bug.)
                    aggregate.append(new_aggregate_obj(val))
                else:
                    aggregate_obj(aggregate[i], val)
        elif isinstance(obj, str):
            # Sanity check for data
            if assert_str:
                assert aggregate == obj, "Data mismatch {} != {}".format(aggregate, obj)
        else:
            raise Exception("BUG: unable to aggregate '{}' ({})".format(type(obj), str(obj)))

    if not data:
        return {}

    # Do the aggregation
    aggregate = data[0].__class__()
    for testrun in data:
        aggregate_obj(aggregate, testrun)
    return aggregate


class MeasurementVal(float):
    """Generic measurement value, a float with visualization helpers"""
    gv_data_type = 'number'

    def gv_value(self):
        """Return the value for visualization, "null" standing in for NaN"""
        # NaN is the only float value that compares unequal to itself
        return "null" if self != self else self


class TimeVal(MeasurementVal):
    """Measurement value holding a time in seconds"""
    quantity = 'time'
    gv_title = 'elapsed time'
    gv_data_type = 'timeofday'

    def hms(self):
        """Split the absolute value into hours, minutes and seconds

        Hours and minutes are returned as integers, seconds as a float
        including the fractional part. The sign of the value is dropped.
        """
        magnitude = abs(self)
        hours = int(magnitude / 3600)
        minutes = int((magnitude % 3600) / 60)
        seconds = magnitude % 60
        return hours, minutes, seconds

    def __str__(self):
        # NaN never compares equal to itself
        if self != self:
            return "nan"
        hours, minutes, seconds = self.hms()
        prefix = '-' if self < 0 else ''
        # The larger the value, the coarser the resolution being printed
        if hours > 0:
            return '{}{:d}:{:02d}:{:02.0f}'.format(prefix, hours, minutes, seconds)
        if minutes > 0:
            return '{}{:d}:{:04.1f}'.format(prefix, minutes, seconds)
        template = '{}{:.1f} s' if seconds > 1 else '{}{:.2f} s'
        return template.format(prefix, seconds)

    def gv_value(self):
        """Return the value for visualization

        Gives "null" for NaN, otherwise the list
        [hours, minutes, seconds, milliseconds] built from hms().
        """
        if self != self:
            return "null"
        hours, minutes, seconds = self.hms()
        whole_seconds = int(seconds)
        milliseconds = int(seconds * 1000) % 1000
        return [hours, minutes, whole_seconds, milliseconds]


class SizeVal(MeasurementVal):
    """Measurement value holding a size in kiB"""
    quantity = 'size'
    gv_title = 'size in MiB'
    gv_data_type = 'number'

    def __str__(self):
        # NaN never compares equal to itself
        if self != self:
            return "nan"
        magnitude = abs(self)
        # Choose the largest unit that does not exceed the magnitude
        if magnitude >= 1048576:
            return '{:.2f} GiB'.format(self / 1048576)
        if magnitude >= 1024:
            return '{:.2f} MiB'.format(self / 1024)
        return '{:.1f} kiB'.format(self)

    def gv_value(self):
        """Return the value in MiB for visualization, "null" for NaN"""
        return "null" if self != self else self / 1024

def measurement_stats(meas, prefix=''):
    """Get statistics of a measurement

    Arguments:
        meas: aggregated measurement with 'name', 'type' and 'values'
              entries, or a false value if there is no measurement
        prefix: string prepended to the key of every statistic

    Returns a dict of statistics. The keys 'sample_cnt', 'mean', 'stdev',
    'variance', 'min', 'max', 'minus' (mean - min) and 'plus' (max - mean)
    are prefixed with the given prefix. For an existing measurement the
    unprefixed 'name', 'val_cls' and 'quantity' keys are set, too, and all
    statistics except the sample count are instances of the value class
    matching the measurement type. Without a measurement the sample count
    is zero and all other statistics are NaN.

    Raises Exception on an unknown measurement type.
    """
    if not meas:
        return {prefix + 'sample_cnt': 0,
                prefix + 'mean': MeasurementVal('nan'),
                prefix + 'stdev': MeasurementVal('nan'),
                prefix + 'variance': MeasurementVal('nan'),
                prefix + 'min': MeasurementVal('nan'),
                prefix + 'max': MeasurementVal('nan'),
                prefix + 'minus': MeasurementVal('nan'),
                prefix + 'plus': MeasurementVal('nan')}

    stats = {'name': meas['name']}
    if meas['type'] == 'sysres':
        val_cls = TimeVal
        values = meas['values']['elapsed_time']
    elif meas['type'] == 'diskusage':
        val_cls = SizeVal
        values = meas['values']['size']
    else:
        raise Exception("Unknown measurement type '{}'".format(meas['type']))
    stats['val_cls'] = val_cls
    stats['quantity'] = val_cls.quantity
    stats[prefix + 'sample_cnt'] = len(values)

    mean_val = val_cls(mean(values))
    min_val = val_cls(min(values))
    max_val = val_cls(max(values))

    stats[prefix + 'mean'] = mean_val
    if len(values) > 1:
        stats[prefix + 'stdev'] = val_cls(stdev(values))
        stats[prefix + 'variance'] = val_cls(variance(values))
    else:
        # stdev() and variance() need at least two samples. Use a NaN of
        # the value class (not a plain float) so that these entries offer
        # the same interface as all the other statistics.
        stats[prefix + 'stdev'] = val_cls('nan')
        stats[prefix + 'variance'] = val_cls('nan')
    stats[prefix + 'min'] = min_val
    stats[prefix + 'max'] = max_val
    stats[prefix + 'minus'] = val_cls(mean_val - min_val)
    stats[prefix + 'plus'] = val_cls(max_val - mean_val)

    return stats