summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMarkus Lehtonen <markus.lehtonen@linux.intel.com>2017-09-15 16:04:37 +0300
committerRichard Purdie <richard.purdie@linuxfoundation.org>2017-09-18 11:07:30 +0100
commitb5fb3dd904cd22212c720f3c79d71952ecbaa9c2 (patch)
tree69620698ccaa22f97cfc7b3eb8be3e3118086c19
parent873707489fafaa5e1bd43b03b068df74d5956b00 (diff)
downloadpoky-b5fb3dd904cd22212c720f3c79d71952ecbaa9c2.tar.gz
scripts/buildstats-diff: move code to lib/buildstats.py
Move over code from buildstats-diff to new scripts/lib/buildstats.py module in order to share code related to buildstats processing. Also, refactor the code, introducing new classes to make the code readable, maintainable and easier to debug. [YOCTO #11381] (From OE-Core rev: 8a2cd9afc95919737d8e75234e78bbc52e1494a1) Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com> Signed-off-by: Ross Burton <ross.burton@intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rwxr-xr-xscripts/buildstats-diff278
-rw-r--r--scripts/lib/buildstats.py301
2 files changed, 329 insertions, 250 deletions
diff --git a/scripts/buildstats-diff b/scripts/buildstats-diff
index 8e64480eb3..ce82dabee9 100755
--- a/scripts/buildstats-diff
+++ b/scripts/buildstats-diff
@@ -15,15 +15,18 @@
15# 15#
16import argparse 16import argparse
17import glob 17import glob
18import json
19import logging 18import logging
20import math 19import math
21import os 20import os
22import re
23import sys 21import sys
24from collections import namedtuple
25from operator import attrgetter 22from operator import attrgetter
26 23
24# Import oe libs
25scripts_path = os.path.dirname(os.path.realpath(__file__))
26sys.path.append(os.path.join(scripts_path, 'lib'))
27from buildstats import BuildStats, diff_buildstats, taskdiff_fields
28
29
27# Setup logging 30# Setup logging
28logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") 31logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
29log = logging.getLogger() 32log = logging.getLogger()
@@ -34,196 +37,16 @@ class ScriptError(Exception):
34 pass 37 pass
35 38
36 39
37taskdiff_fields = ('pkg', 'pkg_op', 'task', 'task_op', 'value1', 'value2',
38 'absdiff', 'reldiff')
39TaskDiff = namedtuple('TaskDiff', ' '.join(taskdiff_fields))
40
41
42class BSTask(dict):
43 def __init__(self, *args, **kwargs):
44 self['start_time'] = None
45 self['elapsed_time'] = None
46 self['status'] = None
47 self['iostat'] = {}
48 self['rusage'] = {}
49 self['child_rusage'] = {}
50 super(BSTask, self).__init__(*args, **kwargs)
51
52 @property
53 def cputime(self):
54 """Sum of user and system time taken by the task"""
55 rusage = self['rusage']['ru_stime'] + self['rusage']['ru_utime']
56 if self['child_rusage']:
57 # Child rusage may have been optimized out
58 return rusage + self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']
59 else:
60 return rusage
61
62 @property
63 def walltime(self):
64 """Elapsed wall clock time"""
65 return self['elapsed_time']
66
67 @property
68 def read_bytes(self):
69 """Bytes read from the block layer"""
70 return self['iostat']['read_bytes']
71
72 @property
73 def write_bytes(self):
74 """Bytes written to the block layer"""
75 return self['iostat']['write_bytes']
76
77 @property
78 def read_ops(self):
79 """Number of read operations on the block layer"""
80 if self['child_rusage']:
81 # Child rusage may have been optimized out
82 return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']
83 else:
84 return self['rusage']['ru_inblock']
85
86 @property
87 def write_ops(self):
88 """Number of write operations on the block layer"""
89 if self['child_rusage']:
90 # Child rusage may have been optimized out
91 return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']
92 else:
93 return self['rusage']['ru_oublock']
94
95
96def read_buildstats_file(buildstat_file):
97 """Convert buildstat text file into dict/json"""
98 bs_task = BSTask()
99 log.debug("Reading task buildstats from %s", buildstat_file)
100 end_time = None
101 with open(buildstat_file) as fobj:
102 for line in fobj.readlines():
103 key, val = line.split(':', 1)
104 val = val.strip()
105 if key == 'Started':
106 start_time = float(val)
107 bs_task['start_time'] = start_time
108 elif key == 'Ended':
109 end_time = float(val)
110 elif key.startswith('IO '):
111 split = key.split()
112 bs_task['iostat'][split[1]] = int(val)
113 elif key.find('rusage') >= 0:
114 split = key.split()
115 ru_key = split[-1]
116 if ru_key in ('ru_stime', 'ru_utime'):
117 val = float(val)
118 else:
119 val = int(val)
120 ru_type = 'rusage' if split[0] == 'rusage' else \
121 'child_rusage'
122 bs_task[ru_type][ru_key] = val
123 elif key == 'Status':
124 bs_task['status'] = val
125 if end_time is not None and start_time is not None:
126 bs_task['elapsed_time'] = end_time - start_time
127 else:
128 raise ScriptError("{} looks like a invalid buildstats file".format(buildstat_file))
129 return bs_task
130
131
132def read_buildstats_dir(bs_dir):
133 """Read buildstats directory"""
134 def split_nevr(nevr):
135 """Split name and version information from recipe "nevr" string"""
136 n_e_v, revision = nevr.rsplit('-', 1)
137 match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
138 n_e_v)
139 if not match:
140 # If we're not able to parse a version starting with a number, just
141 # take the part after last dash
142 match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
143 n_e_v)
144 name = match.group('name')
145 version = match.group('version')
146 epoch = match.group('epoch')
147 return name, epoch, version, revision
148
149 if not os.path.isfile(os.path.join(bs_dir, 'build_stats')):
150 raise ScriptError("{} does not look like a buildstats directory".format(bs_dir))
151
152 log.debug("Reading buildstats directory %s", bs_dir)
153
154 buildstats = {}
155 subdirs = os.listdir(bs_dir)
156 for dirname in subdirs:
157 recipe_dir = os.path.join(bs_dir, dirname)
158 if not os.path.isdir(recipe_dir):
159 continue
160 name, epoch, version, revision = split_nevr(dirname)
161 recipe_bs = {'nevr': dirname,
162 'name': name,
163 'epoch': epoch,
164 'version': version,
165 'revision': revision,
166 'tasks': {}}
167 for task in os.listdir(recipe_dir):
168 recipe_bs['tasks'][task] = [read_buildstats_file(
169 os.path.join(recipe_dir, task))]
170 if name in buildstats:
171 raise ScriptError("Cannot handle multiple versions of the same "
172 "package ({})".format(name))
173 buildstats[name] = recipe_bs
174
175 return buildstats
176
177
178def bs_append(dst, src):
179 """Append data from another buildstats"""
180 if set(dst.keys()) != set(src.keys()):
181 raise ScriptError("Refusing to join buildstats, set of packages is "
182 "different")
183 for pkg, data in dst.items():
184 if data['nevr'] != src[pkg]['nevr']:
185 raise ScriptError("Refusing to join buildstats, package version "
186 "differs: {} vs. {}".format(data['nevr'], src[pkg]['nevr']))
187 if set(data['tasks'].keys()) != set(src[pkg]['tasks'].keys()):
188 raise ScriptError("Refusing to join buildstats, set of tasks "
189 "in {} differ".format(pkg))
190 for taskname, taskdata in data['tasks'].items():
191 taskdata.extend(src[pkg]['tasks'][taskname])
192
193
194def read_buildstats_json(path):
195 """Read buildstats from JSON file"""
196 buildstats = {}
197 with open(path) as fobj:
198 bs_json = json.load(fobj)
199 for recipe_bs in bs_json:
200 if recipe_bs['name'] in buildstats:
201 raise ScriptError("Cannot handle multiple versions of the same "
202 "package ({})".format(recipe_bs['name']))
203
204 if recipe_bs['epoch'] is None:
205 recipe_bs['nevr'] = "{}-{}-{}".format(recipe_bs['name'], recipe_bs['version'], recipe_bs['revision'])
206 else:
207 recipe_bs['nevr'] = "{}-{}_{}-{}".format(recipe_bs['name'], recipe_bs['epoch'], recipe_bs['version'], recipe_bs['revision'])
208
209 for task, data in recipe_bs['tasks'].copy().items():
210 recipe_bs['tasks'][task] = [BSTask(data)]
211
212 buildstats[recipe_bs['name']] = recipe_bs
213
214 return buildstats
215
216
217def read_buildstats(path, multi): 40def read_buildstats(path, multi):
218 """Read buildstats""" 41 """Read buildstats"""
219 if not os.path.exists(path): 42 if not os.path.exists(path):
220 raise ScriptError("No such file or directory: {}".format(path)) 43 raise ScriptError("No such file or directory: {}".format(path))
221 44
222 if os.path.isfile(path): 45 if os.path.isfile(path):
223 return read_buildstats_json(path) 46 return BuildStats.from_file_json(path)
224 47
225 if os.path.isfile(os.path.join(path, 'build_stats')): 48 if os.path.isfile(os.path.join(path, 'build_stats')):
226 return read_buildstats_dir(path) 49 return BuildStats.from_dir(path)
227 50
228 # Handle a non-buildstat directory 51 # Handle a non-buildstat directory
229 subpaths = sorted(glob.glob(path + '/*')) 52 subpaths = sorted(glob.glob(path + '/*'))
@@ -238,17 +61,16 @@ def read_buildstats(path, multi):
238 bs = None 61 bs = None
239 for subpath in subpaths: 62 for subpath in subpaths:
240 if os.path.isfile(subpath): 63 if os.path.isfile(subpath):
241 tmpbs = read_buildstats_json(subpath) 64 _bs = BuildStats.from_file_json(subpath)
242 else: 65 else:
243 tmpbs = read_buildstats_dir(subpath) 66 _bs = BuildStats.from_dir(subpath)
244 if not bs: 67 if bs is None:
245 bs = tmpbs 68 bs = _bs
246 else: 69 else:
247 log.debug("Joining buildstats") 70 bs.aggregate(_bs)
248 bs_append(bs, tmpbs)
249
250 if not bs: 71 if not bs:
251 raise ScriptError("No buildstats found under {}".format(path)) 72 raise ScriptError("No buildstats found under {}".format(path))
73
252 return bs 74 return bs
253 75
254 76
@@ -266,11 +88,11 @@ def print_ver_diff(bs1, bs2):
266 common_pkgs = pkgs2.intersection(pkgs1) 88 common_pkgs = pkgs2.intersection(pkgs1)
267 if common_pkgs: 89 if common_pkgs:
268 for pkg in common_pkgs: 90 for pkg in common_pkgs:
269 if bs1[pkg]['epoch'] != bs2[pkg]['epoch']: 91 if bs1[pkg].epoch != bs2[pkg].epoch:
270 echanged.append(pkg) 92 echanged.append(pkg)
271 elif bs1[pkg]['version'] != bs2[pkg]['version']: 93 elif bs1[pkg].version != bs2[pkg].version:
272 vchanged.append(pkg) 94 vchanged.append(pkg)
273 elif bs1[pkg]['revision'] != bs2[pkg]['revision']: 95 elif bs1[pkg].revision != bs2[pkg].revision:
274 rchanged.append(pkg) 96 rchanged.append(pkg)
275 else: 97 else:
276 unchanged.append(pkg) 98 unchanged.append(pkg)
@@ -288,37 +110,37 @@ def print_ver_diff(bs1, bs2):
288 print("\nNEW PACKAGES:") 110 print("\nNEW PACKAGES:")
289 print("-------------") 111 print("-------------")
290 for pkg in sorted(new_pkgs): 112 for pkg in sorted(new_pkgs):
291 print(fmt_str.format(pkg, bs2[pkg]['nevr'], maxlen=maxlen)) 113 print(fmt_str.format(pkg, bs2[pkg].nevr, maxlen=maxlen))
292 114
293 if deleted_pkgs: 115 if deleted_pkgs:
294 print("\nDELETED PACKAGES:") 116 print("\nDELETED PACKAGES:")
295 print("-----------------") 117 print("-----------------")
296 for pkg in sorted(deleted_pkgs): 118 for pkg in sorted(deleted_pkgs):
297 print(fmt_str.format(pkg, bs1[pkg]['nevr'], maxlen=maxlen)) 119 print(fmt_str.format(pkg, bs1[pkg].nevr, maxlen=maxlen))
298 120
299 fmt_str = " {0:{maxlen}} {1:<20} ({2})" 121 fmt_str = " {0:{maxlen}} {1:<20} ({2})"
300 if rchanged: 122 if rchanged:
301 print("\nREVISION CHANGED:") 123 print("\nREVISION CHANGED:")
302 print("-----------------") 124 print("-----------------")
303 for pkg in sorted(rchanged): 125 for pkg in sorted(rchanged):
304 field1 = "{} -> {}".format(pkg, bs1[pkg]['revision'], bs2[pkg]['revision']) 126 field1 = "{} -> {}".format(pkg, bs1[pkg].revision, bs2[pkg].revision)
305 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr']) 127 field2 = "{} -> {}".format(bs1[pkg].nevr, bs2[pkg].nevr)
306 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen)) 128 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
307 129
308 if vchanged: 130 if vchanged:
309 print("\nVERSION CHANGED:") 131 print("\nVERSION CHANGED:")
310 print("----------------") 132 print("----------------")
311 for pkg in sorted(vchanged): 133 for pkg in sorted(vchanged):
312 field1 = "{} -> {}".format(bs1[pkg]['version'], bs2[pkg]['version']) 134 field1 = "{} -> {}".format(bs1[pkg].version, bs2[pkg].version)
313 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr']) 135 field2 = "{} -> {}".format(bs1[pkg].nevr, bs2[pkg].nevr)
314 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen)) 136 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
315 137
316 if echanged: 138 if echanged:
317 print("\nEPOCH CHANGED:") 139 print("\nEPOCH CHANGED:")
318 print("--------------") 140 print("--------------")
319 for pkg in sorted(echanged): 141 for pkg in sorted(echanged):
320 field1 = "{} -> {}".format(bs1[pkg]['epoch'], bs2[pkg]['epoch']) 142 field1 = "{} -> {}".format(bs1[pkg].epoch, bs2[pkg].epoch)
321 field2 = "{} -> {}".format(bs1[pkg]['nevr'], bs2[pkg]['nevr']) 143 field2 = "{} -> {}".format(bs1[pkg].nevr, bs2[pkg].nevr)
322 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen)) 144 print(fmt_str.format(pkg, field1, field2, maxlen=maxlen))
323 145
324 146
@@ -359,12 +181,10 @@ def print_task_diff(bs1, bs2, val_type, min_val=0, min_absdiff=0, sort_by=('absd
359 """Get cumulative sum of all tasks""" 181 """Get cumulative sum of all tasks"""
360 total = 0.0 182 total = 0.0
361 for recipe_data in buildstats.values(): 183 for recipe_data in buildstats.values():
362 for bs_task in recipe_data['tasks'].values(): 184 for bs_task in recipe_data.tasks.values():
363 total += sum([getattr(b, val_type) for b in bs_task]) / len(bs_task) 185 total += getattr(bs_task, val_type)
364 return total 186 return total
365 187
366 tasks_diff = []
367
368 if min_val: 188 if min_val:
369 print("Ignoring tasks less than {} ({})".format( 189 print("Ignoring tasks less than {} ({})".format(
370 val_to_str(min_val, True), val_to_str(min_val))) 190 val_to_str(min_val, True), val_to_str(min_val)))
@@ -373,49 +193,7 @@ def print_task_diff(bs1, bs2, val_type, min_val=0, min_absdiff=0, sort_by=('absd
373 val_to_str(min_absdiff, True), val_to_str(min_absdiff))) 193 val_to_str(min_absdiff, True), val_to_str(min_absdiff)))
374 194
375 # Prepare the data 195 # Prepare the data
376 pkgs = set(bs1.keys()).union(set(bs2.keys())) 196 tasks_diff = diff_buildstats(bs1, bs2, val_type, min_val, min_absdiff)
377 for pkg in pkgs:
378 tasks1 = bs1[pkg]['tasks'] if pkg in bs1 else {}
379 tasks2 = bs2[pkg]['tasks'] if pkg in bs2 else {}
380 if not tasks1:
381 pkg_op = '+ '
382 elif not tasks2:
383 pkg_op = '- '
384 else:
385 pkg_op = ' '
386
387 for task in set(tasks1.keys()).union(set(tasks2.keys())):
388 task_op = ' '
389 if task in tasks1:
390 # Average over all values
391 val1 = [getattr(b, val_type) for b in bs1[pkg]['tasks'][task]]
392 val1 = sum(val1) / len(val1)
393 else:
394 task_op = '+ '
395 val1 = 0
396 if task in tasks2:
397 # Average over all values
398 val2 = [getattr(b, val_type) for b in bs2[pkg]['tasks'][task]]
399 val2 = sum(val2) / len(val2)
400 else:
401 val2 = 0
402 task_op = '- '
403
404 if val1 == 0:
405 reldiff = float('inf')
406 else:
407 reldiff = 100 * (val2 - val1) / val1
408
409 if max(val1, val2) < min_val:
410 log.debug("Filtering out %s:%s (%s)", pkg, task,
411 val_to_str(max(val1, val2)))
412 continue
413 if abs(val2 - val1) < min_absdiff:
414 log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
415 val_to_str(val2-val1))
416 continue
417 tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
418 val2-val1, reldiff))
419 197
420 # Sort our list 198 # Sort our list
421 for field in reversed(sort_by): 199 for field in reversed(sort_by):
diff --git a/scripts/lib/buildstats.py b/scripts/lib/buildstats.py
new file mode 100644
index 0000000000..9eb60b1c69
--- /dev/null
+++ b/scripts/lib/buildstats.py
@@ -0,0 +1,301 @@
1#
2# Copyright (c) 2017, Intel Corporation.
3#
4# This program is free software; you can redistribute it and/or modify it
5# under the terms and conditions of the GNU General Public License,
6# version 2, as published by the Free Software Foundation.
7#
8# This program is distributed in the hope it will be useful, but WITHOUT
9# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11# more details.
12#
13"""Functionality for analyzing buildstats"""
14import json
15import logging
16import os
17import re
18from collections import namedtuple
19from statistics import mean
20
21
# Logger without a name, i.e. the root logger
log = logging.getLogger()


# Column names of one task comparison row, in positional order
taskdiff_fields = (
    'pkg', 'pkg_op', 'task', 'task_op',
    'value1', 'value2', 'absdiff', 'reldiff',
)
# Record type holding one comparison row, built from the names above
TaskDiff = namedtuple('TaskDiff', taskdiff_fields)
28
29
class BSError(Exception):
    """Exception raised for errors in handling buildstats"""
33
34
class BSTask(dict):
    """Buildstats of one task run, stored as a dict

    Every instance has the keys 'start_time', 'elapsed_time', 'status',
    'iostat', 'rusage' and 'child_rusage'. The properties below derive
    figures from these values.
    """

    def __init__(self, *args, **kwargs):
        # Defaults are set first so that data passed by the caller overrides
        # them in the dict initialisation below
        self['start_time'] = None
        self['elapsed_time'] = None
        self['status'] = None
        self['iostat'] = {}
        self['rusage'] = {}
        self['child_rusage'] = {}
        super(BSTask, self).__init__(*args, **kwargs)

    @property
    def cputime(self):
        """Sum of user and system time taken by the task"""
        rusage = self['rusage']['ru_stime'] + self['rusage']['ru_utime']
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return rusage + self['child_rusage']['ru_stime'] + self['child_rusage']['ru_utime']
        else:
            return rusage

    @property
    def walltime(self):
        """Elapsed wall clock time"""
        return self['elapsed_time']

    @property
    def read_bytes(self):
        """Bytes read from the block layer"""
        return self['iostat']['read_bytes']

    @property
    def write_bytes(self):
        """Bytes written to the block layer"""
        return self['iostat']['write_bytes']

    @property
    def read_ops(self):
        """Number of read operations on the block layer"""
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return self['rusage']['ru_inblock'] + self['child_rusage']['ru_inblock']
        else:
            return self['rusage']['ru_inblock']

    @property
    def write_ops(self):
        """Number of write operations on the block layer"""
        if self['child_rusage']:
            # Child rusage may have been optimized out
            return self['rusage']['ru_oublock'] + self['child_rusage']['ru_oublock']
        else:
            return self['rusage']['ru_oublock']

    @classmethod
    def from_file(cls, buildstat_file):
        """Read buildstat text file

        The file is parsed as 'key: value' lines. 'Started' and 'Ended'
        give the timestamps used for 'elapsed_time', 'IO <name>' lines
        fill 'iostat', lines whose key contains 'rusage' fill 'rusage'
        (key starting with 'rusage') or 'child_rusage' (anything else) and
        'Status' is stored as is. Other keys are ignored, and so are lines
        without a ':' separator (e.g. blank lines).

        Raises BSError if the 'Started' or the 'Ended' line is missing.
        """
        bs_task = cls()
        log.debug("Reading task buildstats from %s", buildstat_file)
        # Both timestamps must be initialised up front: a file lacking the
        # 'Started' line has to end up in the BSError below instead of
        # failing with an UnboundLocalError
        start_time = None
        end_time = None
        with open(buildstat_file) as fobj:
            for line in fobj:
                key, sep, val = line.partition(':')
                if not sep:
                    # Not a 'key: value' line, nothing to parse
                    continue
                val = val.strip()
                if key == 'Started':
                    start_time = float(val)
                    bs_task['start_time'] = start_time
                elif key == 'Ended':
                    end_time = float(val)
                elif key.startswith('IO '):
                    split = key.split()
                    bs_task['iostat'][split[1]] = int(val)
                elif 'rusage' in key:
                    split = key.split()
                    ru_key = split[-1]
                    # The two time fields are fractional, the rest are
                    # integer counters
                    if ru_key in ('ru_stime', 'ru_utime'):
                        val = float(val)
                    else:
                        val = int(val)
                    ru_type = 'rusage' if split[0] == 'rusage' else \
                              'child_rusage'
                    bs_task[ru_type][ru_key] = val
                elif key == 'Status':
                    bs_task['status'] = val
        if start_time is None or end_time is None:
            raise BSError("{} looks like a invalid buildstats file".format(buildstat_file))
        bs_task['elapsed_time'] = end_time - start_time
        return bs_task
123
124
class BSTaskAggregate(object):
    """Class representing multiple runs of the same task

    The attributes named in `properties` are available on the aggregate.
    Each one is the mean of the same-named attribute over all contained
    task runs, calculated on first access and cached until the next
    append().
    """
    properties = ('cputime', 'walltime', 'read_bytes', 'write_bytes',
                  'read_ops', 'write_ops')

    def __init__(self, tasks=None):
        # Take a copy so that neither append() modifies the list of the
        # caller nor later changes to that list leak into the aggregate
        self._tasks = list(tasks) if tasks is not None else []
        # Cache of already calculated property values
        self._properties = {}

    def __getattr__(self, name):
        # Only called when the normal attribute lookup has failed
        if name not in self.properties:
            raise AttributeError("'BSTaskAggregate' has no attribute '{}'".format(name))
        if name not in self._properties:
            # Calculate properties on demand only. We only provide mean
            # value, so far
            self._properties[name] = mean([getattr(t, name) for t in self._tasks])
        return self._properties[name]

    def append(self, task):
        """Append new task

        Raises TypeError if task is not a BSTask instance.
        """
        # Explicit check instead of an assert, which would be skipped when
        # Python runs with optimizations (-O) enabled
        if not isinstance(task, BSTask):
            raise TypeError("Type is '{}' instead of 'BSTask'".format(type(task)))
        # Reset pre-calculated properties
        self._properties = {}
        self._tasks.append(task)
150
151
class BSRecipe(object):
    """Buildstats of a single recipe: its identity and its task data"""

    def __init__(self, name, epoch, version, revision):
        self.tasks = {}
        self.name = name
        self.epoch = epoch
        self.version = version
        self.revision = revision
        # The epoch part is only included in nevr when an epoch is given
        if epoch is not None:
            self.nevr = "{}-{}_{}-{}".format(name, epoch, version, revision)
        else:
            self.nevr = "{}-{}-{}".format(name, version, revision)

    def aggregate(self, bsrecipe):
        """Merge the task data of another buildstats of this recipe

        Raises ValueError, before anything is modified, if the nevr strings
        of the two recipes or their sets of task names are not equal.
        """
        if bsrecipe.nevr != self.nevr:
            raise ValueError("Refusing to aggregate buildstats, recipe version "
                             "differs: {} vs. {}".format(self.nevr, bsrecipe.nevr))
        if set(bsrecipe.tasks) != set(self.tasks):
            raise ValueError("Refusing to aggregate buildstats, set of tasks "
                             "in {} differ".format(self.name))

        for taskname, other_task in bsrecipe.tasks.items():
            current = self.tasks[taskname]
            if not isinstance(current, BSTaskAggregate):
                # First merge of this task: wrap the existing data
                current = BSTaskAggregate([current])
                self.tasks[taskname] = current
            current.append(other_task)
178
179
class BuildStats(dict):
    """Class representing buildstats of one build

    A dict mapping the recipe name to the BSRecipe object of that recipe.
    """

    @classmethod
    def from_json(cls, bs_json):
        """Create new BuildStats object from JSON object

        bs_json is iterated as a sequence of recipe dicts, each having the
        keys 'name', 'epoch', 'version', 'revision' and 'tasks'.

        Raises BSError if the same recipe name appears more than once.
        """
        buildstats = cls()
        for recipe in bs_json:
            name = recipe['name']
            if name in buildstats:
                raise BSError("Cannot handle multiple versions of the same "
                              "package ({})".format(name))
            bsrecipe = BSRecipe(name, recipe['epoch'],
                                recipe['version'], recipe['revision'])
            for task, data in recipe['tasks'].items():
                bsrecipe.tasks[task] = BSTask(data)

            buildstats[name] = bsrecipe

        return buildstats

    @classmethod
    def from_file_json(cls, path):
        """Load buildstats from a JSON file"""
        with open(path) as fobj:
            bs_json = json.load(fobj)
        # Go through cls so that subclasses get an object of their own type
        return cls.from_json(bs_json)

    @staticmethod
    def split_nevr(nevr):
        """Split name and version information from recipe "nevr" string

        Returns a (name, epoch, version, revision) tuple; epoch is None if
        the string contains no epoch.

        Raises BSError if the string cannot be split.
        """
        n_e_v, sep, revision = nevr.rpartition('-')
        match = None
        if sep:
            match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[0-9]\S*)$',
                             n_e_v)
            if not match:
                # If we're not able to parse a version starting with a
                # number, just take the part after last dash
                match = re.match(r'^(?P<name>\S+)-((?P<epoch>[0-9]{1,5})_)?(?P<version>[^-]+)$',
                                 n_e_v)
        if not match:
            # Fail with a descriptive error instead of an AttributeError on
            # the missing match object
            raise BSError("Unable to split name and version from '{}'".format(nevr))
        return (match.group('name'), match.group('epoch'),
                match.group('version'), revision)

    @classmethod
    def from_dir(cls, path):
        """Load buildstats from a buildstats directory

        Each subdirectory of path is taken as one recipe, named by its
        nevr string, and each entry inside it as the buildstat file of one
        task. Entries of path that are not directories are skipped.

        Raises BSError if path has no 'build_stats' file, if a subdirectory
        name cannot be split or if a recipe name appears more than once.
        """
        if not os.path.isfile(os.path.join(path, 'build_stats')):
            raise BSError("{} does not look like a buildstats directory".format(path))

        log.debug("Reading buildstats directory %s", path)

        buildstats = cls()
        for dirname in os.listdir(path):
            recipe_dir = os.path.join(path, dirname)
            if not os.path.isdir(recipe_dir):
                continue
            name, epoch, version, revision = cls.split_nevr(dirname)
            # Reject duplicates before spending time on parsing task files
            if name in buildstats:
                raise BSError("Cannot handle multiple versions of the same "
                              "package ({})".format(name))
            bsrecipe = BSRecipe(name, epoch, version, revision)
            for task in os.listdir(recipe_dir):
                bsrecipe.tasks[task] = BSTask.from_file(
                    os.path.join(recipe_dir, task))
            buildstats[name] = bsrecipe

        return buildstats

    def aggregate(self, buildstats):
        """Aggregate other buildstats into this

        Raises ValueError if the two buildstats do not contain exactly the
        same set of recipe names.
        """
        if set(self) != set(buildstats):
            raise ValueError("Refusing to aggregate buildstats, set of "
                             "recipes is different")
        for pkg, data in buildstats.items():
            self[pkg].aggregate(data)
257
258
def diff_buildstats(bs1, bs2, stat_attr, min_val=None, min_absdiff=None):
    """Compare the tasks of two buildstats

    bs1 and bs2 map recipe names to objects having a `tasks` mapping of
    task name to task data; stat_attr is the name of the attribute of the
    task data to compare.

    Returns a list of TaskDiff rows, one per task found in either
    buildstats, ordered by recipe name and then by task name. In a row:
      - pkg_op is '+' if the recipe has no tasks in bs1, '-' if it has
        none in bs2 and ' ' otherwise
      - task_op is '+' if the task is missing from bs1, '-' if it is
        missing from bs2 and ' ' otherwise
      - value1 and value2 are the values from bs1 and bs2, 0 standing in
        for a missing task
      - absdiff is value2 - value1
      - reldiff is the change in percent of value1, or infinity if value1
        is zero

    If min_val is given (and non-zero), tasks whose larger value is below
    it are left out. If min_absdiff is given (and non-zero), tasks whose
    absolute difference is below it are left out.
    """
    tasks_diff = []
    # Iterate in sorted order: plain set iteration order is not stable
    # between runs, which would make the order of the result vary, too
    for pkg in sorted(set(bs1) | set(bs2)):
        tasks1 = bs1[pkg].tasks if pkg in bs1 else {}
        tasks2 = bs2[pkg].tasks if pkg in bs2 else {}
        if not tasks1:
            pkg_op = '+'
        elif not tasks2:
            pkg_op = '-'
        else:
            pkg_op = ' '

        for task in sorted(set(tasks1) | set(tasks2)):
            task_op = ' '
            if task in tasks1:
                val1 = getattr(tasks1[task], stat_attr)
            else:
                task_op = '+'
                val1 = 0
            if task in tasks2:
                val2 = getattr(tasks2[task], stat_attr)
            else:
                val2 = 0
                task_op = '-'

            if val1 == 0:
                # No baseline to relate the change to
                reldiff = float('inf')
            else:
                reldiff = 100 * (val2 - val1) / val1

            if min_val and max(val1, val2) < min_val:
                log.debug("Filtering out %s:%s (%s)", pkg, task,
                          max(val1, val2))
                continue
            if min_absdiff and abs(val2 - val1) < min_absdiff:
                log.debug("Filtering out %s:%s (difference of %s)", pkg, task,
                          val2-val1)
                continue
            tasks_diff.append(TaskDiff(pkg, pkg_op, task, task_op, val1, val2,
                                       val2-val1, reldiff))
    return tasks_diff