diff options
Diffstat (limited to 'meta/lib/oe/buildhistory_analysis.py')
-rw-r--r-- | meta/lib/oe/buildhistory_analysis.py | 453 |
1 files changed, 453 insertions, 0 deletions
diff --git a/meta/lib/oe/buildhistory_analysis.py b/meta/lib/oe/buildhistory_analysis.py new file mode 100644 index 0000000000..86b5a12347 --- /dev/null +++ b/meta/lib/oe/buildhistory_analysis.py | |||
@@ -0,0 +1,453 @@ | |||
1 | # Report significant differences in the buildhistory repository since a specific revision | ||
2 | # | ||
3 | # Copyright (C) 2012 Intel Corporation | ||
4 | # Author: Paul Eggleton <paul.eggleton@linux.intel.com> | ||
5 | # | ||
6 | # Note: requires GitPython 0.3.1+ | ||
7 | # | ||
8 | # You can use this from the command line by running scripts/buildhistory-diff | ||
9 | # | ||
10 | |||
11 | import sys | ||
12 | import os.path | ||
13 | import difflib | ||
14 | import git | ||
15 | import re | ||
16 | import bb.utils | ||
17 | |||
18 | |||
# How each field is displayed when it changes
list_fields = [
    'DEPENDS', 'RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RSUGGESTS',
    'RREPLACES', 'RCONFLICTS', 'FILES', 'FILELIST', 'USER_CLASSES',
    'IMAGE_CLASSES', 'IMAGE_FEATURES', 'IMAGE_LINGUAS', 'IMAGE_INSTALL',
    'BAD_RECOMMENDATIONS',
]
list_order_fields = ['PACKAGES']
defaultval_fields = ['PKG', 'PKGE', 'PKGV', 'PKGR']
numeric_fields = ['PKGSIZE', 'IMAGESIZE']

# Fields whose changes are reported even when report_all is not requested
monitor_fields = [
    'RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RREPLACES', 'RCONFLICTS',
    'PACKAGES', 'FILELIST', 'PKGSIZE', 'IMAGESIZE',
    'PKG', 'PKGE', 'PKGV', 'PKGR',
]

# Percentage change to alert for numeric fields
monitor_numeric_threshold = 10

# Image files to monitor (note that image-info.txt is handled separately)
img_monitor_files = ['installed-package-names.txt', 'files-in-image.txt']

# Related context fields for reporting, keyed by the monitored field/file
# (note: PE, PV & PR are always reported for monitored package fields)
related_fields = {
    'RDEPENDS': ['DEPENDS'],
    'RRECOMMENDS': ['DEPENDS'],
    'FILELIST': ['FILES'],
    'PKGSIZE': ['FILELIST'],
    'files-in-image.txt': [
        'installed-package-names.txt', 'USER_CLASSES', 'IMAGE_CLASSES',
        'ROOTFS_POSTPROCESS_COMMAND', 'IMAGE_POSTPROCESS_COMMAND',
    ],
    'installed-package-names.txt': [
        'IMAGE_FEATURES', 'IMAGE_LINGUAS', 'IMAGE_INSTALL',
        'BAD_RECOMMENDATIONS',
    ],
}
38 | |||
39 | |||
class ChangeRecord:
    """One reported difference for a single field or file under a buildhistory path.

    Attributes:
        path: directory (relative to the repository root) the change belongs to.
        fieldname: name of the field or file that changed.
        oldvalue / newvalue: the value before and after the change.
        monitored: whether the change is reported when report_all is off.
        related: other ChangeRecord objects shown as context under this one.
        filechanges: optional list of per-file changes; when set, it is
            rendered instead of a textual diff of oldvalue/newvalue.
    """

    def __init__(self, path, fieldname, oldvalue, newvalue, monitored):
        self.path = path
        self.fieldname = fieldname
        self.oldvalue = oldvalue
        self.newvalue = newvalue
        self.monitored = monitored
        self.related = []
        self.filechanges = None

    def __str__(self):
        return self._str_internal(True)

    def _str_internal(self, outer):
        """Render the change as text.

        outer is True for a top-level record (the output is prefixed with the
        path) and False when the record is rendered as related context
        underneath another record.
        """
        if outer:
            if '/image-files/' in self.path:
                prefix = '%s: ' % self.path.split('/image-files/')[0]
            else:
                prefix = '%s: ' % self.path
        else:
            prefix = ''

        def pkglist_combine(depver):
            # Turn a {name: version-info} mapping into display strings.
            # items() is used instead of iteritems() so that this works on
            # both Python 2 and Python 3.
            pkglist = []
            for k, v in depver.items():
                if v:
                    pkglist.append("%s (%s)" % (k, v))
                else:
                    pkglist.append(k)
            return pkglist

        if self.fieldname in list_fields or self.fieldname in list_order_fields:
            if self.fieldname in ['RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RSUGGESTS', 'RREPLACES', 'RCONFLICTS']:
                (depvera, depverb) = compare_pkg_lists(self.oldvalue, self.newvalue)
                aitems = pkglist_combine(depvera)
                bitems = pkglist_combine(depverb)
            else:
                aitems = self.oldvalue.split()
                bitems = self.newvalue.split()
            # Sorted so the report does not depend on set iteration order
            removed = sorted(set(aitems) - set(bitems))
            added = sorted(set(bitems) - set(aitems))

            if removed or added:
                if removed and not bitems:
                    out = '%s: removed all items "%s"' % (self.fieldname, ' '.join(removed))
                else:
                    out = '%s:%s%s' % (self.fieldname, ' removed "%s"' % ' '.join(removed) if removed else '', ' added "%s"' % ' '.join(added) if added else '')
            else:
                # Same set of items on both sides, so only the order differs
                out = '%s changed order' % self.fieldname
        elif self.fieldname in numeric_fields:
            aval = int(self.oldvalue or 0)
            bval = int(self.newvalue or 0)
            if aval != 0:
                percentchg = ((bval - aval) / float(aval)) * 100
            else:
                # No meaningful ratio from a zero/empty old value
                percentchg = 100
            out = '%s changed from %s to %s (%s%d%%)' % (self.fieldname, self.oldvalue or "''", self.newvalue or "''", '+' if percentchg > 0 else '', percentchg)
        elif self.fieldname in defaultval_fields:
            out = '%s changed from %s to %s' % (self.fieldname, self.oldvalue, self.newvalue)
            if self.fieldname == 'PKG' and '[default]' in self.newvalue:
                out += ' - may indicate debian renaming failure'
        elif self.fieldname in ['pkg_preinst', 'pkg_postinst', 'pkg_prerm', 'pkg_postrm']:
            oldtext = self.oldvalue or ''
            newtext = self.newvalue or ''
            if newtext and not oldtext:
                out = '%s added:\n ' % self.fieldname
            elif oldtext and not newtext:
                out = '%s cleared:\n ' % self.fieldname
            else:
                # Also covers both sides being empty, which previously left
                # 'out' unassigned and raised UnboundLocalError below.
                out = '%s changed:\n ' % self.fieldname
            alines = oldtext.splitlines()
            blines = newtext.splitlines()
            diff = difflib.unified_diff(alines, blines, self.fieldname, self.fieldname, lineterm='')
            # Drop the two "---"/"+++" header lines of the unified diff
            out += '\n '.join(list(diff)[2:])
            out += '\n --'
        elif self.fieldname in img_monitor_files or '/image-files/' in self.path:
            fieldname = self.fieldname
            if '/image-files/' in self.path:
                # Show the file's location relative to the image-files directory
                fieldname = os.path.join('/' + self.path.split('/image-files/')[1], self.fieldname)
                out = 'Changes to %s:\n ' % fieldname
            else:
                if outer:
                    prefix = 'Changes to %s ' % self.path
                out = '(%s):\n ' % self.fieldname
            if self.filechanges:
                out += '\n '.join(['%s' % i for i in self.filechanges])
            else:
                alines = self.oldvalue.splitlines()
                blines = self.newvalue.splitlines()
                diff = difflib.unified_diff(alines, blines, fieldname, fieldname, lineterm='')
                out += '\n '.join(list(diff))
                out += '\n --'
        else:
            out = '%s changed from "%s" to "%s"' % (self.fieldname, self.oldvalue, self.newvalue)

        if self.related:
            for chg in self.related:
                # PE/PV/PR are only shown directly beneath a top-level record
                if not outer and chg.fieldname in ['PE', 'PV', 'PR']:
                    continue
                for line in chg._str_internal(False).splitlines():
                    out += '\n * %s' % line

        return '%s%s' % (prefix, out)
141 | |||
class FileChange:
    """A single change to one entry of a file or package listing.

    changetype is one of the changetype_* codes below; oldvalue and newvalue
    carry the before/after detail for the change types that have one.
    """
    changetype_add = 'A'
    changetype_remove = 'R'
    changetype_type = 'T'
    changetype_perms = 'P'
    changetype_ownergroup = 'O'
    changetype_link = 'L'

    def __init__(self, path, changetype, oldvalue = None, newvalue = None):
        self.path = path
        self.changetype = changetype
        self.oldvalue = oldvalue
        self.newvalue = newvalue

    def _ftype_str(self, ftype):
        """Return a readable name for a one-character file type code."""
        names = {
            '-': 'file',
            'd': 'directory',
            'l': 'symlink',
            'c': 'char device',
            'b': 'block device',
            'p': 'fifo',
            's': 'socket',
        }
        label = names.get(ftype)
        if label is None:
            label = 'unknown (%s)' % ftype
        return label

    def __str__(self):
        kind = self.changetype

        # Changes described by the path alone
        plain = {
            self.changetype_add: '%s was added',
            self.changetype_remove: '%s was removed',
        }
        if kind in plain:
            return plain[kind] % self.path

        # Type codes are translated to readable names before being shown
        if kind == self.changetype_type:
            before = self._ftype_str(self.oldvalue)
            after = self._ftype_str(self.newvalue)
            return '%s changed type from %s to %s' % (self.path, before, after)

        # Changes shown with the raw old and new values
        transitions = {
            self.changetype_perms: '%s changed permissions from %s to %s',
            self.changetype_ownergroup: '%s changed owner/group from %s to %s',
            self.changetype_link: '%s changed symlink target from %s to %s',
        }
        if kind in transitions:
            return transitions[kind] % (self.path, self.oldvalue, self.newvalue)

        return '%s changed (unknown)' % self.path
189 | |||
190 | |||
def blob_to_dict(blob):
    """Parse the "KEY = value" lines of a blob into a dictionary.

    blob must provide data_stream.read() returning the file contents.
    Each line is split at the first '='; key and value are stripped of
    surrounding whitespace. Lines without '=' are ignored.
    """
    data = blob.data_stream.read()
    # On Python 3 the stream yields bytes, which cannot be split on a str
    # separator. On Python 2 bytes is str, so nothing is decoded there.
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('utf-8')

    adict = {}
    for line in data.splitlines():
        key, sep, value = line.partition('=')
        if sep:
            adict[key.strip()] = value.strip()
    return adict
199 | |||
200 | |||
def file_list_to_dict(lines):
    """Index file listing lines by path.

    Each line is split on whitespace into at most five fields; the first
    three are kept as the entry's value and the fifth is the path. The
    path's first character (the leading '.') is dropped. For entries of the
    form "path -> target" the target is appended as a fourth list element.

    Lines with fewer than five fields (e.g. blank lines) are skipped.
    """
    adict = {}
    for line in lines:
        # Leave the last field intact so we handle file names containing spaces
        splitv = line.split(None, 4)
        if len(splitv) < 5:
            # Blank or truncated line: there is no path to index
            continue
        # Grab the path and remove the leading .
        path = splitv[4][1:].strip()
        # Handle symlinks
        if ' -> ' in path:
            parts = path.split(' -> ')
            path = parts[0]
            adict[path] = splitv[0:3] + [parts[1]]
        else:
            adict[path] = splitv[0:3]
    return adict
216 | |||
217 | |||
def compare_file_lists(alines, blines):
    """Compare two file listings and return a list of FileChange objects.

    alines and blines are the old and new listing lines, parsed with
    file_list_to_dict(). For a path present in both, changes of type,
    permissions, owner/group and (for entries that are now symlinks) link
    target are reported. Paths only in the old listing are reported as
    removed and paths only in the new listing as added.
    """
    adict = file_list_to_dict(alines)
    bdict = file_list_to_dict(blines)
    filechanges = []
    # items() rather than iteritems() so this runs on Python 2 and 3
    for path, splitv in adict.items():
        # pop so that whatever remains in bdict afterwards is new
        newsplitv = bdict.pop(path, None)
        if newsplitv is None:
            filechanges.append(FileChange(path, FileChange.changetype_remove))
            continue

        # Check type (first character of the mode string)
        oldvalue = splitv[0][0]
        newvalue = newsplitv[0][0]
        if oldvalue != newvalue:
            filechanges.append(FileChange(path, FileChange.changetype_type, oldvalue, newvalue))
        # Check permissions (remainder of the mode string)
        oldvalue = splitv[0][1:]
        newvalue = newsplitv[0][1:]
        if oldvalue != newvalue:
            filechanges.append(FileChange(path, FileChange.changetype_perms, oldvalue, newvalue))
        # Check owner/group
        oldvalue = '%s/%s' % (splitv[1], splitv[2])
        newvalue = '%s/%s' % (newsplitv[1], newsplitv[2])
        if oldvalue != newvalue:
            filechanges.append(FileChange(path, FileChange.changetype_ownergroup, oldvalue, newvalue))
        # Check symlink target
        if newsplitv[0][0] == 'l':
            # The target is only present when the listing line contained
            # " -> "; guard both sides so a symlink line without a target
            # does not raise IndexError.
            oldvalue = splitv[3] if len(splitv) > 3 else None
            newvalue = newsplitv[3] if len(newsplitv) > 3 else None
            if oldvalue != newvalue:
                filechanges.append(FileChange(path, FileChange.changetype_link, oldvalue, newvalue))

    # Whatever is left over has been added
    for path in bdict:
        filechanges.append(FileChange(path, FileChange.changetype_add))

    return filechanges
257 | |||
258 | |||
def compare_lists(alines, blines):
    """Return FileChange records for entries removed from or added to a list.

    Entries in alines but not in blines are reported as removed; entries in
    blines but not in alines as added. Removals are listed first.
    """
    before = set(alines)
    after = set(blines)

    filechanges = [FileChange(name, FileChange.changetype_remove) for name in before - after]
    filechanges.extend(FileChange(name, FileChange.changetype_add) for name in after - before)
    return filechanges
270 | |||
271 | |||
def compare_pkg_lists(astr, bstr):
    """Parse two dependency strings and drop entries that only had a version increase.

    Both strings are parsed with bb.utils.explode_dep_versions2(). An entry
    present on both sides, with the same non-zero number of version
    constraints, is removed from BOTH results when every differing
    constraint pair uses the same '>=' or '=' operator and
    bb.utils.vercmp(old, new) is not greater than zero.

    Returns the (old, new) pair of parsed mappings after that filtering.
    Note that the constraint lists of compared entries are sorted in place.
    """
    depvera = bb.utils.explode_dep_versions2(astr)
    depverb = bb.utils.explode_dep_versions2(bstr)

    def only_version_increase(old_constraints, new_constraints):
        # True unless some differing pair changes operator, uses an operator
        # other than '>=' / '=', or compares greater than zero via vercmp
        for old_item, new_item in zip(old_constraints, new_constraints):
            if old_item == new_item:
                continue
            old_op = old_item.split(' ')[0]
            new_op = new_item.split(' ')[0]
            if old_op != new_op or old_op not in ['>=', '=']:
                return False
            if bb.utils.vercmp(bb.utils.split_version(old_item), bb.utils.split_version(new_item)) > 0:
                return False
        return True

    # Strip out changes where the version has increased
    ignorable = []
    for name in depvera:
        if name not in depverb:
            continue
        dva = depvera[name]
        dvb = depverb[name]
        if not (dva and dvb and len(dva) == len(dvb)):
            continue
        # Since length is the same, sort so that prefixes (e.g. >=) will line up
        dva.sort()
        dvb.sort()
        if only_version_increase(dva, dvb):
            ignorable.append(name)

    for name in ignorable:
        depvera.pop(name)
        depverb.pop(name)

    return (depvera, depverb)
306 | |||
307 | |||
def compare_dict_blobs(path, ablob, bblob, report_all):
    """Compare two "KEY = value" blobs and return a list of ChangeRecord objects.

    path is the directory the blobs belong to; its basename is used as the
    package name. ablob/bblob are the old and new blobs, parsed with
    blob_to_dict(). Keys are visited in sorted order so the result order is
    stable.

    Unless report_all is true, some differences are suppressed:
      * numeric fields that changed by less than monitor_numeric_threshold
        percent;
      * FILELIST of a path ending in "-dbg" when the new value is non-empty;
      * package list fields for which compare_pkg_lists() returns equal
        results for both sides;
      * list fields that differ only in item order, or only by the package
        itself having been removed from the list.

    For PKG/PKGE/PKGV/PKGR, an empty side is shown as "<value> [default]",
    where the value comes from that side's own PE/PV/PR (or the package
    name for PKG).
    """
    adict = blob_to_dict(ablob)
    bdict = blob_to_dict(bblob)

    pkgname = os.path.basename(path)

    # Source field and fallback used when a PKG* field is not set explicitly
    default_sources = {
        'PKGE': ('PE', '0'),
        'PKGV': ('PV', ''),
        'PKGR': ('PR', ''),
    }

    def default_value(key, fields):
        # Take the default from the revision it applies to. Previously the
        # old revision's PE/PV/PR were also used for the new side, which
        # displayed a stale value after an upgrade.
        if key == 'PKG':
            value = pkgname
        else:
            source, fallback = default_sources[key]
            value = fields.get(source, fallback)
        return '%s [default]' % value

    changes = []
    for key in sorted(set(adict.keys()) | set(bdict.keys())):
        astr = adict.get(key, '')
        bstr = bdict.get(key, '')
        if astr == bstr:
            continue

        if (not report_all) and key in numeric_fields:
            aval = int(astr or 0)
            bval = int(bstr or 0)
            if aval != 0:
                percentchg = ((bval - aval) / float(aval)) * 100
            else:
                percentchg = 100
            if abs(percentchg) < monitor_numeric_threshold:
                continue
        elif (not report_all) and key in list_fields:
            if key == "FILELIST" and path.endswith("-dbg") and bstr.strip() != '':
                continue
            if key in ['RPROVIDES', 'RDEPENDS', 'RRECOMMENDS', 'RSUGGESTS', 'RREPLACES', 'RCONFLICTS']:
                (depvera, depverb) = compare_pkg_lists(astr, bstr)
                if depvera == depverb:
                    continue
            alist = sorted(astr.split())
            blist = sorted(bstr.split())
            # We don't care about the removal of self-dependencies
            if pkgname in alist and pkgname not in blist:
                alist.remove(pkgname)
            if ' '.join(alist) == ' '.join(blist):
                continue

        if key in defaultval_fields:
            # astr != bstr here, so at most one side can be empty
            if not astr:
                astr = default_value(key, adict)
            elif not bstr:
                bstr = default_value(key, bdict)

        changes.append(ChangeRecord(path, key, astr, bstr, key in monitor_fields))
    return changes
362 | |||
363 | |||
def _blob_text(blob):
    """Return the contents of a blob as text."""
    data = blob.data_stream.read()
    # The stream yields bytes on Python 3; on Python 2 bytes is str and the
    # data is returned untouched.
    if isinstance(data, bytes) and not isinstance(data, str):
        data = data.decode('utf-8')
    return data


def process_changes(repopath, revision1, revision2 = 'HEAD', report_all = False):
    """Collect the changes between two revisions of a buildhistory repository.

    repopath: path to a non-bare git repository.
    revision1 / revision2: the revisions to compare (revision2 defaults to HEAD).
    report_all: when true, return every change found; otherwise only changes
        flagged as monitored.

    Returns a list of ChangeRecord objects. Monitored records have their
    related context changes attached in their 'related' list.
    """
    repo = git.Repo(repopath)
    assert repo.bare == False
    commit = repo.commit(revision1)
    diff = commit.diff(revision2)

    changes = []
    for d in diff.iter_change_type('M'):
        path = os.path.dirname(d.a_blob.path)
        if path.startswith('packages/'):
            filename = os.path.basename(d.a_blob.path)
            if filename == 'latest':
                changes.extend(compare_dict_blobs(path, d.a_blob, d.b_blob, report_all))
            elif filename.startswith('latest.'):
                # Strip the "latest." prefix, as is done for added and deleted
                # files below, so the record carries the bare field name
                # (e.g. pkg_postinst) that ChangeRecord renders as a diff.
                chg = ChangeRecord(path, filename[7:], _blob_text(d.a_blob), _blob_text(d.b_blob), True)
                changes.append(chg)
        elif path.startswith('images/'):
            filename = os.path.basename(d.a_blob.path)
            if filename in img_monitor_files:
                if filename == 'files-in-image.txt':
                    alines = _blob_text(d.a_blob).splitlines()
                    blines = _blob_text(d.b_blob).splitlines()
                    filechanges = compare_file_lists(alines, blines)
                    if filechanges:
                        chg = ChangeRecord(path, filename, None, None, True)
                        chg.filechanges = filechanges
                        changes.append(chg)
                elif filename == 'installed-package-names.txt':
                    alines = _blob_text(d.a_blob).splitlines()
                    blines = _blob_text(d.b_blob).splitlines()
                    filechanges = compare_lists(alines, blines)
                    if filechanges:
                        chg = ChangeRecord(path, filename, None, None, True)
                        chg.filechanges = filechanges
                        changes.append(chg)
                else:
                    chg = ChangeRecord(path, filename, _blob_text(d.a_blob), _blob_text(d.b_blob), True)
                    changes.append(chg)
            elif filename == 'image-info.txt':
                changes.extend(compare_dict_blobs(path, d.a_blob, d.b_blob, report_all))
            elif '/image-files/' in path:
                chg = ChangeRecord(path, filename, _blob_text(d.a_blob), _blob_text(d.b_blob), True)
                changes.append(chg)

    # Look for added preinst/postinst/prerm/postrm
    # (without reporting newly added recipes)
    addedpkgs = []
    addedchanges = []
    for d in diff.iter_change_type('A'):
        path = os.path.dirname(d.b_blob.path)
        if path.startswith('packages/'):
            filename = os.path.basename(d.b_blob.path)
            if filename == 'latest':
                addedpkgs.append(path)
            elif filename.startswith('latest.'):
                chg = ChangeRecord(path, filename[7:], '', _blob_text(d.b_blob), True)
                addedchanges.append(chg)
    for chg in addedchanges:
        # Match on whole path components: a newly added "packages/x/foo"
        # must not hide script additions under "packages/x/foo-bar".
        is_new = any(chg.path == pkg or chg.path.startswith(pkg + '/') for pkg in addedpkgs)
        if not is_new:
            changes.append(chg)

    # Look for cleared preinst/postinst/prerm/postrm
    for d in diff.iter_change_type('D'):
        path = os.path.dirname(d.a_blob.path)
        if path.startswith('packages/'):
            filename = os.path.basename(d.a_blob.path)
            if filename != 'latest' and filename.startswith('latest.'):
                chg = ChangeRecord(path, filename[7:], _blob_text(d.a_blob), '', True)
                changes.append(chg)

    # Link related changes
    for chg in changes:
        if chg.monitored:
            for chg2 in changes:
                # (Check dirname in the case of fields from recipe info files)
                if chg.path == chg2.path or os.path.dirname(chg.path) == chg2.path:
                    if chg2.fieldname in related_fields.get(chg.fieldname, []):
                        chg.related.append(chg2)
                    elif chg.path == chg2.path and chg.path.startswith('packages/') and chg2.fieldname in ['PE', 'PV', 'PR']:
                        chg.related.append(chg2)

    if report_all:
        return changes
    else:
        return [chg for chg in changes if chg.monitored]