scripts/contrib/patchreview: add new script

This script analyses the patches we apply and can sanity check or output statistics. (From OE-Core rev: de7914954571ea8e717f56b6d6df13157b0973bc) Signed-off-by: Ross Burton <ross.burton@intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
author: Ross Burton <ross.burton@intel.com> 2017-06-29 12:43:10 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2017-06-29 13:01:38 +0100
commit: 027bdb056022612761fc833952deaff9989bb5fa (patch)
tree: a98f2e9f6d0d84c65b582b7bbab39513328ffb82 /scripts
parent: abc7131777e8ea8e6aacc5118bbb5c24883c0ead (diff)
download: poky-027bdb056022612761fc833952deaff9989bb5fa.tar.gz
1 files changed, 211 insertions, 0 deletions
diff --git a/scripts/contrib/patchreview.py b/scripts/contrib/patchreview.py
new file mode 100755
index 0000000000..4e3e73c7a8
--- /dev/null
+++ b/scripts/contrib/patchreview.py
@@ -0,0 +1,211 @@
+#! /usr/bin/env python3
+# TODO
+# - option to just list all broken files
+# - test suite
+# - validate signed-off-by
+class Result:
+    """
+    Findings for a single patch file.
+
+    The class-level values below are the defaults for a patch with no
+    problems detected; patchreview() overrides them per instance.
+    """
+
+    # --- Signed-off-by ---
+    # True when no Signed-off-by tag was found in the patch
+    missing_sob = False
+    # False when the tag is spelled exactly "Signed-off-by:", otherwise the
+    # tag text as it was written in the patch
+    malformed_sob = False
+    # The text following the Signed-off-by tag
+    sob = None
+
+    # --- Upstream-Status ---
+    # True when no Upstream-Status tag was found in the patch
+    missing_upstream_status = False
+    # None when the tag is spelled exactly "Upstream-Status:", otherwise the
+    # tag text as it was written in the patch
+    malformed_upstream_status = None
+    # True when the status value is not one of the recognised values
+    unknown_upstream_status = False
+    # The status value, lower-cased (pending, backport, etc)
+    upstream_status = None
+
+    # --- CVE ---
+    # True when the patch looks like a CVE fix but has no CVE tag
+    missing_cve = False
+def blame_patch(patch):
+    """
+    Ask git which commit(s) added the given patch file, following renames.
+
+    Returns a list of strings, one per line of git output, each formatted as
+    "commit summary (author name <author email>)".
+    """
+    import subprocess
+
+    command = [
+        "git", "log",
+        "--follow", "--find-renames", "--diff-filter=A",
+        "--format=%s (%aN <%aE>)",
+        "--", patch,
+    ]
+    output = subprocess.check_output(command)
+    return output.decode("utf-8").splitlines()
+def patchreview(patches):
+    """
+    Scan patch files for Signed-off-by, Upstream-Status and CVE tags.
+
+    patches: iterable of patch file paths; each one is opened and read
+    Returns a dict mapping every path to a Result describing what was found.
+    Only the first occurrence of each tag in a file is inspected.
+    """
+    import re
+
+    # General pattern: start of line, optional whitespace, tag with optional
+    # hyphen or spaces, maybe a colon, some whitespace, then the value, all case
+    # insensitive.
+    sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
+    status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
+    status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied")
+    cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
+    cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
+
+    results = {}
+
+    for patch in patches:
+        result = Result()
+        results[patch] = result
+
+        # Non-ASCII bytes are dropped rather than decoded: the tags being
+        # searched for are plain ASCII. The with-block closes each file as
+        # soon as it is read instead of leaving one handle open per patch.
+        with open(patch, encoding='ascii', errors='ignore') as f:
+            content = f.read()
+
+        # Find the Signed-off-by tag
+        match = sob_re.search(content)
+        if match:
+            value = match.group(1)
+            # The regex is deliberately loose; anything other than the exact
+            # spelling is recorded as malformed
+            if value != "Signed-off-by:":
+                result.malformed_sob = value
+            result.sob = match.group(2)
+        else:
+            result.missing_sob = True
+
+        # Find the Upstream-Status tag
+        match = status_re.search(content)
+        if match:
+            value = match.group(1)
+            if value != "Upstream-Status:":
+                result.malformed_upstream_status = value
+
+            # The value is compared and stored lower-cased
+            value = match.group(2).lower()
+            # TODO: check case
+            if value not in status_values:
+                result.unknown_upstream_status = True
+            result.upstream_status = value
+        else:
+            result.missing_upstream_status = True
+
+        # Check that patches which look like CVEs (by filename or content)
+        # have CVE tags
+        if cve_re.search(patch) or cve_re.search(content):
+            if not cve_tag_re.search(content):
+                result.missing_cve = True
+        # TODO: extract CVE list
+
+    return results
+def analyse(results, want_blame=False, verbose=True):
+    """
+    Print per-patch warnings (when verbose) followed by summary statistics.
+
+    results: dict mapping patch path to its Result
+    want_blame: display blame data for each patch that has a problem
+    verbose: display per-file results instead of just summary
+    """
+    # Blame output accompanies the per-patch warnings, so it is suppressed
+    # whenever verbose output is off.
+    show_blame = want_blame and verbose
+
+    # TODO: reporting of malformed Signed-off-by tags is disabled for now as
+    # too much fails; such patches are still counted in the summary.
+    report_malformed_sob = False
+
+    # Build statistics
+    outcomes = list(results.values())
+    total_patches = len(outcomes)
+    missing_sob = sum(1 for r in outcomes if r.missing_sob)
+    malformed_sob = sum(1 for r in outcomes if r.malformed_sob)
+    missing_status = sum(1 for r in outcomes if r.missing_upstream_status)
+    malformed_status = sum(1 for r in outcomes
+                           if r.malformed_upstream_status or r.unknown_upstream_status)
+    missing_cve = sum(1 for r in outcomes if r.missing_cve)
+    pending_patches = sum(1 for r in outcomes if r.upstream_status == "pending")
+
+    # Output warnings
+    for patch in sorted(results):
+        r = results[patch]
+        # (problem present?, message template, template arguments)
+        checks = (
+            (r.missing_sob,
+             "Missing Signed-off-by tag (%s)", (patch,)),
+            (report_malformed_sob and r.malformed_sob,
+             "Malformed Signed-off-by '%s' (%s)", (r.malformed_sob, patch)),
+            (r.missing_cve,
+             "Missing CVE tag (%s)", (patch,)),
+            (r.missing_upstream_status,
+             "Missing Upstream-Status tag (%s)", (patch,)),
+            (r.malformed_upstream_status,
+             "Malformed Upstream-Status '%s' (%s)", (r.malformed_upstream_status, patch)),
+            (r.unknown_upstream_status,
+             "Unknown Upstream-Status value '%s' (%s)", (r.upstream_status, patch)),
+        )
+        flagged = [(template, values) for failed, template, values in checks if failed]
+
+        if verbose:
+            for template, values in flagged:
+                print(template % values)
+
+        if show_blame and flagged:
+            print("\n".join(blame_patch(patch)) + "\n")
+
+    def percent(num):
+        # With no patches at all there is nothing to take a percentage of
+        if total_patches == 0:
+            return "N/A"
+        return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
+
+    if verbose:
+        print()
+
+    summary = (
+        "Total patches found: %d" % total_patches,
+        "Patches missing Signed-off-by: %s" % percent(missing_sob),
+        "Patches with malformed Signed-off-by: %s" % percent(malformed_sob),
+        "Patches missing CVE: %s" % percent(missing_cve),
+        "Patches missing Upstream-Status: %s" % percent(missing_status),
+        "Patches with malformed Upstream-Status: %s" % percent(malformed_status),
+        "Patches in Pending state: %s" % percent(pending_patches),
+    )
+    print("\n".join(summary))
+def histogram(results):
+    """
+    Print one bar per Upstream-Status value found in results.
+
+    results: dict mapping patch path to its Result; only upstream_status is
+    read. Each bar is one '#' per percent of all patches (rounded up), and
+    patches with no status are grouped under "No status".
+    """
+    # The standard library Counter does the tallying, so the histogram
+    # option works without any third-party module installed.
+    from collections import Counter
+
+    total = len(results)
+    counts = Counter(r.upstream_status for r in results.values())
+    for status, count in counts.items():
+        # Ceiling of the percentage using integer arithmetic only. Float
+        # maths can land just above a whole number (7 of 25 evaluates to
+        # 28.000000000000004) and would then round up one '#' too far.
+        width = -(-count * 100 // total)
+        label = status.capitalize() if status else "No status"
+        print("%-20s %s (%d)" % (label, "#" * width, count))
+if __name__ == "__main__":
+    # Command-line entry point: review every tracked *.patch / *.diff file
+    # below the given directory (or the current one) and print the results.
+    import argparse
+    import os
+    import subprocess
+
+    parser = argparse.ArgumentParser(description="Patch Review Tool")
+    parser.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
+    parser.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
+    parser.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
+    parser.add_argument("directory", nargs="?", help="directory to scan")
+    args = parser.parse_args()
+
+    # Paths reported by git are relative to the working directory, so move
+    # there before listing and opening the patches.
+    if args.directory:
+        os.chdir(args.directory)
+
+    # -z makes git emit raw, NUL-terminated paths. Splitting on NUL keeps
+    # file names that contain whitespace, or characters git would otherwise
+    # quote, in one piece.
+    listing = subprocess.check_output(("git", "ls-files", "-z", "*.patch", "*.diff")).decode("utf-8")
+    patches = [path for path in listing.split("\0") if path]
+
+    results = patchreview(patches)
+    analyse(results, want_blame=args.blame, verbose=args.verbose)
+    if args.histogram:
+        print()
+        histogram(results)
author	Ross Burton <ross.burton@intel.com>	2017-06-29 12:43:10 +0100
committer	Richard Purdie <richard.purdie@linuxfoundation.org>	2017-06-29 13:01:38 +0100
commit	027bdb056022612761fc833952deaff9989bb5fa (patch)
tree	a98f2e9f6d0d84c65b582b7bbab39513328ffb82 /scripts
parent	abc7131777e8ea8e6aacc5118bbb5c24883c0ead (diff)
download	poky-027bdb056022612761fc833952deaff9989bb5fa.tar.gz

diff --git a/scripts/contrib/patchreview.py b/scripts/contrib/patchreview.py new file mode 100755 index 0000000000..4e3e73c7a8 --- /dev/null +++ b/scripts/contrib/patchreview.py
@@ -0,0 +1,211 @@
	1	#! /usr/bin/env python3
	2
	3	# TODO
	4	# - option to just list all broken files
	5	# - test suite
	6	# - validate signed-off-by
	7
	8
	9	class Result:
	10	# Whether the patch has an Upstream-Status or not
	11	missing_upstream_status = False
	12	# If the Upstream-Status tag is malformed in some way (string for bad bit)
	13	malformed_upstream_status = None
	14	# If the Upstream-Status value is unknown (boolean)
	15	unknown_upstream_status = False
	16	# The upstream status value (Pending, etc)
	17	upstream_status = None
	18	# Whether the patch has a Signed-off-by or not
	19	missing_sob = False
	20	# Whether the Signed-off-by tag is malformed in some way
	21	malformed_sob = False
	22	# The Signed-off-by tag value
	23	sob = None
	24	# Whether a patch looks like a CVE but doesn't have a CVE tag
	25	missing_cve = False
	26
	27	def blame_patch(patch):
	28	"""
	29	From a patch filename, return a list of "commit summary (author name <author
	30	email>)" strings representing the history.
	31	"""
	32	import subprocess
	33	return subprocess.check_output(("git", "log",
	34	"--follow", "--find-renames", "--diff-filter=A",
	35	"--format=%s (%aN <%aE>)",
	36	"--", patch)).decode("utf-8").splitlines()
	37
	38	def patchreview(patches):
	39	import re
	40
	41	# General pattern: start of line, optional whitespace, tag with optional
	42	# hyphen or spaces, maybe a colon, some whitespace, then the value, all case
	43	# insensitive.
	44	sob_re = re.compile(r"^[\t ]*(Signed[-_ ]off[-_ ]by:?)[\t ]*(.+)", re.IGNORECASE | re.MULTILINE)
	45	status_re = re.compile(r"^[\t ]*(Upstream[-_ ]Status:?)[\t ]*(\w*)", re.IGNORECASE | re.MULTILINE)
	46	status_values = ("accepted", "pending", "inappropriate", "backport", "submitted", "denied")
	47	cve_tag_re = re.compile(r"^[\t ]*(CVE:)[\t ]*(.*)", re.IGNORECASE | re.MULTILINE)
	48	cve_re = re.compile(r"cve-[0-9]{4}-[0-9]{4,6}", re.IGNORECASE)
	49
	50	results = {}
	51
	52	for patch in patches:
	53	result = Result()
	54	results[patch] = result
	55
	56	content = open(patch, encoding='ascii', errors='ignore').read()
	57
	58	# Find the Signed-off-by tag
	59	match = sob_re.search(content)
	60	if match:
	61	value = match.group(1)
	62	if value != "Signed-off-by:":
	63	result.malformed_sob = value
	64	result.sob = match.group(2)
	65	else:
	66	result.missing_sob = True
	67
	68
	69	# Find the Upstream-Status tag
	70	match = status_re.search(content)
	71	if match:
	72	value = match.group(1)
	73	if value != "Upstream-Status:":
	74	result.malformed_upstream_status = value
	75
	76	value = match.group(2).lower()
	77	# TODO: check case
	78	if value not in status_values:
	79	result.unknown_upstream_status = True
	80	result.upstream_status = value
	81	else:
	82	result.missing_upstream_status = True
	83
	84	# Check that patches which looks like CVEs have CVE tags
	85	if cve_re.search(patch) or cve_re.search(content):
	86	if not cve_tag_re.search(content):
	87	result.missing_cve = True
	88	# TODO: extract CVE list
	89
	90	return results
	91
	92
	93	def analyse(results, want_blame=False, verbose=True):
	94	"""
	95	want_blame: display blame data for each malformed patch
	96	verbose: display per-file results instead of just summary
	97	"""
	98
	99	# want_blame requires verbose, so disable blame if we're not verbose
	100	if want_blame and not verbose:
	101	want_blame = False
	102
	103	total_patches = 0
	104	missing_sob = 0
	105	malformed_sob = 0
	106	missing_status = 0
	107	malformed_status = 0
	108	missing_cve = 0
	109	pending_patches = 0
	110
	111	for patch in sorted(results):
	112	r = results[patch]
	113	total_patches += 1
	114	need_blame = False
	115
	116	# Build statistics
	117	if r.missing_sob:
	118	missing_sob += 1
	119	if r.malformed_sob:
	120	malformed_sob += 1
	121	if r.missing_upstream_status:
	122	missing_status += 1
	123	if r.malformed_upstream_status or r.unknown_upstream_status:
	124	malformed_status += 1
	125	if r.missing_cve:
	126	missing_cve += 1
	127	if r.upstream_status == "pending":
	128	pending_patches += 1
	129
	130	# Output warnings
	131	if r.missing_sob:
	132	need_blame = True
	133	if verbose:
	134	print("Missing Signed-off-by tag (%s)" % patch)
	135	# TODO: disable this for now as too much fails
	136	if False and r.malformed_sob:
	137	need_blame = True
	138	if verbose:
	139	print("Malformed Signed-off-by '%s' (%s)" % (r.malformed_sob, patch))
	140	if r.missing_cve:
	141	need_blame = True
	142	if verbose:
	143	print("Missing CVE tag (%s)" % patch)
	144	if r.missing_upstream_status:
	145	need_blame = True
	146	if verbose:
	147	print("Missing Upstream-Status tag (%s)" % patch)
	148	if r.malformed_upstream_status:
	149	need_blame = True
	150	if verbose:
	151	print("Malformed Upstream-Status '%s' (%s)" % (r.malformed_upstream_status, patch))
	152	if r.unknown_upstream_status:
	153	need_blame = True
	154	if verbose:
	155	print("Unknown Upstream-Status value '%s' (%s)" % (r.upstream_status, patch))
	156
	157	if want_blame and need_blame:
	158	print("\n".join(blame_patch(patch)) + "\n")
	159
	160	def percent(num):
	161	try:
	162	return "%d (%d%%)" % (num, round(num * 100.0 / total_patches))
	163	except ZeroDivisionError:
	164	return "N/A"
	165
	166	if verbose:
	167	print()
	168
	169	print("""Total patches found: %d
	170	Patches missing Signed-off-by: %s
	171	Patches with malformed Signed-off-by: %s
	172	Patches missing CVE: %s
	173	Patches missing Upstream-Status: %s
	174	Patches with malformed Upstream-Status: %s
	175	Patches in Pending state: %s""" % (total_patches,
	176	percent(missing_sob),
	177	percent(malformed_sob),
	178	percent(missing_cve),
	179	percent(missing_status),
	180	percent(malformed_status),
	181	percent(pending_patches)))
	182
	183
	184
	185	def histogram(results):
	186	from toolz import recipes, dicttoolz
	187	import math
	188	counts = recipes.countby(lambda r: r.upstream_status, results.values())
	189	bars = dicttoolz.valmap(lambda v: "#" * int(math.ceil(float(v) / len(results) * 100)), counts)
	190	for k in bars:
	191	print("%-20s %s (%d)" % (k.capitalize() if k else "No status", bars[k], counts[k]))
	192
	193
	194	if __name__ == "__main__":
	195	import argparse, subprocess, os
	196
	197	args = argparse.ArgumentParser(description="Patch Review Tool")
	198	args.add_argument("-b", "--blame", action="store_true", help="show blame for malformed patches")
	199	args.add_argument("-v", "--verbose", action="store_true", help="show per-patch results")
	200	args.add_argument("-g", "--histogram", action="store_true", help="show patch histogram")
	201	args.add_argument("directory", nargs="?", help="directory to scan")
	202	args = args.parse_args()
	203
	204	if args.directory:
	205	os.chdir(args.directory)
	206	patches = subprocess.check_output(("git", "ls-files", "*.patch", "*.diff")).decode("utf-8").split()
	207	results = patchreview(patches)
	208	analyse(results, want_blame=args.blame, verbose=args.verbose)
	209	if args.histogram:
	210	print()
	211	histogram(results)