summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
author: Paul Eggleton <paul.eggleton@linux.intel.com> 2016-03-09 17:48:51 +1300
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2016-03-09 17:00:29 +0000
commit: d46827cfd322554b57c3fc774b4d914f6e449d75 (patch)
tree: c91dc554f2ada1091fa64e2a136edbc5bb23db9a /scripts
parent: 3fd244b94f6eee7fc05799b64613e3fe68d2e8f4 (diff)
download: poky-d46827cfd322554b57c3fc774b4d914f6e449d75.tar.gz
recipetool: create: add license file crunching
Matching license texts directly to md5sums only goes so far. Some licenses make the copyright statement an intrinsic part of the license statement (e.g. MIT) which of course varies between projects. Also, people often seem to take standard license texts such as GPLv2 and reformat them cosmetically - re-wrapping lines at a different width or changing quoting styles are seemingly popular examples.

In order to match license files to their actual licenses more effectively, "crunch" out these elements before comparing to an md5sum. (The existing plain md5sum matching has been left in since it's a shortcut, and our list of crunched md5sums isn't a complete replacement for it.)

As always, this code isn't providing any guarantees (legal or otherwise) that it will always get the license correct - as indicated by the accompanying comments, the LICENSE values it writes out to the recipe are indicative and you should verify them yourself by looking at the documentation supplied from upstream for the software being built if you have any concerns.

(From OE-Core rev: 553bb4ea5d51be5179e7d8c019740cf61ece76ea)

Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/lib/recipetool/create.py 77
1 file changed, 76 insertions, 1 deletion
diff --git a/scripts/lib/recipetool/create.py b/scripts/lib/recipetool/create.py
index def2eea9fa..718f2aaf5b 100644
--- a/scripts/lib/recipetool/create.py
+++ b/scripts/lib/recipetool/create.py
@@ -25,6 +25,7 @@ import json
25import logging 25import logging
26import scriptutils 26import scriptutils
27import urlparse 27import urlparse
28import hashlib
28 29
29logger = logging.getLogger('recipetool') 30logger = logging.getLogger('recipetool')
30 31
@@ -717,6 +718,76 @@ def get_license_md5sums(d, static_only=False):
717 md5sums['54c7042be62e169199200bc6477f04d1'] = 'BSD-3-Clause' 718 md5sums['54c7042be62e169199200bc6477f04d1'] = 'BSD-3-Clause'
718 return md5sums 719 return md5sums
719 720
def crunch_license(licfile):
    '''
    Remove non-material text from a license file and then check
    its md5sum against a known list. This works well for licenses
    which contain a copyright statement, but is also a useful way
    to handle people's insistence upon reformatting the license text
    slightly (with no material difference to the text of the
    license).

    "Crunching" means: drop copyright lines, license title lines and
    "This software is released under the ... License" lines, drop blank
    lines, collapse every run of whitespace to a single space, and turn
    smart quotes, double quotes and backticks into single quotes. The
    remaining lines are joined with single spaces, encoded as UTF-8 and
    hashed.

    Parameters:
        licfile: path of the license file to read.

    Returns a tuple (license, md5val, lictext):
        license: the license name the crunched md5sum maps to in the
                 table below, or None if there is no match.
        md5val:  hex md5 digest of the crunched text, or None if the
                 file could not be decoded as UTF-8.
        lictext: list of the crunched lines, or '' if the file could
                 not be decoded as UTF-8.

    Errors from opening or reading the file are not caught here.
    '''

    # Note: these are carefully constructed!
    # They are bytes patterns because the file is read in binary mode and
    # matched against the raw, undecoded line (trailing newline included).
    license_title_re = re.compile(br'^\(?(#+ *)?(The )?.{1,10} [Ll]icen[sc]e( \(.{1,10}\))?\)?:?$')
    license_statement_re = re.compile(br'^This (project|software) is( free software)? released under the .{1,10} [Ll]icen[sc]e:?$')
    copyright_re = re.compile(br'^(#+)? *Copyright .*$')

    crunched_md5sums = {
        # The following two were gleaned from the "forever" npm package
        '0a97f8e4cbaf889d6fa51f84b89a79f6': 'ISC',
        'eecf6429523cbc9693547cf2db790b5c': 'MIT',
        # https://github.com/vasi/pixz/blob/master/LICENSE
        '2f03392b40bbe663597b5bd3cc5ebdb9': 'BSD-2-Clause',
        # https://github.com/waffle-gl/waffle/blob/master/LICENSE.txt
        'e72e5dfef0b1a4ca8a3d26a60587db66': 'BSD-2-Clause',
        # https://github.com/spigwitmer/fakeds1963s/blob/master/LICENSE
        '8be76ac6d191671f347ee4916baa637e': 'GPLv2',
        # https://github.com/datto/dattobd/blob/master/COPYING
        # http://git.savannah.gnu.org/cgit/freetype/freetype2.git/tree/docs/GPLv2.TXT
        '1d65c5ad4bf6489f85f4812bf08ae73d': 'GPLv2',
        # http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
        # http://git.neil.brown.name/?p=mdadm.git;a=blob;f=COPYING;h=d159169d1050894d3ea3b98e1c965c4058208fe1;hb=HEAD
        'fb530f66a7a89ce920f0e912b5b66d4b': 'GPLv2',
        # https://github.com/gkos/nrf24/blob/master/COPYING
        '7b6aaa4daeafdfa6ed5443fd2684581b': 'GPLv2',
        # https://github.com/josch09/resetusb/blob/master/COPYING
        '8b8ac1d631a4d220342e83bcf1a1fbc3': 'GPLv3',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv2.1
        '2ea316ed973ae176e502e2297b574bb3': 'LGPLv2.1',
        # unixODBC-2.3.4 COPYING
        '1daebd9491d1e8426900b4fa5a422814': 'LGPLv2.1',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
        '2ebfb3bb49b9a48a075cc1425e7f4129': 'LGPLv3',
    }

    # Smart quotes, double quotes and backticks all become single quotes
    quote_map = dict.fromkeys(
        (0x2018, 0x2019, 0x201c, 0x201d, ord('"'), ord('`')), u"'")

    lictext = []
    try:
        with open(licfile, 'rb') as f:
            for rawline in f:
                # Drop opening statements
                if (copyright_re.match(rawline)
                        or license_title_re.match(rawline)
                        or license_statement_re.match(rawline)):
                    continue
                # Strip the line and squash every run of (ASCII) whitespace
                # down to one space while it is still bytes.
                # NOTE(review): this replaces the previous
                # oe.utils.squashspaces(line.strip()) call - confirm that
                # helper does nothing beyond collapsing whitespace runs, so
                # that the crunched md5sums above remain valid.
                squashed = b' '.join(rawline.split())
                if squashed:
                    lictext.append(squashed.decode('utf-8').translate(quote_map))
    except UnicodeDecodeError:
        # Not valid UTF-8: we cannot crunch it reliably, so report "no
        # match" instead of letting the error abort the caller.
        return None, None, ''

    # Encode explicitly so that text which still contains non-ASCII
    # characters gets a checksum too; for pure-ASCII text the bytes (and
    # therefore the md5sum) are the same as with an implicit ASCII encode.
    md5val = hashlib.md5(u' '.join(lictext).encode('utf-8')).hexdigest()
    matched = crunched_md5sums.get(md5val)
    return matched, md5val, lictext
790
720def guess_license(srctree): 791def guess_license(srctree):
721 import bb 792 import bb
722 md5sums = get_license_md5sums(tinfoil.config_data) 793 md5sums = get_license_md5sums(tinfoil.config_data)
@@ -733,7 +804,11 @@ def guess_license(srctree):
733 licfiles.append(fullpath) 804 licfiles.append(fullpath)
734 for licfile in licfiles: 805 for licfile in licfiles:
735 md5value = bb.utils.md5_file(licfile) 806 md5value = bb.utils.md5_file(licfile)
736 license = md5sums.get(md5value, 'Unknown') 807 license = md5sums.get(md5value, None)
808 if not license:
809 license, crunched_md5, lictext = crunch_license(licfile)
810 if not license:
811 license = 'Unknown'
737 licenses.append((license, os.path.relpath(licfile, srctree), md5value)) 812 licenses.append((license, os.path.relpath(licfile, srctree), md5value))
738 813
739 # FIXME should we grab at least one source file with a license header and add that too? 814 # FIXME should we grab at least one source file with a license header and add that too?