summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--scripts/lib/recipetool/create.py77
1 files changed, 76 insertions, 1 deletions
diff --git a/scripts/lib/recipetool/create.py b/scripts/lib/recipetool/create.py
index def2eea9fa..718f2aaf5b 100644
--- a/scripts/lib/recipetool/create.py
+++ b/scripts/lib/recipetool/create.py
@@ -25,6 +25,7 @@ import json
25import logging 25import logging
26import scriptutils 26import scriptutils
27import urlparse 27import urlparse
28import hashlib
28 29
29logger = logging.getLogger('recipetool') 30logger = logging.getLogger('recipetool')
30 31
@@ -717,6 +718,76 @@ def get_license_md5sums(d, static_only=False):
717 md5sums['54c7042be62e169199200bc6477f04d1'] = 'BSD-3-Clause' 718 md5sums['54c7042be62e169199200bc6477f04d1'] = 'BSD-3-Clause'
718 return md5sums 719 return md5sums
719 720
def crunch_license(licfile):
    '''
    Remove non-material text from a license file and then check
    its md5sum against a known list. This works well for licenses
    which contain a copyright statement, but is also a useful way
    to handle people's insistence upon reformatting the license text
    slightly (with no material difference to the text of the
    license).

    "Crunching" drops copyright lines, license title lines and
    "released under the ... license" statements, squashes whitespace,
    drops empty lines and turns every kind of quote (smart quotes,
    double quotes, backticks) into a plain single quote. The remaining
    lines are joined with single spaces and hashed.

    licfile is the path of the license file to examine.

    Returns a tuple (license, md5val, lictext):
      license - license name from the known list, or None if the
                crunched checksum is not in the list
      md5val  - hex md5 checksum of the crunched text, or None if the
                text could not be encoded for hashing
      lictext - list of crunched non-empty lines ('' if the text could
                not be encoded for hashing)
    '''

    import oe.utils

    # Note: these are carefully constructed!
    # (raw strings so that \( and \) reach the regex engine untouched)
    license_title_re = re.compile(r'^\(?(#+ *)?(The )?.{1,10} [Ll]icen[sc]e( \(.{1,10}\))?\)?:?$')
    license_statement_re = re.compile(r'^This (project|software) is( free software)? released under the .{1,10} [Ll]icen[sc]e:?$')
    copyright_re = re.compile(r'^(#+)? *Copyright .*$')

    crunched_md5sums = {
        # The following two were gleaned from the "forever" npm package
        '0a97f8e4cbaf889d6fa51f84b89a79f6': 'ISC',
        'eecf6429523cbc9693547cf2db790b5c': 'MIT',
        # https://github.com/vasi/pixz/blob/master/LICENSE
        '2f03392b40bbe663597b5bd3cc5ebdb9': 'BSD-2-Clause',
        # https://github.com/waffle-gl/waffle/blob/master/LICENSE.txt
        'e72e5dfef0b1a4ca8a3d26a60587db66': 'BSD-2-Clause',
        # https://github.com/spigwitmer/fakeds1963s/blob/master/LICENSE
        '8be76ac6d191671f347ee4916baa637e': 'GPLv2',
        # https://github.com/datto/dattobd/blob/master/COPYING
        # http://git.savannah.gnu.org/cgit/freetype/freetype2.git/tree/docs/GPLv2.TXT
        '1d65c5ad4bf6489f85f4812bf08ae73d': 'GPLv2',
        # http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
        # http://git.neil.brown.name/?p=mdadm.git;a=blob;f=COPYING;h=d159169d1050894d3ea3b98e1c965c4058208fe1;hb=HEAD
        'fb530f66a7a89ce920f0e912b5b66d4b': 'GPLv2',
        # https://github.com/gkos/nrf24/blob/master/COPYING
        '7b6aaa4daeafdfa6ed5443fd2684581b': 'GPLv2',
        # https://github.com/josch09/resetusb/blob/master/COPYING
        '8b8ac1d631a4d220342e83bcf1a1fbc3': 'GPLv3',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv2.1
        '2ea316ed973ae176e502e2297b574bb3': 'LGPLv2.1',
        # unixODBC-2.3.4 COPYING
        '1daebd9491d1e8426900b4fa5a422814': 'LGPLv2.1',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
        '2ebfb3bb49b9a48a075cc1425e7f4129': 'LGPLv3',
    }

    # Smart quotes, double quotes and backticks all become single quotes
    quote_map = dict.fromkeys(
        (0x2018, 0x2019, 0x201c, 0x201d, ord('"'), ord('`')), u"'")

    lictext = []
    with open(licfile, 'r') as f:
        for line in f:
            # Drop opening statements
            if (copyright_re.match(line)
                    or license_title_re.match(line)
                    or license_statement_re.match(line)):
                continue
            line = oe.utils.squashspaces(line.strip())
            # Byte strings are decoded leniently: a stray non-UTF-8 byte
            # must not abort license detection for the whole source tree.
            if isinstance(line, bytes):
                line = line.decode('utf-8', 'replace')
            line = line.translate(quote_map)
            if line:
                lictext.append(line)

    try:
        # Encode explicitly: hashing unicode text through the implicit
        # ASCII codec fails as soon as one non-ASCII character is left.
        # For pure ASCII text the bytes (and thus the checksum) are the same.
        md5val = hashlib.md5(u' '.join(lictext).encode('utf-8')).hexdigest()
    except UnicodeEncodeError:
        # Keep the best-effort contract: report "no checksum" instead of failing
        md5val = None
        lictext = ''
    matched = crunched_md5sums.get(md5val, None)
    return matched, md5val, lictext
790
720def guess_license(srctree): 791def guess_license(srctree):
721 import bb 792 import bb
722 md5sums = get_license_md5sums(tinfoil.config_data) 793 md5sums = get_license_md5sums(tinfoil.config_data)
@@ -733,7 +804,11 @@ def guess_license(srctree):
733 licfiles.append(fullpath) 804 licfiles.append(fullpath)
734 for licfile in licfiles: 805 for licfile in licfiles:
735 md5value = bb.utils.md5_file(licfile) 806 md5value = bb.utils.md5_file(licfile)
736 license = md5sums.get(md5value, 'Unknown') 807 license = md5sums.get(md5value, None)
808 if not license:
809 license, crunched_md5, lictext = crunch_license(licfile)
810 if not license:
811 license = 'Unknown'
737 licenses.append((license, os.path.relpath(licfile, srctree), md5value)) 812 licenses.append((license, os.path.relpath(licfile, srctree), md5value))
738 813
739 # FIXME should we grab at least one source file with a license header and add that too? 814 # FIXME should we grab at least one source file with a license header and add that too?