summaryrefslogtreecommitdiffstats
path: root/scripts/lib
diff options
context:
space:
mode:
author: Peter Kjellerstedt <peter.kjellerstedt@axis.com> 2023-12-06 21:55:28 +0100
committer: Richard Purdie <richard.purdie@linuxfoundation.org> 2023-12-08 16:58:34 +0000
commit: decf6e66dfe3d688c93b3ff811077c46cb9883bb (patch)
tree: a20227da6e38cb96e45293a92d573359d20fa5f6 /scripts/lib
parent: 3ef9ea88f15c4d53ef2229e84e7db81a052e6946 (diff)
download: poky-decf6e66dfe3d688c93b3ff811077c46cb9883bb.tar.gz
recipetool: create: Improve identification of licenses
Rather than having a static list of crunched MD5 checksums for some of the most common licenses, calculate it for all common licenses. This should improve the identification of license text variations. (From OE-Core rev: 377f9513dc56e9b8e5f5813c1535be0206756949) Signed-off-by: Peter Kjellerstedt <peter.kjellerstedt@axis.com> Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts/lib')
-rw-r--r-- scripts/lib/recipetool/create.py 91
1 files changed, 45 insertions, 46 deletions
diff --git a/scripts/lib/recipetool/create.py b/scripts/lib/recipetool/create.py
index 963aa91421..6e15326c55 100644
--- a/scripts/lib/recipetool/create.py
+++ b/scripts/lib/recipetool/create.py
@@ -1071,54 +1071,18 @@ def get_license_md5sums(d, static_only=False, linenumbers=False):
1071 1071
1072 return md5sums 1072 return md5sums
1073 1073
1074def crunch_license(licfile): 1074def crunch_known_licenses(d):
1075 ''' 1075 '''
1076 Remove non-material text from a license file and then check 1076 Calculate the MD5 checksums for the crunched versions of all common
1077 its md5sum against a known list. This works well for licenses 1077 licenses. Also add additional known checksums.
1078 which contain a copyright statement, but is also a useful way
1079 to handle people's insistence upon reformatting the license text
1080 slightly (with no material difference to the text of the
1081 license).
1082 ''' 1078 '''
1083 1079
1084 import oe.utils
1085
1086 # Note: these are carefully constructed!
1087 license_title_re = re.compile(r'^#*\(? *(This is )?([Tt]he )?.{0,15} ?[Ll]icen[sc]e( \(.{1,10}\))?\)?[:\.]? ?#*$')
1088 license_statement_re = re.compile(r'^((This (project|software)|.{1,10}) is( free software)? (released|licen[sc]ed)|(Released|Licen[cs]ed)) under the .{1,10} [Ll]icen[sc]e:?$')
1089 copyright_re = re.compile('^ *[#\*]* *(Modified work |MIT LICENSED )?Copyright ?(\([cC]\))? .*$')
1090 disclaimer_re = re.compile('^ *\*? ?All [Rr]ights [Rr]eserved\.$')
1091 email_re = re.compile('^.*<[\w\.-]*@[\w\.\-]*>$')
1092 header_re = re.compile('^(\/\**!?)? ?[\-=\*]* ?(\*\/)?$')
1093 tag_re = re.compile('^ *@?\(?([Ll]icense|MIT)\)?$')
1094 url_re = re.compile('^ *[#\*]* *https?:\/\/[\w\.\/\-]+$')
1095
1096 crunched_md5sums = {} 1080 crunched_md5sums = {}
1097 1081
1098 # common licenses 1082 # common licenses
1099 crunched_md5sums['89f3bf322f30a1dcfe952e09945842f0'] = 'Apache-2.0' 1083 crunched_md5sums['ad4e9d34a2e966dfe9837f18de03266d'] = 'GFDL-1.1-only'
1100 crunched_md5sums['13b6fe3075f8f42f2270a748965bf3a1'] = '0BSD' 1084 crunched_md5sums['d014fb11a34eb67dc717fdcfc97e60ed'] = 'GFDL-1.2-only'
1101 crunched_md5sums['ba87a7d7c20719c8df4b8beed9b78c43'] = 'BSD-2-Clause' 1085 crunched_md5sums['e020ca655b06c112def28e597ab844f1'] = 'GFDL-1.3-only'
1102 crunched_md5sums['7f8892c03b72de419c27be4ebfa253f8'] = 'BSD-3-Clause'
1103 crunched_md5sums['21128c0790b23a8a9f9e260d5f6b3619'] = 'BSL-1.0'
1104 crunched_md5sums['975742a59ae1b8abdea63a97121f49f4'] = 'EDL-1.0'
1105 crunched_md5sums['5322cee4433d84fb3aafc9e253116447'] = 'EPL-1.0'
1106 crunched_md5sums['6922352e87de080f42419bed93063754'] = 'EPL-2.0'
1107 crunched_md5sums['793475baa22295cae1d3d4046a3a0ceb'] = 'GPL-2.0-only'
1108 crunched_md5sums['ff9047f969b02c20f0559470df5cb433'] = 'GPL-2.0-or-later'
1109 crunched_md5sums['ea6de5453fcadf534df246e6cdafadcd'] = 'GPL-3.0-only'
1110 crunched_md5sums['b419257d4d153a6fde92ddf96acf5b67'] = 'GPL-3.0-or-later'
1111 crunched_md5sums['228737f4c49d3ee75b8fb3706b090b84'] = 'ISC'
1112 crunched_md5sums['c6a782e826ca4e85bf7f8b89435a677d'] = 'LGPL-2.0-only'
1113 crunched_md5sums['32d8f758a066752f0db09bd7624b8090'] = 'LGPL-2.0-or-later'
1114 crunched_md5sums['4820937eb198b4f84c52217ed230be33'] = 'LGPL-2.1-only'
1115 crunched_md5sums['db13fe9f3a13af7adab2dc7a76f9e44a'] = 'LGPL-2.1-or-later'
1116 crunched_md5sums['d7a0f2e4e0950e837ac3eabf5bd1d246'] = 'LGPL-3.0-only'
1117 crunched_md5sums['abbf328e2b434f9153351f06b9f79d02'] = 'LGPL-3.0-or-later'
1118 crunched_md5sums['eecf6429523cbc9693547cf2db790b5c'] = 'MIT'
1119 crunched_md5sums['b218b0e94290b9b818c4be67c8e1cc82'] = 'MIT-0'
1120 crunched_md5sums['ddc18131d6748374f0f35a621c245b49'] = 'Unlicense'
1121 crunched_md5sums['51f9570ff32571fc0a443102285c5e33'] = 'WTFPL'
1122 1086
1123 # The following two were gleaned from the "forever" npm package 1087 # The following two were gleaned from the "forever" npm package
1124 crunched_md5sums['0a97f8e4cbaf889d6fa51f84b89a79f6'] = 'ISC' 1088 crunched_md5sums['0a97f8e4cbaf889d6fa51f84b89a79f6'] = 'ISC'
@@ -1174,6 +1138,39 @@ def crunch_license(licfile):
1174 # https://raw.githubusercontent.com/stackgl/gl-mat3/v2.0.0/LICENSE.md 1138 # https://raw.githubusercontent.com/stackgl/gl-mat3/v2.0.0/LICENSE.md
1175 crunched_md5sums['75512892d6f59dddb6d1c7e191957e9c'] = 'Zlib' 1139 crunched_md5sums['75512892d6f59dddb6d1c7e191957e9c'] = 'Zlib'
1176 1140
1141 commonlicdir = d.getVar('COMMON_LICENSE_DIR')
1142 for fn in sorted(os.listdir(commonlicdir)):
1143 md5value, lictext = crunch_license(os.path.join(commonlicdir, fn))
1144 if md5value not in crunched_md5sums:
1145 crunched_md5sums[md5value] = fn
1146 elif fn != crunched_md5sums[md5value]:
1147 bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, crunched_md5sums[md5value], fn))
1148 else:
1149 bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, crunched_md5sums[md5value]))
1150
1151 return crunched_md5sums
1152
1153def crunch_license(licfile):
1154 '''
1155 Remove non-material text from a license file and then calculate its
1156 md5sum. This works well for licenses that contain a copyright statement,
1157 but is also a useful way to handle people's insistence upon reformatting
1158 the license text slightly (with no material difference to the text of the
1159 license).
1160 '''
1161
1162 import oe.utils
1163
1164 # Note: these are carefully constructed!
1165 license_title_re = re.compile(r'^#*\(? *(This is )?([Tt]he )?.{0,15} ?[Ll]icen[sc]e( \(.{1,10}\))?\)?[:\.]? ?#*$')
1166 license_statement_re = re.compile(r'^((This (project|software)|.{1,10}) is( free software)? (released|licen[sc]ed)|(Released|Licen[cs]ed)) under the .{1,10} [Ll]icen[sc]e:?$')
1167 copyright_re = re.compile('^ *[#\*]* *(Modified work |MIT LICENSED )?Copyright ?(\([cC]\))? .*$')
1168 disclaimer_re = re.compile('^ *\*? ?All [Rr]ights [Rr]eserved\.$')
1169 email_re = re.compile('^.*<[\w\.-]*@[\w\.\-]*>$')
1170 header_re = re.compile('^(\/\**!?)? ?[\-=\*]* ?(\*\/)?$')
1171 tag_re = re.compile('^ *@?\(?([Ll]icense|MIT)\)?$')
1172 url_re = re.compile('^ *[#\*]* *https?:\/\/[\w\.\/\-]+$')
1173
1177 lictext = [] 1174 lictext = []
1178 with open(licfile, 'r', errors='surrogateescape') as f: 1175 with open(licfile, 'r', errors='surrogateescape') as f:
1179 for line in f: 1176 for line in f:
@@ -1215,13 +1212,14 @@ def crunch_license(licfile):
1215 except UnicodeEncodeError: 1212 except UnicodeEncodeError:
1216 md5val = None 1213 md5val = None
1217 lictext = '' 1214 lictext = ''
1218 license = crunched_md5sums.get(md5val, None) 1215 return md5val, lictext
1219 return license, md5val, lictext
1220 1216
1221def guess_license(srctree, d): 1217def guess_license(srctree, d):
1222 import bb 1218 import bb
1223 md5sums = get_license_md5sums(d) 1219 md5sums = get_license_md5sums(d)
1224 1220
1221 crunched_md5sums = crunch_known_licenses(d)
1222
1225 licenses = [] 1223 licenses = []
1226 licspecs = ['*LICEN[CS]E*', 'COPYING*', '*[Ll]icense*', 'LEGAL*', '[Ll]egal*', '*GPL*', 'README.lic*', 'COPYRIGHT*', '[Cc]opyright*', 'e[dp]l-v10'] 1224 licspecs = ['*LICEN[CS]E*', 'COPYING*', '*[Ll]icense*', 'LEGAL*', '[Ll]egal*', '*GPL*', 'README.lic*', 'COPYRIGHT*', '[Cc]opyright*', 'e[dp]l-v10']
1227 skip_extensions = (".html", ".js", ".json", ".svg", ".ts", ".go") 1225 skip_extensions = (".html", ".js", ".json", ".svg", ".ts", ".go")
@@ -1239,7 +1237,8 @@ def guess_license(srctree, d):
1239 md5value = bb.utils.md5_file(licfile) 1237 md5value = bb.utils.md5_file(licfile)
1240 license = md5sums.get(md5value, None) 1238 license = md5sums.get(md5value, None)
1241 if not license: 1239 if not license:
1242 license, crunched_md5, lictext = crunch_license(licfile) 1240 crunched_md5, lictext = crunch_license(licfile)
1241 license = crunched_md5sums.get(crunched_md5, None)
1243 if lictext and not license: 1242 if lictext and not license:
1244 license = 'Unknown' 1243 license = 'Unknown'
1245 logger.info("Please add the following line for '%s' to a 'lib/recipetool/licenses.csv' " \ 1244 logger.info("Please add the following line for '%s' to a 'lib/recipetool/licenses.csv' " \