diff options
author | Peter Kjellerstedt <peter.kjellerstedt@axis.com> | 2023-12-06 21:55:28 +0100 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2023-12-08 16:58:34 +0000 |
commit | decf6e66dfe3d688c93b3ff811077c46cb9883bb (patch) | |
tree | a20227da6e38cb96e45293a92d573359d20fa5f6 /scripts/lib/recipetool/create.py | |
parent | 3ef9ea88f15c4d53ef2229e84e7db81a052e6946 (diff) | |
download | poky-decf6e66dfe3d688c93b3ff811077c46cb9883bb.tar.gz |
recipetool: create: Improve identification of licenses
Rather than having a static list of crunched MD5 checksums for some of
the most common licenses, calculate them for all common licenses. This
should improve the identification of license text variations.
(From OE-Core rev: 377f9513dc56e9b8e5f5813c1535be0206756949)
Signed-off-by: Peter Kjellerstedt <peter.kjellerstedt@axis.com>
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'scripts/lib/recipetool/create.py')
-rw-r--r-- | scripts/lib/recipetool/create.py | 91 |
1 files changed, 45 insertions, 46 deletions
diff --git a/scripts/lib/recipetool/create.py b/scripts/lib/recipetool/create.py index 963aa91421..6e15326c55 100644 --- a/scripts/lib/recipetool/create.py +++ b/scripts/lib/recipetool/create.py | |||
@@ -1071,54 +1071,18 @@ def get_license_md5sums(d, static_only=False, linenumbers=False): | |||
1071 | 1071 | ||
1072 | return md5sums | 1072 | return md5sums |
1073 | 1073 | ||
1074 | def crunch_license(licfile): | 1074 | def crunch_known_licenses(d): |
1075 | ''' | 1075 | ''' |
1076 | Remove non-material text from a license file and then check | 1076 | Calculate the MD5 checksums for the crunched versions of all common |
1077 | its md5sum against a known list. This works well for licenses | 1077 | licenses. Also add additional known checksums. |
1078 | which contain a copyright statement, but is also a useful way | ||
1079 | to handle people's insistence upon reformatting the license text | ||
1080 | slightly (with no material difference to the text of the | ||
1081 | license). | ||
1082 | ''' | 1078 | ''' |
1083 | 1079 | ||
1084 | import oe.utils | ||
1085 | |||
1086 | # Note: these are carefully constructed! | ||
1087 | license_title_re = re.compile(r'^#*\(? *(This is )?([Tt]he )?.{0,15} ?[Ll]icen[sc]e( \(.{1,10}\))?\)?[:\.]? ?#*$') | ||
1088 | license_statement_re = re.compile(r'^((This (project|software)|.{1,10}) is( free software)? (released|licen[sc]ed)|(Released|Licen[cs]ed)) under the .{1,10} [Ll]icen[sc]e:?$') | ||
1089 | copyright_re = re.compile('^ *[#\*]* *(Modified work |MIT LICENSED )?Copyright ?(\([cC]\))? .*$') | ||
1090 | disclaimer_re = re.compile('^ *\*? ?All [Rr]ights [Rr]eserved\.$') | ||
1091 | email_re = re.compile('^.*<[\w\.-]*@[\w\.\-]*>$') | ||
1092 | header_re = re.compile('^(\/\**!?)? ?[\-=\*]* ?(\*\/)?$') | ||
1093 | tag_re = re.compile('^ *@?\(?([Ll]icense|MIT)\)?$') | ||
1094 | url_re = re.compile('^ *[#\*]* *https?:\/\/[\w\.\/\-]+$') | ||
1095 | |||
1096 | crunched_md5sums = {} | 1080 | crunched_md5sums = {} |
1097 | 1081 | ||
1098 | # common licenses | 1082 | # common licenses |
1099 | crunched_md5sums['89f3bf322f30a1dcfe952e09945842f0'] = 'Apache-2.0' | 1083 | crunched_md5sums['ad4e9d34a2e966dfe9837f18de03266d'] = 'GFDL-1.1-only' |
1100 | crunched_md5sums['13b6fe3075f8f42f2270a748965bf3a1'] = '0BSD' | 1084 | crunched_md5sums['d014fb11a34eb67dc717fdcfc97e60ed'] = 'GFDL-1.2-only' |
1101 | crunched_md5sums['ba87a7d7c20719c8df4b8beed9b78c43'] = 'BSD-2-Clause' | 1085 | crunched_md5sums['e020ca655b06c112def28e597ab844f1'] = 'GFDL-1.3-only' |
1102 | crunched_md5sums['7f8892c03b72de419c27be4ebfa253f8'] = 'BSD-3-Clause' | ||
1103 | crunched_md5sums['21128c0790b23a8a9f9e260d5f6b3619'] = 'BSL-1.0' | ||
1104 | crunched_md5sums['975742a59ae1b8abdea63a97121f49f4'] = 'EDL-1.0' | ||
1105 | crunched_md5sums['5322cee4433d84fb3aafc9e253116447'] = 'EPL-1.0' | ||
1106 | crunched_md5sums['6922352e87de080f42419bed93063754'] = 'EPL-2.0' | ||
1107 | crunched_md5sums['793475baa22295cae1d3d4046a3a0ceb'] = 'GPL-2.0-only' | ||
1108 | crunched_md5sums['ff9047f969b02c20f0559470df5cb433'] = 'GPL-2.0-or-later' | ||
1109 | crunched_md5sums['ea6de5453fcadf534df246e6cdafadcd'] = 'GPL-3.0-only' | ||
1110 | crunched_md5sums['b419257d4d153a6fde92ddf96acf5b67'] = 'GPL-3.0-or-later' | ||
1111 | crunched_md5sums['228737f4c49d3ee75b8fb3706b090b84'] = 'ISC' | ||
1112 | crunched_md5sums['c6a782e826ca4e85bf7f8b89435a677d'] = 'LGPL-2.0-only' | ||
1113 | crunched_md5sums['32d8f758a066752f0db09bd7624b8090'] = 'LGPL-2.0-or-later' | ||
1114 | crunched_md5sums['4820937eb198b4f84c52217ed230be33'] = 'LGPL-2.1-only' | ||
1115 | crunched_md5sums['db13fe9f3a13af7adab2dc7a76f9e44a'] = 'LGPL-2.1-or-later' | ||
1116 | crunched_md5sums['d7a0f2e4e0950e837ac3eabf5bd1d246'] = 'LGPL-3.0-only' | ||
1117 | crunched_md5sums['abbf328e2b434f9153351f06b9f79d02'] = 'LGPL-3.0-or-later' | ||
1118 | crunched_md5sums['eecf6429523cbc9693547cf2db790b5c'] = 'MIT' | ||
1119 | crunched_md5sums['b218b0e94290b9b818c4be67c8e1cc82'] = 'MIT-0' | ||
1120 | crunched_md5sums['ddc18131d6748374f0f35a621c245b49'] = 'Unlicense' | ||
1121 | crunched_md5sums['51f9570ff32571fc0a443102285c5e33'] = 'WTFPL' | ||
1122 | 1086 | ||
1123 | # The following two were gleaned from the "forever" npm package | 1087 | # The following two were gleaned from the "forever" npm package |
1124 | crunched_md5sums['0a97f8e4cbaf889d6fa51f84b89a79f6'] = 'ISC' | 1088 | crunched_md5sums['0a97f8e4cbaf889d6fa51f84b89a79f6'] = 'ISC' |
@@ -1174,6 +1138,39 @@ def crunch_license(licfile): | |||
1174 | # https://raw.githubusercontent.com/stackgl/gl-mat3/v2.0.0/LICENSE.md | 1138 | # https://raw.githubusercontent.com/stackgl/gl-mat3/v2.0.0/LICENSE.md |
1175 | crunched_md5sums['75512892d6f59dddb6d1c7e191957e9c'] = 'Zlib' | 1139 | crunched_md5sums['75512892d6f59dddb6d1c7e191957e9c'] = 'Zlib' |
1176 | 1140 | ||
1141 | commonlicdir = d.getVar('COMMON_LICENSE_DIR') | ||
1142 | for fn in sorted(os.listdir(commonlicdir)): | ||
1143 | md5value, lictext = crunch_license(os.path.join(commonlicdir, fn)) | ||
1144 | if md5value not in crunched_md5sums: | ||
1145 | crunched_md5sums[md5value] = fn | ||
1146 | elif fn != crunched_md5sums[md5value]: | ||
1147 | bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, crunched_md5sums[md5value], fn)) | ||
1148 | else: | ||
1149 | bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, crunched_md5sums[md5value])) | ||
1150 | |||
1151 | return crunched_md5sums | ||
1152 | |||
1153 | def crunch_license(licfile): | ||
1154 | ''' | ||
1155 | Remove non-material text from a license file and then calculate its | ||
1156 | md5sum. This works well for licenses that contain a copyright statement, | ||
1157 | but is also a useful way to handle people's insistence upon reformatting | ||
1158 | the license text slightly (with no material difference to the text of the | ||
1159 | license). | ||
1160 | ''' | ||
1161 | |||
1162 | import oe.utils | ||
1163 | |||
1164 | # Note: these are carefully constructed! | ||
1165 | license_title_re = re.compile(r'^#*\(? *(This is )?([Tt]he )?.{0,15} ?[Ll]icen[sc]e( \(.{1,10}\))?\)?[:\.]? ?#*$') | ||
1166 | license_statement_re = re.compile(r'^((This (project|software)|.{1,10}) is( free software)? (released|licen[sc]ed)|(Released|Licen[cs]ed)) under the .{1,10} [Ll]icen[sc]e:?$') | ||
1167 | copyright_re = re.compile('^ *[#\*]* *(Modified work |MIT LICENSED )?Copyright ?(\([cC]\))? .*$') | ||
1168 | disclaimer_re = re.compile('^ *\*? ?All [Rr]ights [Rr]eserved\.$') | ||
1169 | email_re = re.compile('^.*<[\w\.-]*@[\w\.\-]*>$') | ||
1170 | header_re = re.compile('^(\/\**!?)? ?[\-=\*]* ?(\*\/)?$') | ||
1171 | tag_re = re.compile('^ *@?\(?([Ll]icense|MIT)\)?$') | ||
1172 | url_re = re.compile('^ *[#\*]* *https?:\/\/[\w\.\/\-]+$') | ||
1173 | |||
1177 | lictext = [] | 1174 | lictext = [] |
1178 | with open(licfile, 'r', errors='surrogateescape') as f: | 1175 | with open(licfile, 'r', errors='surrogateescape') as f: |
1179 | for line in f: | 1176 | for line in f: |
@@ -1215,13 +1212,14 @@ def crunch_license(licfile): | |||
1215 | except UnicodeEncodeError: | 1212 | except UnicodeEncodeError: |
1216 | md5val = None | 1213 | md5val = None |
1217 | lictext = '' | 1214 | lictext = '' |
1218 | license = crunched_md5sums.get(md5val, None) | 1215 | return md5val, lictext |
1219 | return license, md5val, lictext | ||
1220 | 1216 | ||
1221 | def guess_license(srctree, d): | 1217 | def guess_license(srctree, d): |
1222 | import bb | 1218 | import bb |
1223 | md5sums = get_license_md5sums(d) | 1219 | md5sums = get_license_md5sums(d) |
1224 | 1220 | ||
1221 | crunched_md5sums = crunch_known_licenses(d) | ||
1222 | |||
1225 | licenses = [] | 1223 | licenses = [] |
1226 | licspecs = ['*LICEN[CS]E*', 'COPYING*', '*[Ll]icense*', 'LEGAL*', '[Ll]egal*', '*GPL*', 'README.lic*', 'COPYRIGHT*', '[Cc]opyright*', 'e[dp]l-v10'] | 1224 | licspecs = ['*LICEN[CS]E*', 'COPYING*', '*[Ll]icense*', 'LEGAL*', '[Ll]egal*', '*GPL*', 'README.lic*', 'COPYRIGHT*', '[Cc]opyright*', 'e[dp]l-v10'] |
1227 | skip_extensions = (".html", ".js", ".json", ".svg", ".ts", ".go") | 1225 | skip_extensions = (".html", ".js", ".json", ".svg", ".ts", ".go") |
@@ -1239,7 +1237,8 @@ def guess_license(srctree, d): | |||
1239 | md5value = bb.utils.md5_file(licfile) | 1237 | md5value = bb.utils.md5_file(licfile) |
1240 | license = md5sums.get(md5value, None) | 1238 | license = md5sums.get(md5value, None) |
1241 | if not license: | 1239 | if not license: |
1242 | license, crunched_md5, lictext = crunch_license(licfile) | 1240 | crunched_md5, lictext = crunch_license(licfile) |
1241 | license = crunched_md5sums.get(crunched_md5, None) | ||
1243 | if lictext and not license: | 1242 | if lictext and not license: |
1244 | license = 'Unknown' | 1243 | license = 'Unknown' |
1245 | logger.info("Please add the following line for '%s' to a 'lib/recipetool/licenses.csv' " \ | 1244 | logger.info("Please add the following line for '%s' to a 'lib/recipetool/licenses.csv' " \ |