summaryrefslogtreecommitdiffstats
path: root/scripts/lib/recipetool/create.py
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/lib/recipetool/create.py')
-rw-r--r--scripts/lib/recipetool/create.py225
1 files changed, 1 insertions, 224 deletions
diff --git a/scripts/lib/recipetool/create.py b/scripts/lib/recipetool/create.py
index 94d52d6077..3c6ef6719f 100644
--- a/scripts/lib/recipetool/create.py
+++ b/scripts/lib/recipetool/create.py
@@ -18,6 +18,7 @@ from urllib.parse import urlparse, urldefrag, urlsplit
18import hashlib 18import hashlib
19import bb.fetch2 19import bb.fetch2
20logger = logging.getLogger('recipetool') 20logger = logging.getLogger('recipetool')
21from oe.license_finder import find_licenses
21 22
22tinfoil = None 23tinfoil = None
23plugins = None 24plugins = None
@@ -1040,230 +1041,6 @@ def handle_license_vars(srctree, lines_before, handled, extravalues, d):
1040 handled.append(('license', licvalues)) 1041 handled.append(('license', licvalues))
1041 return licvalues 1042 return licvalues
1042 1043
def get_license_md5sums(d, static_only=False, linenumbers=False):
    """
    Build a mapping of license-file MD5 checksum to license information.

    Two sources are combined:

    * unless ``static_only`` or ``linenumbers`` is set, every file in the
      directory named by ``COMMON_LICENSE_DIR`` is hashed and mapped to its
      file name;
    * every ``lib/recipetool/licenses.csv`` found under a ``BBPATH`` entry or
      two directories above this script is read. Entries read later override
      earlier ones with the same checksum.

    Parameters:
        d: datastore-like object; only ``getVar()`` is used.
        static_only: skip hashing the common license directory.
        linenumbers: map each checksum to the tuple
            ``(license, beginline, endline, md5)`` taken verbatim (as strings)
            from the CSV row instead of to the license name alone. This also
            skips the common license directory.

    Returns:
        dict mapping md5 checksum strings to a license name, or to the tuple
        described above when ``linenumbers`` is true.
    """
    import csv

    md5sums = {}
    if not static_only and not linenumbers:
        # Gather md5sums of license files in common license dir
        commonlicdir = d.getVar('COMMON_LICENSE_DIR')
        # os.listdir(None) lists the current working directory, so an unset
        # variable must not reach it or unrelated files end up in the table.
        if commonlicdir:
            import bb.utils
            for fn in os.listdir(commonlicdir):
                md5value = bb.utils.md5_file(os.path.join(commonlicdir, fn))
                md5sums[md5value] = fn

    # The following were extracted from common values in various recipes
    # (double checking the license against the license file itself, not just
    # the LICENSE value in the recipe)

    # Read license md5sums from csv file
    scripts_path = os.path.dirname(os.path.realpath(__file__))
    bbpath = d.getVar('BBPATH')
    # An unset BBPATH simply contributes no search locations.
    search_paths = bbpath.split(':') if bbpath else []
    search_paths.append(os.path.join(scripts_path, '..', '..'))

    fieldnames = ['md5sum', 'license', 'beginline', 'endline', 'md5']
    for path in search_paths:
        csv_path = os.path.join(path, 'lib', 'recipetool', 'licenses.csv')
        if not os.path.isfile(csv_path):
            continue
        with open(csv_path, newline='') as csv_file:
            reader = csv.DictReader(csv_file, delimiter=',', fieldnames=fieldnames)
            for row in reader:
                if linenumbers:
                    md5sums[row['md5sum']] = (
                        row['license'], row['beginline'], row['endline'], row['md5'])
                else:
                    md5sums[row['md5sum']] = row['license']

    return md5sums
1075
def crunch_known_licenses(d):
    '''
    Calculate the MD5 checksums for the crunched versions of all common
    licenses. Also add additional known checksums.

    The result starts from a built-in table of crunched checksums and is then
    extended with one entry per file in COMMON_LICENSE_DIR (file name used as
    the license name). Built-in entries, and entries from files that sort
    earlier, win when two sources produce the same checksum.

    Parameters:
        d: datastore-like object; only getVar('COMMON_LICENSE_DIR') is used.

    Returns:
        dict mapping crunched md5 checksum strings to license names.
    '''

    crunched_md5sums = {
        # common licenses
        'ad4e9d34a2e966dfe9837f18de03266d': 'GFDL-1.1-only',
        'd014fb11a34eb67dc717fdcfc97e60ed': 'GFDL-1.2-only',
        'e020ca655b06c112def28e597ab844f1': 'GFDL-1.3-only',

        # The following two were gleaned from the "forever" npm package
        '0a97f8e4cbaf889d6fa51f84b89a79f6': 'ISC',
        # https://github.com/waffle-gl/waffle/blob/master/LICENSE.txt
        '50fab24ce589d69af8964fdbfe414c60': 'BSD-2-Clause',
        # https://github.com/spigwitmer/fakeds1963s/blob/master/LICENSE
        '88a4355858a1433fea99fae34a44da88': 'GPL-2.0-only',
        # http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt
        '063b5c3ebb5f3aa4c85a2ed18a31fbe7': 'GPL-2.0-only',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv2.1
        '7f5202f4d44ed15dcd4915f5210417d8': 'LGPL-2.1-only',
        # unixODBC-2.3.4 COPYING
        '3debde09238a8c8e1f6a847e1ec9055b': 'LGPL-2.1-only',
        # https://github.com/FFmpeg/FFmpeg/blob/master/COPYING.LGPLv3
        'f90c613c51aa35da4d79dd55fc724ceb': 'LGPL-3.0-only',
        # https://raw.githubusercontent.com/eclipse/mosquitto/v1.4.14/epl-v10
        'efe2cb9a35826992b9df68224e3c2628': 'EPL-1.0',

        # https://raw.githubusercontent.com/jquery/esprima/3.1.3/LICENSE.BSD
        '80fa7b56a28e8c902e6af194003220a5': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/npm/npm-install-checks/master/LICENSE
        'e659f77bfd9002659e112d0d3d59b2c1': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/silverwind/default-gateway/4.2.0/LICENSE
        '4c641f2d995c47f5cb08bdb4b5b6ea05': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/tad-lispy/node-damerau-levenshtein/v1.0.5/LICENSE
        '2b8c039b2b9a25f0feb4410c4542d346': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/terser/terser/v3.17.0/LICENSE
        '8bd23871802951c9ad63855151204c2c': 'BSD-2-Clause',
        # https://raw.githubusercontent.com/alexei/sprintf.js/1.0.3/LICENSE
        '008c22318c8ea65928bf730ddd0273e3': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/Caligatio/jsSHA/v3.2.0/LICENSE
        '0e46634a01bfef056892949acaea85b1': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/d3/d3-path/v1.0.9/LICENSE
        'b5f72aef53d3b2b432702c30b0215666': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/feross/ieee754/v1.1.13/LICENSE
        'a39327c997c20da0937955192d86232d': 'BSD-3-Clause',
        # https://raw.githubusercontent.com/joyent/node-extsprintf/v1.3.0/LICENSE
        '721f23a96ff4161ca3a5f071bbe18108': 'MIT',
        # https://raw.githubusercontent.com/pvorb/clone/v0.2.0/LICENSE
        'b376d29a53c9573006b9970709231431': 'MIT',
        # https://raw.githubusercontent.com/andris9/encoding/v0.1.12/LICENSE
        '85d8a977ee9d7c5ab4ac03c9b95431c4': 'MIT-0',
        # https://raw.githubusercontent.com/faye/websocket-driver-node/0.7.3/LICENSE.md
        'b66384e7137e41a9b1904ef4d39703b6': 'Apache-2.0',
        # https://raw.githubusercontent.com/less/less.js/v4.1.1/LICENSE
        'b27575459e02221ccef97ec0bfd457ae': 'Apache-2.0',
        # https://raw.githubusercontent.com/microsoft/TypeScript/v3.5.3/LICENSE.txt
        'a54a1a6a39e7f9dbb4a23a42f5c7fd1c': 'Apache-2.0',
        # https://raw.githubusercontent.com/request/request/v2.87.0/LICENSE
        '1034431802e57486b393d00c5d262b8a': 'Apache-2.0',
        # https://raw.githubusercontent.com/dchest/tweetnacl-js/v0.14.5/LICENSE
        '75605e6bdd564791ab698fca65c94a4f': 'Unlicense',
        # https://raw.githubusercontent.com/stackgl/gl-mat3/v2.0.0/LICENSE.md
        '75512892d6f59dddb6d1c7e191957e9c': 'Zlib',
    }

    commonlicdir = d.getVar('COMMON_LICENSE_DIR')
    if not commonlicdir:
        # os.listdir(None) would crunch whatever sits in the current working
        # directory; with no common license dir only the table above applies.
        return crunched_md5sums

    for fn in sorted(os.listdir(commonlicdir)):
        md5value, _ = crunch_license(os.path.join(commonlicdir, fn))
        if md5value is None:
            # crunch_license() reports text it could not encode as None.
            # Storing that as a key would make every other unencodable file
            # "match" this license, so leave it out of the table.
            bb.debug(2, "Unable to calculate a crunched md5sum for '%s', skipping" % fn)
            continue
        existing = crunched_md5sums.get(md5value)
        if existing is None:
            crunched_md5sums[md5value] = fn
        elif fn != existing:
            bb.debug(2, "crunched_md5sums['%s'] is already set to '%s' rather than '%s'" % (md5value, existing, fn))
        else:
            bb.debug(2, "crunched_md5sums['%s'] is already set to '%s'" % (md5value, existing))

    return crunched_md5sums
1154
def crunch_license(licfile):
    '''
    Reduce a license file to its material text and checksum the result.

    Lines that only carry a copyright notice, an "All rights reserved"
    disclaimer, an e-mail address, a decorative header, a license tag, a URL,
    a license title or a "released under the X license" statement are dropped.
    The remaining lines have comment markers removed, spelling, whitespace,
    quotes and brackets normalised, and are joined with single spaces before
    hashing. This lets files that differ only in copyright holder or minor
    reformatting produce the same checksum.

    Returns a tuple (md5sum, lines) where lines is the list of normalised
    lines, or (None, '') if the normalised text cannot be encoded as UTF-8.
    '''

    import oe.utils

    # Note: these are carefully constructed!
    # A line matching any one of them is dropped outright.
    boilerplate_res = (
        # copyright statement
        re.compile(r'^ *[#\*]* *(Modified work |MIT LICENSED )?Copyright ?(\([cC]\))? .*$'),
        # disclaimer
        re.compile(r'^ *\*? ?All [Rr]ights [Rr]eserved\.$'),
        # e-mail address
        re.compile(r'^.*<[\w\.-]*@[\w\.\-]*>$'),
        # decorative header
        re.compile(r'^(\/\**!?)? ?[\-=\*]* ?(\*\/)?$'),
        # tag
        re.compile(r'^ *@?\(?([Ll]icense|MIT)\)?$'),
        # URL
        re.compile(r'^ *[#\*]* *https?:\/\/[\w\.\/\-]+$'),
        # license title
        re.compile(r'^#*\(? *(This is )?([Tt]he )?.{0,15} ?[Ll]icen[sc]e( \(.{1,10}\))?\)?[:\.]? ?#*$'),
        # license statement
        re.compile(r'^((This (project|software)|.{1,10}) is( free software)? (released|licen[sc]ed)|(Released|Licen[cs]ed)) under the .{1,10} [Ll]icen[sc]e:?$'),
    )

    # Comment symbols to strip
    drop_comment_chars = str.maketrans('', '', '*#')
    # Smart quotes, double quotes and backticks become single quotes;
    # curly brackets become square brackets
    unify_chars = str.maketrans({
        u"\u2018": "'",
        u"\u2019": "'",
        u"\u201c": "'",
        u"\u201d": "'",
        '"': '\'',
        '`': '\'',
        "{": "[",
        "}": "]",
    })

    lictext = []
    with open(licfile, 'r', errors='surrogateescape') as f:
        for raw in f:
            # Drop opening statements
            if any(rx.match(raw) for rx in boilerplate_res):
                continue
            text = raw.translate(drop_comment_chars)
            # Unify spelling
            text = text.replace('sub-license', 'sublicense')
            # Squash spaces
            text = oe.utils.squashspaces(text.strip())
            text = text.translate(unify_chars)
            if text:
                lictext.append(text)

    try:
        payload = ' '.join(lictext).encode('utf-8')
    except UnicodeEncodeError:
        # Undecodable bytes read via surrogateescape cannot be re-encoded
        return None, ''
    return hashlib.md5(payload).hexdigest(), lictext
1218
def find_license_files(srctree):
    """
    Walk srctree and collect files whose names look like license files.

    A file is picked up when its base name matches one of the shell-style
    patterns in licspecs, unless the name ends with one of skip_extensions.
    Only the file name is examined, never the content.

    Parameters:
        srctree: root directory to search recursively.

    Returns:
        list of paths (each os.path.join(root, name) as produced by os.walk),
        in walk order, each file listed once.
    """
    licspecs = ['*LICEN[CS]E*', 'COPYING*', '*[Ll]icense*', 'LEGAL*', '[Ll]egal*', '*GPL*', 'README.lic*', 'COPYRIGHT*', '[Cc]opyright*', 'e[dp]l-v10']
    skip_extensions = (".html", ".js", ".json", ".svg", ".ts", ".go")
    licfiles = []
    for root, _dirs, files in os.walk(srctree):
        for fn in files:
            if fn.endswith(skip_extensions):
                continue
            # os.walk reports every file once, so a duplicate could only come
            # from one name matching several patterns; testing with any()
            # appends it a single time without rescanning the result list.
            if any(fnmatch.fnmatch(fn, spec) for spec in licspecs):
                licfiles.append(os.path.join(root, fn))

    return licfiles
1234
def match_licenses(licfiles, srctree, d):
    """
    Identify the license of each file in licfiles by checksum.

    Each file is first looked up by the md5sum of its raw content in the table
    from get_license_md5sums(). If that fails, the file is crunched with
    crunch_license() and the crunched checksum is looked up in the table from
    crunch_known_licenses(). A file that still has usable text but no match is
    reported as 'Unknown' and a hint on extending licenses.csv is logged.
    Files for which no text could be extracted are left out of the result.

    Parameters:
        licfiles: iterable of license file paths; processed in sorted order.
        srctree: source tree root, used to make the reported paths relative.
        d: datastore-like object; expand() is applied to each path and the
            object is passed on to the checksum table helpers.

    Returns:
        list of (license, path relative to srctree, md5sum of the raw file).
    """
    import bb
    md5sums = get_license_md5sums(d)

    crunched_md5sums = crunch_known_licenses(d)

    licenses = []
    for licfile in sorted(licfiles):
        resolved_licfile = d.expand(licfile)
        md5value = bb.utils.md5_file(resolved_licfile)
        licname = md5sums.get(md5value, None)
        if not licname:
            crunched_md5, lictext = crunch_license(resolved_licfile)
            # crunch_license() returns None when the text could not be
            # encoded; never use that as a lookup key, otherwise a stray None
            # entry in the table would be reported as a match.
            if crunched_md5 is not None:
                licname = crunched_md5sums.get(crunched_md5, None)
            if lictext and not licname:
                licname = 'Unknown'
                logger.info("Please add the following line for '%s' to a 'lib/recipetool/licenses.csv' " \
                    "and replace `Unknown` with the license:\n" \
                    "%s,Unknown" % (os.path.relpath(licfile, srctree + "/.."), md5value))
        if licname:
            licenses.append((licname, os.path.relpath(licfile, srctree), md5value))

    return licenses
1258
def find_licenses(srctree, d):
    """
    Locate the license files below srctree and work out which license each
    one contains.

    Returns the list built by match_licenses() for the files reported by
    find_license_files().
    """
    # FIXME should we grab at least one source file with a license header and add that too?
    return match_licenses(find_license_files(srctree), srctree, d)
1266
1267def split_pkg_licenses(licvalues, packages, outlines, fallback_licenses=None, pn='${PN}'): 1044def split_pkg_licenses(licvalues, packages, outlines, fallback_licenses=None, pn='${PN}'):
1268 """ 1045 """
1269 Given a list of (license, path, md5sum) as returned by match_licenses(), 1046 Given a list of (license, path, md5sum) as returned by match_licenses(),