summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tobias Olausson <tobias.olausson@pelagicore.com> 2014-10-20 16:09:15 +0200
committer Richard Purdie <richard.purdie@linuxfoundation.org> 2014-10-24 17:36:15 +0100
commit f348071efd9419de3fa7f4e4ad78dfa5f8445412 (patch)
tree 7210e42b132f8cfd8df9a89dde184629a7b3fb81
parent 8882eaaf97821fd96a72dc1382517689912d357b (diff)
download poky-f348071efd9419de3fa7f4e4ad78dfa5f8445412.tar.gz
spdx.bbclass: improved stability, fixed SPDX compliance issues. Changes are reflected in licenses.conf.
The previous version could crash on dead links in the rootfs, or if the manifest directory did not exist. The generated files were also not compliant with the SPDX specification; for example, file entries did not always start with the FileName tag, time stamps were incorrectly formatted, etc. Stability issues are addressed by added checks, originally written by Johan Thelin <johan.thelin@pelagicore.com>, who never upstreamed them. I've also added an option for getting full SPDX output from FOSSology, i.e. not only for all files, but for the package as well, including license references. License refs are required in order for SPDXTools to process the output. For that reason, this option defaults to true. (From OE-Core rev: 5d3a4f4f57e4d8581fd88a14324f94e93104a690) Signed-off-by: Tobias Olausson <tobias.olausson@pelagicore.com> Signed-off-by: Ross Burton <ross.burton@intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
-rw-r--r-- meta/classes/spdx.bbclass 154
-rw-r--r-- meta/conf/licenses.conf 24
2 files changed, 126 insertions, 52 deletions
diff --git a/meta/classes/spdx.bbclass b/meta/classes/spdx.bbclass
index bccc230d8c..c0050f394d 100644
--- a/meta/classes/spdx.bbclass
+++ b/meta/classes/spdx.bbclass
@@ -43,6 +43,9 @@ python do_spdx () {
43 info['spdx_temp_dir'] = (d.getVar('SPDX_TEMP_DIR', True) or "") 43 info['spdx_temp_dir'] = (d.getVar('SPDX_TEMP_DIR', True) or "")
44 info['tar_file'] = os.path.join( info['workdir'], info['pn'] + ".tar.gz" ) 44 info['tar_file'] = os.path.join( info['workdir'], info['pn'] + ".tar.gz" )
45 45
46 # Make sure manifest dir exists
47 if not os.path.exists( manifest_dir ):
48 bb.utils.mkdirhier( manifest_dir )
46 49
47 ## get everything from cache. use it to decide if 50 ## get everything from cache. use it to decide if
48 ## something needs to be rerun 51 ## something needs to be rerun
@@ -67,24 +70,27 @@ python do_spdx () {
67 70
68 if cache_cur: 71 if cache_cur:
69 spdx_file_info = cached_spdx['Files'] 72 spdx_file_info = cached_spdx['Files']
73 foss_package_info = cached_spdx['Package']
74 foss_license_info = cached_spdx['Licenses']
70 else: 75 else:
71 ## setup fossology command 76 ## setup fossology command
72 foss_server = (d.getVar('FOSS_SERVER', True) or "") 77 foss_server = (d.getVar('FOSS_SERVER', True) or "")
73 foss_flags = (d.getVar('FOSS_WGET_FLAGS', True) or "") 78 foss_flags = (d.getVar('FOSS_WGET_FLAGS', True) or "")
79 foss_full_spdx = (d.getVar('FOSS_FULL_SPDX', True) == "true" or false)
74 foss_command = "wget %s --post-file=%s %s"\ 80 foss_command = "wget %s --post-file=%s %s"\
75 % (foss_flags,info['tar_file'],foss_server) 81 % (foss_flags,info['tar_file'],foss_server)
76 82
77 #bb.warn(info['pn'] + json.dumps(local_file_info)) 83 (foss_package_info, foss_file_info, foss_license_info) = run_fossology( foss_command, foss_full_spdx )
78 foss_file_info = run_fossology( foss_command )
79 spdx_file_info = create_spdx_doc( local_file_info, foss_file_info ) 84 spdx_file_info = create_spdx_doc( local_file_info, foss_file_info )
80 ## write to cache 85 ## write to cache
81 write_cached_spdx(sstatefile,cur_ver_code,spdx_file_info) 86 write_cached_spdx(sstatefile, cur_ver_code, foss_package_info,
87 spdx_file_info, foss_license_info)
82 88
83 ## Get document and package level information 89 ## Get document and package level information
84 spdx_header_info = get_header_info(info, cur_ver_code, spdx_file_info) 90 spdx_header_info = get_header_info(info, cur_ver_code, foss_package_info)
85 91
86 ## CREATE MANIFEST 92 ## CREATE MANIFEST
87 create_manifest(info,spdx_header_info,spdx_file_info) 93 create_manifest(info,spdx_header_info,spdx_file_info, foss_license_info)
88 94
89 ## clean up the temp stuff 95 ## clean up the temp stuff
90 remove_dir_tree( info['spdx_temp_dir'] ) 96 remove_dir_tree( info['spdx_temp_dir'] )
@@ -93,32 +99,50 @@ python do_spdx () {
93} 99}
94addtask spdx after do_patch before do_configure 100addtask spdx after do_patch before do_configure
95 101
96def create_manifest(info,header,files): 102def create_manifest(info, header, files, licenses):
97 with open(info['outfile'], 'w') as f: 103 import codecs
104 with codecs.open(info['outfile'], mode='w', encoding='utf-8') as f:
105 # Write header
98 f.write(header + '\n') 106 f.write(header + '\n')
107
108 # Write file data
99 for chksum, block in files.iteritems(): 109 for chksum, block in files.iteritems():
110 f.write("FileName: " + block['FileName'] + '\n')
100 for key, value in block.iteritems(): 111 for key, value in block.iteritems():
101 f.write(key + ": " + value) 112 if not key == 'FileName':
102 f.write('\n') 113 f.write(key + ": " + value + '\n')
114 f.write('\n')
115
116 # Write license data
117 for id, block in licenses.iteritems():
118 f.write("LicenseID: " + id + '\n')
119 for key, value in block.iteritems():
120 f.write(key + ": " + value + '\n')
103 f.write('\n') 121 f.write('\n')
104 122
105def get_cached_spdx( sstatefile ): 123def get_cached_spdx( sstatefile ):
106 import json 124 import json
125 import codecs
107 cached_spdx_info = {} 126 cached_spdx_info = {}
108 with open( sstatefile, 'r' ) as f: 127 with codecs.open( sstatefile, mode='r', encoding='utf-8' ) as f:
109 try: 128 try:
110 cached_spdx_info = json.load(f) 129 cached_spdx_info = json.load(f)
111 except ValueError as e: 130 except ValueError as e:
112 cached_spdx_info = None 131 cached_spdx_info = None
113 return cached_spdx_info 132 return cached_spdx_info
114 133
115def write_cached_spdx( sstatefile, ver_code, files ): 134def write_cached_spdx( sstatefile, ver_code, package_info, files, license_info):
116 import json 135 import json
136 import codecs
117 spdx_doc = {} 137 spdx_doc = {}
118 spdx_doc['PackageVerificationCode'] = ver_code 138 spdx_doc['PackageVerificationCode'] = ver_code
119 spdx_doc['Files'] = {} 139 spdx_doc['Files'] = {}
120 spdx_doc['Files'] = files 140 spdx_doc['Files'] = files
121 with open( sstatefile, 'w' ) as f: 141 spdx_doc['Package'] = {}
142 spdx_doc['Package'] = package_info
143 spdx_doc['Licenses'] = {}
144 spdx_doc['Licenses'] = license_info
145 with codecs.open( sstatefile, mode='w', encoding='utf-8' ) as f:
122 f.write(json.dumps(spdx_doc)) 146 f.write(json.dumps(spdx_doc))
123 147
124def setup_foss_scan( info, cache, cached_files ): 148def setup_foss_scan( info, cache, cached_files ):
@@ -139,7 +163,8 @@ def setup_foss_scan( info, cache, cached_files ):
139 continue 163 continue
140 164
141 checksum = hash_file( abs_path ) 165 checksum = hash_file( abs_path )
142 mtime = time.asctime(time.localtime(stats.st_mtime)) 166 if not checksum is None:
167 mtime = time.asctime(time.localtime(stats.st_mtime))
143 168
144 ## retain cache information if it exists 169 ## retain cache information if it exists
145 file_info[checksum] = {} 170 file_info[checksum] = {}
@@ -147,27 +172,25 @@ def setup_foss_scan( info, cache, cached_files ):
147 file_info[checksum] = cached_files[checksum] 172 file_info[checksum] = cached_files[checksum]
148 else: 173 else:
149 file_info[checksum]['FileName'] = full_path 174 file_info[checksum]['FileName'] = full_path
150
151 try:
152 os.makedirs( dest_dir )
153 except OSError as e:
154 if e.errno == errno.EEXIST and os.path.isdir(dest_dir):
155 pass
156 else:
157 bb.warn( "mkdir failed " + str(e) + "\n" )
158 continue
159
160 if(cache and checksum not in cached_files) or not cache:
161 try: 175 try:
162 shutil.copyfile( abs_path, dest_path ) 176 os.makedirs(dest_dir)
163 except shutil.Error as e: 177 except OSError as e:
164 bb.warn( str(e) + "\n" ) 178 if e.errno == errno.EEXIST and os.path.isdir(dest_dir):
165 except IOError as e: 179 pass
166 bb.warn( str(e) + "\n" ) 180 else:
181 bb.warn( "mkdir failed " + str(e) + "\n" )
182 continue
183
184 if (cache and checksum not in cached_files) or not cache:
185 try:
186 shutil.copyfile( abs_path, dest_path )
187 except shutil.Error as e:
188 bb.warn( str(e) + "\n" )
189 except IOError as e:
190 bb.warn( str(e) + "\n" )
167 191
168 with tarfile.open( info['tar_file'], "w:gz" ) as tar: 192 with tarfile.open( info['tar_file'], "w:gz" ) as tar:
169 tar.add( info['spdx_temp_dir'], arcname=os.path.basename(info['spdx_temp_dir']) ) 193 tar.add( info['spdx_temp_dir'], arcname=os.path.basename(info['spdx_temp_dir']) )
170 tar.close()
171 194
172 return file_info 195 return file_info
173 196
@@ -193,13 +216,15 @@ def list_files( dir ):
193 return 216 return
194 217
195def hash_file( file_name ): 218def hash_file( file_name ):
219 f = None
196 try: 220 try:
197 f = open( file_name, 'rb' ) 221 f = open( file_name, 'rb' )
198 data_string = f.read() 222 data_string = f.read()
199 except: 223 except:
200 return None 224 return None
201 finally: 225 finally:
202 f.close() 226 if not f is None:
227 f.close()
203 sha1 = hash_string( data_string ) 228 sha1 = hash_string( data_string )
204 return sha1 229 return sha1
205 230
@@ -209,30 +234,58 @@ def hash_string( data ):
209 sha1.update( data ) 234 sha1.update( data )
210 return sha1.hexdigest() 235 return sha1.hexdigest()
211 236
212def run_fossology( foss_command ): 237def run_fossology( foss_command, full_spdx ):
213 import string, re 238 import string, re
214 import subprocess 239 import subprocess
215 240
216 p = subprocess.Popen(foss_command.split(), 241 p = subprocess.Popen(foss_command.split(),
217 stdout=subprocess.PIPE, stderr=subprocess.PIPE) 242 stdout=subprocess.PIPE, stderr=subprocess.PIPE)
218 foss_output, foss_error = p.communicate() 243 foss_output, foss_error = p.communicate()
219 244 foss_output = unicode(foss_output, "utf-8")
220 records = [] 245 foss_output = string.replace(foss_output, '\r', '')
221 records = re.findall('FileName:.*?</text>', foss_output, re.S) 246
247 # Package info
248 package_info = {}
249 if full_spdx:
250 # All mandatory, only one occurance
251 package_info['PackageCopyrightText'] = re.findall('PackageCopyrightText: (.*?</text>)', foss_output, re.S)[0]
252 package_info['PackageLicenseDeclared'] = re.findall('PackageLicenseDeclared: (.*)', foss_output)[0]
253 package_info['PackageLicenseConcluded'] = re.findall('PackageLicenseConcluded: (.*)', foss_output)[0]
254 # These may be more than one
255 package_info['PackageLicenseInfoFromFiles'] = re.findall('PackageLicenseInfoFromFiles: (.*)', foss_output)
256 else:
257 DEFAULT = "NOASSERTION"
258 package_info['PackageCopyrightText'] = "<text>" + DEFAULT + "</text>"
259 package_info['PackageLicenseDeclared'] = DEFAULT
260 package_info['PackageLicenseConcluded'] = DEFAULT
261 package_info['PackageLicenseInfoFromFiles'] = []
222 262
263 # File info
223 file_info = {} 264 file_info = {}
265 records = []
266 # FileName is also in PackageFileName, so we match on FileType as well.
267 records = re.findall('FileName:.*?FileType:.*?</text>', foss_output, re.S)
268
224 for rec in records: 269 for rec in records:
225 rec = string.replace( rec, '\r', '' )
226 chksum = re.findall( 'FileChecksum: SHA1: (.*)\n', rec)[0] 270 chksum = re.findall( 'FileChecksum: SHA1: (.*)\n', rec)[0]
227 file_info[chksum] = {} 271 file_info[chksum] = {}
228 file_info[chksum]['FileCopyrightText'] = re.findall( 'FileCopyrightText: ' 272 file_info[chksum]['FileCopyrightText'] = re.findall( 'FileCopyrightText: '
229 + '(.*?</text>)', rec, re.S )[0] 273 + '(.*?</text>)', rec, re.S )[0]
230 fields = ['FileType','LicenseConcluded', 274 fields = ['FileName', 'FileType', 'LicenseConcluded', 'LicenseInfoInFile']
231 'LicenseInfoInFile','FileName']
232 for field in fields: 275 for field in fields:
233 file_info[chksum][field] = re.findall(field + ': (.*)', rec)[0] 276 file_info[chksum][field] = re.findall(field + ': (.*)', rec)[0]
234 277
235 return file_info 278 # Licenses
279 license_info = {}
280 licenses = []
281 licenses = re.findall('LicenseID:.*?LicenseName:.*?\n', foss_output, re.S)
282 for lic in licenses:
283 license_id = re.findall('LicenseID: (.*)\n', lic)[0]
284 license_info[license_id] = {}
285 license_info[license_id]['ExtractedText'] = re.findall('ExtractedText: (.*?</text>)',lic, re.S)[0]
286 license_info[license_id]['LicenseName'] = re.findall('LicenseName: (.*)', lic)[0]
287
288 return (package_info, file_info, license_info)
236 289
237def create_spdx_doc( file_info, scanned_files ): 290def create_spdx_doc( file_info, scanned_files ):
238 import json 291 import json
@@ -259,12 +312,14 @@ def get_ver_code( dirname ):
259 except OSError as e: 312 except OSError as e:
260 bb.warn( "Stat failed" + str(e) + "\n") 313 bb.warn( "Stat failed" + str(e) + "\n")
261 continue 314 continue
262 chksums.append(hash_file(os.path.join(dirname,f_dir,f))) 315 hash = hash_file(os.path.join(dirname,f_dir,f))
316 if not hash is None:
317 chksums.append(hash)
263 ver_code_string = ''.join( chksums ).lower() 318 ver_code_string = ''.join( chksums ).lower()
264 ver_code = hash_string( ver_code_string ) 319 ver_code = hash_string( ver_code_string )
265 return ver_code 320 return ver_code
266 321
267def get_header_info( info, spdx_verification_code, spdx_files ): 322def get_header_info( info, spdx_verification_code, package_info):
268 """ 323 """
269 Put together the header SPDX information. 324 Put together the header SPDX information.
270 Eventually this needs to become a lot less 325 Eventually this needs to become a lot less
@@ -290,9 +345,9 @@ def get_header_info( info, spdx_verification_code, spdx_files ):
290 head.append("") 345 head.append("")
291 346
292 ## Creator information 347 ## Creator information
293 now = datetime.now().strftime('%Y-%m-%dT%H:%M:%S') 348 now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ')
294 head.append("## Creation Information") 349 head.append("## Creation Information")
295 head.append("Creator: fossology-spdx") 350 head.append("Creator: Tool: fossology-spdx")
296 head.append("Created: " + now) 351 head.append("Created: " + now)
297 head.append("CreatorComment: <text>UNO</text>") 352 head.append("CreatorComment: <text>UNO</text>")
298 head.append("") 353 head.append("")
@@ -301,21 +356,22 @@ def get_header_info( info, spdx_verification_code, spdx_files ):
301 head.append("## Package Information") 356 head.append("## Package Information")
302 head.append("PackageName: " + info['pn']) 357 head.append("PackageName: " + info['pn'])
303 head.append("PackageVersion: " + info['pv']) 358 head.append("PackageVersion: " + info['pv'])
304 head.append("PackageDownloadLocation: " + DEFAULT)
305 head.append("PackageSummary: <text></text>")
306 head.append("PackageFileName: " + os.path.basename(info['tar_file'])) 359 head.append("PackageFileName: " + os.path.basename(info['tar_file']))
307 head.append("PackageSupplier: Person:" + DEFAULT) 360 head.append("PackageSupplier: Person:" + DEFAULT)
361 head.append("PackageDownloadLocation: " + DEFAULT)
362 head.append("PackageSummary: <text></text>")
308 head.append("PackageOriginator: Person:" + DEFAULT) 363 head.append("PackageOriginator: Person:" + DEFAULT)
309 head.append("PackageChecksum: SHA1: " + package_checksum) 364 head.append("PackageChecksum: SHA1: " + package_checksum)
310 head.append("PackageVerificationCode: " + spdx_verification_code) 365 head.append("PackageVerificationCode: " + spdx_verification_code)
311 head.append("PackageDescription: <text>" + info['pn'] 366 head.append("PackageDescription: <text>" + info['pn']
312 + " version " + info['pv'] + "</text>") 367 + " version " + info['pv'] + "</text>")
313 head.append("") 368 head.append("")
314 head.append("PackageCopyrightText: <text>" + DEFAULT + "</text>") 369 head.append("PackageCopyrightText: " + package_info['PackageCopyrightText'])
315 head.append("") 370 head.append("")
316 head.append("PackageLicenseDeclared: " + DEFAULT) 371 head.append("PackageLicenseDeclared: " + package_info['PackageLicenseDeclared'])
317 head.append("PackageLicenseConcluded: " + DEFAULT) 372 head.append("PackageLicenseConcluded: " + package_info['PackageLicenseConcluded'])
318 head.append("PackageLicenseInfoFromFiles: " + DEFAULT) 373 for licref in package_info['PackageLicenseInfoFromFiles']:
374 head.append("PackageLicenseInfoFromFiles: " + licref)
319 head.append("") 375 head.append("")
320 376
321 ## header for file level 377 ## header for file level
diff --git a/meta/conf/licenses.conf b/meta/conf/licenses.conf
index fe96066e4e..629916b6a5 100644
--- a/meta/conf/licenses.conf
+++ b/meta/conf/licenses.conf
@@ -143,7 +143,7 @@ DATA_LICENSE = "CC0-1.0"
143# information. 143# information.
144# 144#
145 145
146FOSS_COPYRIGHT = "true" 146FOSS_NO_COPYRIGHT = "true"
147 147
148# An option defined as [FOSS_RECURSIVE_UNPACK] in ./meta/conf/licenses.conf is 148# An option defined as [FOSS_RECURSIVE_UNPACK] in ./meta/conf/licenses.conf is
149# used to control whether the FOSSology server needs to recursively unpack the tar.gz file which 149# used to control whether the FOSSology server needs to recursively unpack the tar.gz file which
@@ -159,12 +159,30 @@ FOSS_COPYRIGHT = "true"
159 159
160FOSS_RECURSIVE_UNPACK = "false" 160FOSS_RECURSIVE_UNPACK = "false"
161 161
162# FOSSologySPDX instance server. 162# An option defined as [FOSS_FULL_SPDX] in ./meta/conf/licenses.conf is used to
163# control what kind of SPDX output to get from the FOSSology server.
164#
165# FOSS_FULL_SPDX = "true":
166# Tell the FOSSology server to return full SPDX output, as if the program were
167# run from the command line. This is needed in order to get license refs for
168# the full package rather than individual files only.
169#
170# FOSS_FULL_SPDX = "false":
171# Tell FOSSology to only process license information for files. All package
172# license tags in the report will be "NOASSERTION"
173#
174
175FOSS_FULL_SPDX = "true"
176
177# FOSSologySPDX instance server. http://localhost/repo is the default
178# installation location for FOSSology.
179#
163# For more information on FOSSologySPDX commandline: 180# For more information on FOSSologySPDX commandline:
164# https://github.com/spdx-tools/fossology-spdx/wiki/Fossology-SPDX-Web-API 181# https://github.com/spdx-tools/fossology-spdx/wiki/Fossology-SPDX-Web-API
165# 182#
166 183
167FOSS_SERVER = "http://localhost//?mod=spdx_license_once&noCopyright=${FOSS_COPYRIGHT}&recursiveUnpack=${FOSS_RECURSIVE_UNPACK}" 184FOSS_BASE_URL = "http://localhost/repo/?mod=spdx_license_once"
185FOSS_SERVER = "${FOSS_BASE_URL}&fullSPDXFlag=${FOSS_FULL_SPDX}&noCopyright=${FOSS_NO_COPYRIGHT}&recursiveUnpack=${FOSS_RECURSIVE_UNPACK}"
168 186
169FOSS_WGET_FLAGS = "-qO - --no-check-certificate --timeout=0" 187FOSS_WGET_FLAGS = "-qO - --no-check-certificate --timeout=0"
170 188