diff options
author | Tobias Olausson <tobias.olausson@pelagicore.com> | 2014-10-20 16:09:15 +0200 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2014-10-24 17:36:15 +0100 |
commit | f348071efd9419de3fa7f4e4ad78dfa5f8445412 (patch) | |
tree | 7210e42b132f8cfd8df9a89dde184629a7b3fb81 /meta/classes/spdx.bbclass | |
parent | 8882eaaf97821fd96a72dc1382517689912d357b (diff) | |
download | poky-f348071efd9419de3fa7f4e4ad78dfa5f8445412.tar.gz |
spdx.bbclass: improved stability, fixed SPDX compliance issues. Changes are reflected in licenses.conf.
The previous version could crash on dead links in the rootfs, or if the manifest directory did not
exist. The generated files were also not compliant with the SPDX specification: for example, file
entries did not always start with the FileName tag, and time stamps were incorrectly formatted.
Stability issues are addressed by added checks, originally written by Johan Thelin
<johan.thelin@pelagicore.com>, who never upstreamed them. I've also added an option for getting full
SPDX output from FOSSology, i.e. not only for all files, but for the package as well, including
license references. License refs are required for SPDXTools to process the output. For that
reason, this option defaults to true.
(From OE-Core rev: 5d3a4f4f57e4d8581fd88a14324f94e93104a690)
Signed-off-by: Tobias Olausson <tobias.olausson@pelagicore.com>
Signed-off-by: Ross Burton <ross.burton@intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'meta/classes/spdx.bbclass')
-rw-r--r-- | meta/classes/spdx.bbclass | 154 |
1 files changed, 105 insertions, 49 deletions
diff --git a/meta/classes/spdx.bbclass b/meta/classes/spdx.bbclass index bccc230d8c..c0050f394d 100644 --- a/meta/classes/spdx.bbclass +++ b/meta/classes/spdx.bbclass | |||
@@ -43,6 +43,9 @@ python do_spdx () { | |||
43 | info['spdx_temp_dir'] = (d.getVar('SPDX_TEMP_DIR', True) or "") | 43 | info['spdx_temp_dir'] = (d.getVar('SPDX_TEMP_DIR', True) or "") |
44 | info['tar_file'] = os.path.join( info['workdir'], info['pn'] + ".tar.gz" ) | 44 | info['tar_file'] = os.path.join( info['workdir'], info['pn'] + ".tar.gz" ) |
45 | 45 | ||
46 | # Make sure manifest dir exists | ||
47 | if not os.path.exists( manifest_dir ): | ||
48 | bb.utils.mkdirhier( manifest_dir ) | ||
46 | 49 | ||
47 | ## get everything from cache. use it to decide if | 50 | ## get everything from cache. use it to decide if |
48 | ## something needs to be rerun | 51 | ## something needs to be rerun |
@@ -67,24 +70,27 @@ python do_spdx () { | |||
67 | 70 | ||
68 | if cache_cur: | 71 | if cache_cur: |
69 | spdx_file_info = cached_spdx['Files'] | 72 | spdx_file_info = cached_spdx['Files'] |
73 | foss_package_info = cached_spdx['Package'] | ||
74 | foss_license_info = cached_spdx['Licenses'] | ||
70 | else: | 75 | else: |
71 | ## setup fossology command | 76 | ## setup fossology command |
72 | foss_server = (d.getVar('FOSS_SERVER', True) or "") | 77 | foss_server = (d.getVar('FOSS_SERVER', True) or "") |
73 | foss_flags = (d.getVar('FOSS_WGET_FLAGS', True) or "") | 78 | foss_flags = (d.getVar('FOSS_WGET_FLAGS', True) or "") |
79 | foss_full_spdx = (d.getVar('FOSS_FULL_SPDX', True) == "true" or false) | ||
74 | foss_command = "wget %s --post-file=%s %s"\ | 80 | foss_command = "wget %s --post-file=%s %s"\ |
75 | % (foss_flags,info['tar_file'],foss_server) | 81 | % (foss_flags,info['tar_file'],foss_server) |
76 | 82 | ||
77 | #bb.warn(info['pn'] + json.dumps(local_file_info)) | 83 | (foss_package_info, foss_file_info, foss_license_info) = run_fossology( foss_command, foss_full_spdx ) |
78 | foss_file_info = run_fossology( foss_command ) | ||
79 | spdx_file_info = create_spdx_doc( local_file_info, foss_file_info ) | 84 | spdx_file_info = create_spdx_doc( local_file_info, foss_file_info ) |
80 | ## write to cache | 85 | ## write to cache |
81 | write_cached_spdx(sstatefile,cur_ver_code,spdx_file_info) | 86 | write_cached_spdx(sstatefile, cur_ver_code, foss_package_info, |
87 | spdx_file_info, foss_license_info) | ||
82 | 88 | ||
83 | ## Get document and package level information | 89 | ## Get document and package level information |
84 | spdx_header_info = get_header_info(info, cur_ver_code, spdx_file_info) | 90 | spdx_header_info = get_header_info(info, cur_ver_code, foss_package_info) |
85 | 91 | ||
86 | ## CREATE MANIFEST | 92 | ## CREATE MANIFEST |
87 | create_manifest(info,spdx_header_info,spdx_file_info) | 93 | create_manifest(info,spdx_header_info,spdx_file_info, foss_license_info) |
88 | 94 | ||
89 | ## clean up the temp stuff | 95 | ## clean up the temp stuff |
90 | remove_dir_tree( info['spdx_temp_dir'] ) | 96 | remove_dir_tree( info['spdx_temp_dir'] ) |
@@ -93,32 +99,50 @@ python do_spdx () { | |||
93 | } | 99 | } |
94 | addtask spdx after do_patch before do_configure | 100 | addtask spdx after do_patch before do_configure |
95 | 101 | ||
96 | def create_manifest(info,header,files): | 102 | def create_manifest(info, header, files, licenses): |
97 | with open(info['outfile'], 'w') as f: | 103 | import codecs |
104 | with codecs.open(info['outfile'], mode='w', encoding='utf-8') as f: | ||
105 | # Write header | ||
98 | f.write(header + '\n') | 106 | f.write(header + '\n') |
107 | |||
108 | # Write file data | ||
99 | for chksum, block in files.iteritems(): | 109 | for chksum, block in files.iteritems(): |
110 | f.write("FileName: " + block['FileName'] + '\n') | ||
100 | for key, value in block.iteritems(): | 111 | for key, value in block.iteritems(): |
101 | f.write(key + ": " + value) | 112 | if not key == 'FileName': |
102 | f.write('\n') | 113 | f.write(key + ": " + value + '\n') |
114 | f.write('\n') | ||
115 | |||
116 | # Write license data | ||
117 | for id, block in licenses.iteritems(): | ||
118 | f.write("LicenseID: " + id + '\n') | ||
119 | for key, value in block.iteritems(): | ||
120 | f.write(key + ": " + value + '\n') | ||
103 | f.write('\n') | 121 | f.write('\n') |
104 | 122 | ||
105 | def get_cached_spdx( sstatefile ): | 123 | def get_cached_spdx( sstatefile ): |
106 | import json | 124 | import json |
125 | import codecs | ||
107 | cached_spdx_info = {} | 126 | cached_spdx_info = {} |
108 | with open( sstatefile, 'r' ) as f: | 127 | with codecs.open( sstatefile, mode='r', encoding='utf-8' ) as f: |
109 | try: | 128 | try: |
110 | cached_spdx_info = json.load(f) | 129 | cached_spdx_info = json.load(f) |
111 | except ValueError as e: | 130 | except ValueError as e: |
112 | cached_spdx_info = None | 131 | cached_spdx_info = None |
113 | return cached_spdx_info | 132 | return cached_spdx_info |
114 | 133 | ||
115 | def write_cached_spdx( sstatefile, ver_code, files ): | 134 | def write_cached_spdx( sstatefile, ver_code, package_info, files, license_info): |
116 | import json | 135 | import json |
136 | import codecs | ||
117 | spdx_doc = {} | 137 | spdx_doc = {} |
118 | spdx_doc['PackageVerificationCode'] = ver_code | 138 | spdx_doc['PackageVerificationCode'] = ver_code |
119 | spdx_doc['Files'] = {} | 139 | spdx_doc['Files'] = {} |
120 | spdx_doc['Files'] = files | 140 | spdx_doc['Files'] = files |
121 | with open( sstatefile, 'w' ) as f: | 141 | spdx_doc['Package'] = {} |
142 | spdx_doc['Package'] = package_info | ||
143 | spdx_doc['Licenses'] = {} | ||
144 | spdx_doc['Licenses'] = license_info | ||
145 | with codecs.open( sstatefile, mode='w', encoding='utf-8' ) as f: | ||
122 | f.write(json.dumps(spdx_doc)) | 146 | f.write(json.dumps(spdx_doc)) |
123 | 147 | ||
124 | def setup_foss_scan( info, cache, cached_files ): | 148 | def setup_foss_scan( info, cache, cached_files ): |
@@ -139,7 +163,8 @@ def setup_foss_scan( info, cache, cached_files ): | |||
139 | continue | 163 | continue |
140 | 164 | ||
141 | checksum = hash_file( abs_path ) | 165 | checksum = hash_file( abs_path ) |
142 | mtime = time.asctime(time.localtime(stats.st_mtime)) | 166 | if not checksum is None: |
167 | mtime = time.asctime(time.localtime(stats.st_mtime)) | ||
143 | 168 | ||
144 | ## retain cache information if it exists | 169 | ## retain cache information if it exists |
145 | file_info[checksum] = {} | 170 | file_info[checksum] = {} |
@@ -147,27 +172,25 @@ def setup_foss_scan( info, cache, cached_files ): | |||
147 | file_info[checksum] = cached_files[checksum] | 172 | file_info[checksum] = cached_files[checksum] |
148 | else: | 173 | else: |
149 | file_info[checksum]['FileName'] = full_path | 174 | file_info[checksum]['FileName'] = full_path |
150 | |||
151 | try: | ||
152 | os.makedirs( dest_dir ) | ||
153 | except OSError as e: | ||
154 | if e.errno == errno.EEXIST and os.path.isdir(dest_dir): | ||
155 | pass | ||
156 | else: | ||
157 | bb.warn( "mkdir failed " + str(e) + "\n" ) | ||
158 | continue | ||
159 | |||
160 | if(cache and checksum not in cached_files) or not cache: | ||
161 | try: | 175 | try: |
162 | shutil.copyfile( abs_path, dest_path ) | 176 | os.makedirs(dest_dir) |
163 | except shutil.Error as e: | 177 | except OSError as e: |
164 | bb.warn( str(e) + "\n" ) | 178 | if e.errno == errno.EEXIST and os.path.isdir(dest_dir): |
165 | except IOError as e: | 179 | pass |
166 | bb.warn( str(e) + "\n" ) | 180 | else: |
181 | bb.warn( "mkdir failed " + str(e) + "\n" ) | ||
182 | continue | ||
183 | |||
184 | if (cache and checksum not in cached_files) or not cache: | ||
185 | try: | ||
186 | shutil.copyfile( abs_path, dest_path ) | ||
187 | except shutil.Error as e: | ||
188 | bb.warn( str(e) + "\n" ) | ||
189 | except IOError as e: | ||
190 | bb.warn( str(e) + "\n" ) | ||
167 | 191 | ||
168 | with tarfile.open( info['tar_file'], "w:gz" ) as tar: | 192 | with tarfile.open( info['tar_file'], "w:gz" ) as tar: |
169 | tar.add( info['spdx_temp_dir'], arcname=os.path.basename(info['spdx_temp_dir']) ) | 193 | tar.add( info['spdx_temp_dir'], arcname=os.path.basename(info['spdx_temp_dir']) ) |
170 | tar.close() | ||
171 | 194 | ||
172 | return file_info | 195 | return file_info |
173 | 196 | ||
@@ -193,13 +216,15 @@ def list_files( dir ): | |||
193 | return | 216 | return |
194 | 217 | ||
195 | def hash_file( file_name ): | 218 | def hash_file( file_name ): |
219 | f = None | ||
196 | try: | 220 | try: |
197 | f = open( file_name, 'rb' ) | 221 | f = open( file_name, 'rb' ) |
198 | data_string = f.read() | 222 | data_string = f.read() |
199 | except: | 223 | except: |
200 | return None | 224 | return None |
201 | finally: | 225 | finally: |
202 | f.close() | 226 | if not f is None: |
227 | f.close() | ||
203 | sha1 = hash_string( data_string ) | 228 | sha1 = hash_string( data_string ) |
204 | return sha1 | 229 | return sha1 |
205 | 230 | ||
@@ -209,30 +234,58 @@ def hash_string( data ): | |||
209 | sha1.update( data ) | 234 | sha1.update( data ) |
210 | return sha1.hexdigest() | 235 | return sha1.hexdigest() |
211 | 236 | ||
212 | def run_fossology( foss_command ): | 237 | def run_fossology( foss_command, full_spdx ): |
213 | import string, re | 238 | import string, re |
214 | import subprocess | 239 | import subprocess |
215 | 240 | ||
216 | p = subprocess.Popen(foss_command.split(), | 241 | p = subprocess.Popen(foss_command.split(), |
217 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) | 242 | stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
218 | foss_output, foss_error = p.communicate() | 243 | foss_output, foss_error = p.communicate() |
219 | 244 | foss_output = unicode(foss_output, "utf-8") | |
220 | records = [] | 245 | foss_output = string.replace(foss_output, '\r', '') |
221 | records = re.findall('FileName:.*?</text>', foss_output, re.S) | 246 | |
247 | # Package info | ||
248 | package_info = {} | ||
249 | if full_spdx: | ||
250 | # All mandatory, only one occurance | ||
251 | package_info['PackageCopyrightText'] = re.findall('PackageCopyrightText: (.*?</text>)', foss_output, re.S)[0] | ||
252 | package_info['PackageLicenseDeclared'] = re.findall('PackageLicenseDeclared: (.*)', foss_output)[0] | ||
253 | package_info['PackageLicenseConcluded'] = re.findall('PackageLicenseConcluded: (.*)', foss_output)[0] | ||
254 | # These may be more than one | ||
255 | package_info['PackageLicenseInfoFromFiles'] = re.findall('PackageLicenseInfoFromFiles: (.*)', foss_output) | ||
256 | else: | ||
257 | DEFAULT = "NOASSERTION" | ||
258 | package_info['PackageCopyrightText'] = "<text>" + DEFAULT + "</text>" | ||
259 | package_info['PackageLicenseDeclared'] = DEFAULT | ||
260 | package_info['PackageLicenseConcluded'] = DEFAULT | ||
261 | package_info['PackageLicenseInfoFromFiles'] = [] | ||
222 | 262 | ||
263 | # File info | ||
223 | file_info = {} | 264 | file_info = {} |
265 | records = [] | ||
266 | # FileName is also in PackageFileName, so we match on FileType as well. | ||
267 | records = re.findall('FileName:.*?FileType:.*?</text>', foss_output, re.S) | ||
268 | |||
224 | for rec in records: | 269 | for rec in records: |
225 | rec = string.replace( rec, '\r', '' ) | ||
226 | chksum = re.findall( 'FileChecksum: SHA1: (.*)\n', rec)[0] | 270 | chksum = re.findall( 'FileChecksum: SHA1: (.*)\n', rec)[0] |
227 | file_info[chksum] = {} | 271 | file_info[chksum] = {} |
228 | file_info[chksum]['FileCopyrightText'] = re.findall( 'FileCopyrightText: ' | 272 | file_info[chksum]['FileCopyrightText'] = re.findall( 'FileCopyrightText: ' |
229 | + '(.*?</text>)', rec, re.S )[0] | 273 | + '(.*?</text>)', rec, re.S )[0] |
230 | fields = ['FileType','LicenseConcluded', | 274 | fields = ['FileName', 'FileType', 'LicenseConcluded', 'LicenseInfoInFile'] |
231 | 'LicenseInfoInFile','FileName'] | ||
232 | for field in fields: | 275 | for field in fields: |
233 | file_info[chksum][field] = re.findall(field + ': (.*)', rec)[0] | 276 | file_info[chksum][field] = re.findall(field + ': (.*)', rec)[0] |
234 | 277 | ||
235 | return file_info | 278 | # Licenses |
279 | license_info = {} | ||
280 | licenses = [] | ||
281 | licenses = re.findall('LicenseID:.*?LicenseName:.*?\n', foss_output, re.S) | ||
282 | for lic in licenses: | ||
283 | license_id = re.findall('LicenseID: (.*)\n', lic)[0] | ||
284 | license_info[license_id] = {} | ||
285 | license_info[license_id]['ExtractedText'] = re.findall('ExtractedText: (.*?</text>)',lic, re.S)[0] | ||
286 | license_info[license_id]['LicenseName'] = re.findall('LicenseName: (.*)', lic)[0] | ||
287 | |||
288 | return (package_info, file_info, license_info) | ||
236 | 289 | ||
237 | def create_spdx_doc( file_info, scanned_files ): | 290 | def create_spdx_doc( file_info, scanned_files ): |
238 | import json | 291 | import json |
@@ -259,12 +312,14 @@ def get_ver_code( dirname ): | |||
259 | except OSError as e: | 312 | except OSError as e: |
260 | bb.warn( "Stat failed" + str(e) + "\n") | 313 | bb.warn( "Stat failed" + str(e) + "\n") |
261 | continue | 314 | continue |
262 | chksums.append(hash_file(os.path.join(dirname,f_dir,f))) | 315 | hash = hash_file(os.path.join(dirname,f_dir,f)) |
316 | if not hash is None: | ||
317 | chksums.append(hash) | ||
263 | ver_code_string = ''.join( chksums ).lower() | 318 | ver_code_string = ''.join( chksums ).lower() |
264 | ver_code = hash_string( ver_code_string ) | 319 | ver_code = hash_string( ver_code_string ) |
265 | return ver_code | 320 | return ver_code |
266 | 321 | ||
267 | def get_header_info( info, spdx_verification_code, spdx_files ): | 322 | def get_header_info( info, spdx_verification_code, package_info): |
268 | """ | 323 | """ |
269 | Put together the header SPDX information. | 324 | Put together the header SPDX information. |
270 | Eventually this needs to become a lot less | 325 | Eventually this needs to become a lot less |
@@ -290,9 +345,9 @@ def get_header_info( info, spdx_verification_code, spdx_files ): | |||
290 | head.append("") | 345 | head.append("") |
291 | 346 | ||
292 | ## Creator information | 347 | ## Creator information |
293 | now = datetime.now().strftime('%Y-%m-%dT%H:%M:%S') | 348 | now = datetime.now().strftime('%Y-%m-%dT%H:%M:%SZ') |
294 | head.append("## Creation Information") | 349 | head.append("## Creation Information") |
295 | head.append("Creator: fossology-spdx") | 350 | head.append("Creator: Tool: fossology-spdx") |
296 | head.append("Created: " + now) | 351 | head.append("Created: " + now) |
297 | head.append("CreatorComment: <text>UNO</text>") | 352 | head.append("CreatorComment: <text>UNO</text>") |
298 | head.append("") | 353 | head.append("") |
@@ -301,21 +356,22 @@ def get_header_info( info, spdx_verification_code, spdx_files ): | |||
301 | head.append("## Package Information") | 356 | head.append("## Package Information") |
302 | head.append("PackageName: " + info['pn']) | 357 | head.append("PackageName: " + info['pn']) |
303 | head.append("PackageVersion: " + info['pv']) | 358 | head.append("PackageVersion: " + info['pv']) |
304 | head.append("PackageDownloadLocation: " + DEFAULT) | ||
305 | head.append("PackageSummary: <text></text>") | ||
306 | head.append("PackageFileName: " + os.path.basename(info['tar_file'])) | 359 | head.append("PackageFileName: " + os.path.basename(info['tar_file'])) |
307 | head.append("PackageSupplier: Person:" + DEFAULT) | 360 | head.append("PackageSupplier: Person:" + DEFAULT) |
361 | head.append("PackageDownloadLocation: " + DEFAULT) | ||
362 | head.append("PackageSummary: <text></text>") | ||
308 | head.append("PackageOriginator: Person:" + DEFAULT) | 363 | head.append("PackageOriginator: Person:" + DEFAULT) |
309 | head.append("PackageChecksum: SHA1: " + package_checksum) | 364 | head.append("PackageChecksum: SHA1: " + package_checksum) |
310 | head.append("PackageVerificationCode: " + spdx_verification_code) | 365 | head.append("PackageVerificationCode: " + spdx_verification_code) |
311 | head.append("PackageDescription: <text>" + info['pn'] | 366 | head.append("PackageDescription: <text>" + info['pn'] |
312 | + " version " + info['pv'] + "</text>") | 367 | + " version " + info['pv'] + "</text>") |
313 | head.append("") | 368 | head.append("") |
314 | head.append("PackageCopyrightText: <text>" + DEFAULT + "</text>") | 369 | head.append("PackageCopyrightText: " + package_info['PackageCopyrightText']) |
315 | head.append("") | 370 | head.append("") |
316 | head.append("PackageLicenseDeclared: " + DEFAULT) | 371 | head.append("PackageLicenseDeclared: " + package_info['PackageLicenseDeclared']) |
317 | head.append("PackageLicenseConcluded: " + DEFAULT) | 372 | head.append("PackageLicenseConcluded: " + package_info['PackageLicenseConcluded']) |
318 | head.append("PackageLicenseInfoFromFiles: " + DEFAULT) | 373 | for licref in package_info['PackageLicenseInfoFromFiles']: |
374 | head.append("PackageLicenseInfoFromFiles: " + licref) | ||
319 | head.append("") | 375 | head.append("") |
320 | 376 | ||
321 | ## header for file level | 377 | ## header for file level |