summaryrefslogtreecommitdiffstats
path: root/meta/lib/oe/spdx30_tasks.py
diff options
context:
space:
mode:
Diffstat (limited to 'meta/lib/oe/spdx30_tasks.py')
-rw-r--r--meta/lib/oe/spdx30_tasks.py1367
1 files changed, 1367 insertions, 0 deletions
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
new file mode 100644
index 0000000000..c352dab152
--- /dev/null
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -0,0 +1,1367 @@
1#
2# Copyright OpenEmbedded Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7import json
8import oe.cve_check
9import oe.packagedata
10import oe.patch
11import oe.sbom30
12import oe.spdx30
13import oe.spdx_common
14import oe.sdk
15import os
16
17from contextlib import contextmanager
18from datetime import datetime, timezone
19from pathlib import Path
20
21
def walk_error(err):
    """Log a directory-walk failure through the BitBake error logger.

    Used in this file as the ``onerror`` callback of ``os.walk``; ``err`` is
    the exception handed over by the walk and must expose ``filename``.
    """
    message = f"ERROR walking {err.filename}: {err}"
    bb.error(message)
24
25
def set_timestamp_now(d, o, prop):
    """Stamp ``o.<prop>`` with the current UTC time, or drop the property.

    The timestamp is only written when SPDX_INCLUDE_TIMESTAMPS is "1"; in
    every other case the attribute named by ``prop`` is deleted from ``o``.
    """
    if d.getVar("SPDX_INCLUDE_TIMESTAMPS") != "1":
        # Deleting the attribute helps to validate that the property actually
        # exists, and also that it is not mandatory
        delattr(o, prop)
        return

    setattr(o, prop, datetime.now(timezone.utc))
33
34
def add_license_expression(d, objset, license_expression, license_data):
    """Convert a recipe license expression into an SPDX license expression.

    The expression is split into tokens on ``(``, ``)``, ``|`` and ``&``.
    ``&`` becomes ``AND``, ``|`` becomes ``OR`` and ``CLOSED`` becomes
    ``NONE``. Every other token is mapped through the ``SPDXLICENSEMAP``
    variable flags; if the result is not a key of ``license_data["licenses"]``
    it is emitted as ``LicenseRef-<name>`` and a license text element is
    looked up in (or added to) ``objset`` for it.

    Args:
        d: datastore providing ``getVar``/``getVarFlag``/``expand``.
        objset: object set that receives the license elements.
        license_expression: the license expression string to convert.
        license_data: mapping with a ``"licenses"`` entry of known license IDs.

    Returns:
        The object returned by ``objset.new_license_expression`` (after an
        element alias has been set on it).
    """
    # Cache of license name -> license text element created/found so far
    simple_license_text = {}
    # Maps "LicenseRef-<name>" to the link ID of its license text element
    license_text_map = {}

    def add_license_text(name):
        """Return the license text element for ``name``, creating it if needed."""
        if name in simple_license_text:
            return simple_license_text[name]

        lic = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )

        if lic is not None:
            simple_license_text[name] = lic
            return lic

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(lic)
        simple_license_text[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        search_dirs = [d.getVar("COMMON_LICENSE_DIR")]
        search_dirs.extend((d.getVar("LICENSE_PATH") or "").split())
        for directory in search_dirs:
            if not directory:
                # An unset COMMON_LICENSE_DIR must not reach Path()
                continue

            try:
                with (Path(directory) / name).open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
                    return lic

            except FileNotFoundError:
                pass

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if filename:
            filename = d.expand("${S}/" + filename)
            with open(filename, errors="replace") as f:
                lic.simplelicensing_licenseText = f.read()
                return lic
        else:
            bb.fatal("Cannot find any text for license %s" % name)

    def convert(l):
        """Translate one token of the recipe expression to its SPDX form."""
        if l == "(" or l == ")":
            return l

        if l == "&":
            return "AND"

        if l == "|":
            return "OR"

        if l == "CLOSED":
            return "NONE"

        spdx_license = d.getVarFlag("SPDXLICENSEMAP", l) or l
        if spdx_license in license_data["licenses"]:
            return spdx_license

        spdx_license = "LicenseRef-" + l
        if spdx_license not in license_text_map:
            license_text_map[spdx_license] = oe.sbom30.get_element_link_id(
                add_license_text(l)
            )

        return spdx_license

    # Pad the operators with spaces so a plain split() tokenizes the expression
    lic_split = (
        license_expression.replace("(", " ( ")
        .replace(")", " ) ")
        .replace("|", " | ")
        .replace("&", " & ")
        .split()
    )
    spdx_license_expression = " ".join(convert(l) for l in lic_split)

    o = objset.new_license_expression(
        spdx_license_expression, license_data, license_text_map
    )
    objset.set_element_alias(o)
    return o
134
135
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data=None,
    *,
    archive=None,
    ignore_dirs=(),
    ignore_top_level_dirs=(),
):
    """Walk ``topdir`` and add an SPDX file element for each regular file.

    Directories and files are visited in sorted order. Symlinks and anything
    that is not a regular file are skipped. When SPDX_INCLUDE_COMPILED_SOURCES
    is "1", files that ``oe.spdx_common.is_compiled_source`` rejects are
    skipped as well.

    Args:
        d: datastore providing ``getVar``.
        objset: object set on which ``new_file`` is called.
        topdir: directory to walk; if it does not exist nothing is added.
        get_spdxid: callable taking the running file counter (starting at 1)
            and returning the SPDX ID for the file.
        get_purposes: callable taking the file path and returning the list of
            purposes for the file.
        license_data: when not None, files whose purposes include ``source``
            are passed to ``objset.scan_declared_licenses``.
        archive: optional tar archive object; each added file is also stored
            in it with root ownership and an mtime no newer than
            SOURCE_DATE_EPOCH (when that variable is set).
        ignore_dirs: directory names pruned at every level of the walk.
        ignore_top_level_dirs: directory names pruned directly below
            ``topdir`` only.

    Returns:
        The set of objects returned by ``objset.new_file``.
    """
    # An unset or empty SOURCE_DATE_EPOCH means "do not clamp mtimes"
    source_date_epoch = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(source_date_epoch) if source_date_epoch else None

    spdx_files = set()

    file_counter = 1
    if not os.path.exists(topdir):
        bb.note(f"Skip {topdir}")
        return spdx_files

    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
    if check_compiled_sources:
        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")

    for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
        # Assigning to dirs[:] prunes the walk in place
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        dirs.sort()
        files.sort()
        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            # Check if file is compiled
            if check_compiled_sources:
                if not oe.spdx_common.is_compiled_source(
                    filename, compiled_sources, types
                ):
                    continue

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if (
                oe.spdx30.software_SoftwarePurpose.source in file_purposes
                and license_data is not None
            ):
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            # Only files that were actually added consume a counter value
            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files
217
218
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the known source files referenced by a package's debug data.

    For every file of ``package`` that carries a ``debugsrc`` list in the
    extended package data, each debug source path is looked up under a fixed
    list of search directories, hashed with SHA256, and matched against
    ``sources``.

    Args:
        d: datastore providing ``getVar``.
        package: name of the package whose extended package data is read.
        package_files: SPDX file elements of the package; every file with
            debug sources must match one of them by name or alias.
        sources: mapping of SHA256 hex digest to source file element.
        source_hash_cache: mapping of path to SHA256 (or None for paths that
            are not regular files); read and updated by this function.

    Returns:
        The set of elements from ``sources`` matched by a debug source. The
        set is empty when the package has no extended package data.
    """

    def file_path_match(file_path, pkg_file):
        """Check whether ``file_path`` is the name or an alias of ``pkg_file``."""
        wanted = file_path.lstrip("/")
        if wanted == pkg_file.name.lstrip("/"):
            return True

        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                for a in e.aliases:
                    if wanted == a.lstrip("/"):
                        return True

        return False

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        # Return an empty set rather than None so callers can always iterate
        return set()

    dep_source_files = set()

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            continue

        for debugsrc in file_data["debugsrc"]:
            for search in debug_search_paths:
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "")
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        # Previously seen and known not to be a regular file
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    source_file = sources[file_sha256]
                    dep_source_files.add(source_file)
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first search path holding the file ends the search
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
294
295
def collect_dep_objsets(d, build):
    """Load the recipe SPDX documents of this recipe's dependencies.

    Args:
        d: datastore passed on to the SPDX helpers.
        build: not used by this function; kept for the existing callers.

    Returns:
        A tuple ``(dep_objsets, dep_builds)``: the list of object sets of the
        dependencies that are part of the task hash, and the set of root
        build elements of all dependencies.
    """
    deps = oe.spdx_common.get_spdx_deps(d)

    dep_objsets = []
    dep_builds = set()

    for dep in deps:
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        dep_build, dep_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )
        # If the dependency is part of the taskhash, return it to be linked
        # against. Otherwise, it cannot be linked against because this recipe
        # will not be rebuilt if the dependency changes
        if dep.in_taskhash:
            dep_objsets.append(dep_objset)

        # The build _can_ be linked against (by alias)
        dep_builds.add(dep_build)

    return dep_objsets, dep_builds
318
319
def index_sources_by_hash(sources, dest):
    """Record source file elements in ``dest`` keyed by their SHA256 value.

    Only ``software_File`` elements whose primary purpose is ``source`` are
    considered. The first element seen for a given hash wins; later elements
    with the same hash leave ``dest`` untouched. A considered element without
    a SHA256 entry in ``verifiedUsing`` is reported through ``bb.fatal``.
    """
    for src in sources:
        if (
            not isinstance(src, oe.spdx30.software_File)
            or src.software_primaryPurpose
            != oe.spdx30.software_SoftwarePurpose.source
        ):
            continue

        # Pick the first SHA256 entry, if there is one
        sha256 = next(
            (
                check
                for check in src.verifiedUsing
                if check.algorithm == oe.spdx30.HashAlgorithm.sha256
            ),
            None,
        )

        if sha256 is None:
            bb.fatal(f"No SHA256 found for {src.name}")
        else:
            dest.setdefault(sha256.hashValue, src)
335
336
def collect_dep_sources(dep_objsets, dest):
    """Index the build inputs of every non-native dependency by SHA256.

    For each object set, the ``hasInput`` relationships that start at its root
    build are located and their targets are handed to
    ``index_sources_by_hash`` together with ``dest``.
    """
    for dep_objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if dep_objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (dep_objset.doc.name))

        root_build = dep_objset.find_root(oe.spdx30.build_Build)
        if not root_build:
            bb.fatal("Unable to find a build")

        for rel in dep_objset.foreach_type(oe.spdx30.Relationship):
            if (
                root_build is not rel.from_
                or rel.relationshipType != oe.spdx30.RelationshipType.hasInput
            ):
                continue

            index_sources_by_hash(rel.to, dest)
358
359
def add_download_files(d, objset):
    """Add an SPDX element for every entry of SRC_URI.

    Local ``file`` entries become file elements (one per regular file when the
    entry is a directory; symlinks are skipped). Every other entry becomes a
    package element with its download location and, when the fetcher supports
    checksums, the expected sha256/sha1 hashes. Entries recognised by
    ``oe.patch.patch_path`` get the ``patch`` purpose, all others ``source``.

    Args:
        d: datastore providing ``getVar``.
        objset: object set that receives the new elements.

    Returns:
        The set of all elements that were created.
    """
    inputs = set()

    # Tolerate an unset SRC_URI instead of failing on None.split()
    urls = (d.getVar("SRC_URI") or "").split()
    fetch = bb.fetch2.Fetch(urls, d)

    # SPDX IDs number the downloads starting at 1
    for download_num, src_uri in enumerate(urls, start=1):
        fd = fetch.ud[src_uri]

        file_name = os.path.basename(fetch.localpath(src_uri))
        if oe.patch.patch_path(src_uri, fetch, "", expand=False):
            primary_purpose = oe.spdx30.software_SoftwarePurpose.patch
        else:
            primary_purpose = oe.spdx30.software_SoftwarePurpose.source

        if fd.type == "file":
            if os.path.isdir(fd.localpath):
                walk_idx = 1
                for root, dirs, files in os.walk(fd.localpath, onerror=walk_error):
                    # Sort in place so the walk order is deterministic
                    dirs.sort()
                    files.sort()
                    for f in files:
                        f_path = os.path.join(root, f)
                        if os.path.islink(f_path):
                            # TODO: SPDX doesn't support symlinks yet
                            continue

                        file = objset.new_file(
                            objset.new_spdxid(
                                "source", str(download_num), str(walk_idx)
                            ),
                            os.path.join(
                                file_name, os.path.relpath(f_path, fd.localpath)
                            ),
                            f_path,
                            purposes=[primary_purpose],
                        )

                        inputs.add(file)
                        walk_idx += 1

            else:
                file = objset.new_file(
                    objset.new_spdxid("source", str(download_num)),
                    file_name,
                    fd.localpath,
                    purposes=[primary_purpose],
                )
                inputs.add(file)

        else:
            dl = objset.add(
                oe.spdx30.software_Package(
                    _id=objset.new_spdxid("source", str(download_num)),
                    creationInfo=objset.doc.creationInfo,
                    name=file_name,
                    software_primaryPurpose=primary_purpose,
                    software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                        fd, fd.name
                    ),
                )
            )

            if fd.method.supports_checksum(fd):
                # TODO Need something better than hard coding this
                for checksum_id in ["sha256", "sha1"]:
                    expected_checksum = getattr(
                        fd, "%s_expected" % checksum_id, None
                    )
                    if expected_checksum is None:
                        continue

                    dl.verifiedUsing.append(
                        oe.spdx30.Hash(
                            algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id),
                            hashValue=expected_checksum,
                        )
                    )

            inputs.add(dl)

    return inputs
442
443
def set_purposes(d, element, *var_names, force_purposes=()):
    """Set the primary and additional software purposes of ``element``.

    The purposes are ``force_purposes`` followed by the whitespace-separated
    value of the first variable in ``var_names`` that has a non-empty value.
    The first purpose becomes ``software_primaryPurpose`` and the rest
    ``software_additionalPurpose``. If no purpose is found a warning is
    emitted and ``element`` is left unchanged.

    Args:
        d: datastore providing ``getVar``.
        element: object whose purpose attributes are assigned.
        *var_names: variable names to try, in order.
        force_purposes: iterable of purpose names that always come first.
    """
    # Copy into a fresh list so the caller's sequence is never modified and
    # any iterable (list, tuple, ...) is accepted
    purposes = list(force_purposes)

    for var_name in var_names:
        val = d.getVar(var_name)
        if val:
            purposes.extend(val.split())
            break

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    element.software_primaryPurpose = getattr(
        oe.spdx30.software_SoftwarePurpose, purposes[0]
    )
    element.software_additionalPurpose = [
        getattr(oe.spdx30.software_SoftwarePurpose, p) for p in purposes[1:]
    ]
463
464
def create_spdx(d):
    """Write the recipe SPDX document and the staged per-package documents.

    The recipe object set ("recipe-<PN>") receives the recipe build element,
    custom annotations, CVE elements (depending on SPDX_INCLUDE_VEX), the
    downloaded sources, the recipe license and, when source processing and
    SPDX_INCLUDE_SOURCES are enabled, the patched source files. For non-native
    recipes one object set per packaged entry of PACKAGES is created and
    written to the "packages-staging" area; the recipe document is written
    to "recipes" last.

    Args:
        d: datastore of the recipe being processed.
    """

    def set_var_field(var, obj, name, package=None):
        """Copy ``var`` (preferring its ``var:package`` form) to ``obj.name``."""
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if include_vex not in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    # Maps status -> {cve: (spdx_cve, detail, description)}
    cve_by_status = {}
    if include_vex != "none":
        patched_cves = oe.cve_check.get_patched_cves(d)
        for cve, patched_cve in patched_cves.items():
            decoded_status = {
                "mapping": patched_cve["abbrev-status"],
                "detail": patched_cve["status"],
                "description": patched_cve.get("justification", None),
            }

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if include_vex != "all" and decoded_status["detail"] in (
                "fixed-version",
                "cpe-stable-backport",
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasDeclaredLicense,
        [oe.sbom30.get_element_link_id(recipe_spdx_license)],
    )

    # SHA256 -> source file element, for this recipe and its dependencies
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("SPDX_PACKAGE_VERSION"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            if d.getVar("SPDX_PACKAGE_URL:%s" % package) or d.getVar("SPDX_PACKAGE_URL"):
                set_var_field(
                    "SPDX_PACKAGE_URL",
                    spdx_package,
                    "software_packageUrl",
                    package=package,
                )

            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        vex_just_type = d.getVarFlag(
                            "CVE_CHECK_VEX_JUSTIFICATION", detail
                        )
                        if vex_just_type:
                            if (
                                vex_just_type
                                not in oe.spdx30.security_VexJustificationType.NAMED_INDIVIDUALS
                            ):
                                bb.fatal(
                                    f"Unknown vex justification '{vex_just_type}', detail '{detail}', for ignored {cve}"
                                )

                            for v in spdx_vex:
                                v.security_justificationType = oe.spdx30.security_VexJustificationType.NAMED_INDIVIDUALS[
                                    vex_just_type
                                ]

                    elif status == "Unknown":
                        bb.note(f"Skipping {cve} with status 'Unknown'")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(all_cves),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(package_files),
                )

            if include_sources:
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                # Tolerate a None result (package without extended package
                # data) instead of failing while iterating it
                debug_source_ids |= set(
                    oe.sbom30.get_element_link_id(src) for src in debug_sources or ()
                )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(sysroot_files),
            )

    if build_inputs or debug_source_ids:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(build_inputs) + sorted(debug_source_ids),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
819
820
def create_package_spdx(d):
    """Add runtime dependency links to the staged package SPDX documents.

    Nothing is done for native or cross recipes. For every packaged entry of
    PACKAGES the staged document is loaded from "packages-staging", a
    runtime-scoped ``dependsOn`` relationship to the SPDX packages providing
    its RDEPENDS is added, and the result is written to "packages". A
    "<PN>-package-common" object set is written to "common-package" last.

    Args:
        d: datastore of the recipe being processed.
    """
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )

    # Bail out before doing any work that native recipes do not need
    if is_native:
        return

    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    bb.build.exec_func("read_subpackage_metadata", d)

    # Provider package name -> loaded SPDX package, shared across packages
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    for package in d.getVar("PACKAGES").split():
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for rdep in deps:
            if rdep not in providers:
                continue

            (dep, _) = providers[rdep]

            # Track providers by their resolved package name so that several
            # RDEPENDS entries resolving to one provider are handled once
            if dep in seen_deps:
                continue

            if not oe.packagedata.packaged(dep, localdata):
                continue

            dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
            dep_pkg = dep_pkg_data["PKG"]

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            # Sort the link IDs so the relationship does not depend on set
            # iteration order, matching the other relationship lists built
            # in this file
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                sorted(
                    oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps
                ),
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
917
918
def write_bitbake_spdx(d):
    """Write the "bitbake.spdx.json" document into DEPLOY_DIR_SPDX.

    When SPDX_INCLUDE_BITBAKE_PARENT_BUILD is "1" the document gets a root
    build element with a random ID, optionally linked to the build host
    (SPDX_BUILD_HOST), the invoking agent (SPDX_INVOKED_BY) and the agent it
    acts on behalf of (SPDX_ON_BEHALF_OF). Otherwise those settings only
    produce warnings. Every element in the object set receives an
    ``OEIdAliasExtension`` before the document is written.

    Note that PN and BB_TASKHASH are overwritten in ``d``.
    """
    # Set PN to "bitbake" so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    host_import_key = d.getVar("SPDX_BUILD_HOST")
    invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False)
    on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") != "1":
        # Without a parent build none of these settings can be recorded
        ignored_settings = (
            (
                host_import_key,
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                invoked_by,
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                on_behalf_of,
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
        )
        for setting, warning in ignored_settings:
            if setting:
                bb.warn(warning)
    else:
        # Since the Build objects are unique, we may as well set the creation
        # time to the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Each invocation of bitbake should have a unique ID since it is a
        # unique build
        nonce = os.urandom(16).hex()

        build = objset.add_root(
            oe.spdx30.build_Build(
                _id=objset.new_spdxid(nonce, include_unihash=False),
                creationInfo=objset.doc.creationInfo,
                build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
            )
        )
        set_timestamp_now(d, build, "build_buildStartTime")

        if host_import_key:
            host = objset.new_import(host_import_key)
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [host],
            )

        if not invoked_by:
            if on_behalf_of:
                bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set")
        else:
            objset.add(invoked_by)
            invoked_by_spdx = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoked_by],
            )

            if on_behalf_of:
                objset.add(on_behalf_of)
                objset.new_scoped_relationship(
                    [on_behalf_of],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_spdx,
                )

    for element in objset.foreach_type(oe.spdx30.Element):
        element.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, deploy_dir_spdx / "bitbake.spdx.json")
1000
1001
def collect_build_package_inputs(d, objset, build, packages, files_by_hash=None):
    """Link the SPDX packages for ``packages`` to ``build`` as build inputs.

    Package names are processed in sorted order. For each name with a known
    provider, the provider's SPDX package is loaded from the "packages" area
    and its link ID is collected; when ``files_by_hash`` is given, the loaded
    object set's ``by_sha256_hash`` entries are merged into it. Names without
    a provider are reported together through ``bb.fatal`` after the loop.
    """
    import oe.sbom30

    providers = oe.spdx_common.collect_package_providers(d)

    input_ids = set()
    unresolved = set()

    for pkg in sorted(packages):
        if pkg not in providers:
            unresolved.add(pkg)
            continue

        provider_name, _hashfn = providers[pkg]

        # Copy all of the package SPDX files into the Sbom elements
        pkg_spdx, pkg_objset = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "packages",
            "package-" + provider_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )
        input_ids.add(oe.sbom30.get_element_link_id(pkg_spdx))

        if files_by_hash is None:
            continue

        for digest, hashed_files in pkg_objset.by_sha256_hash.items():
            files_by_hash.setdefault(digest, set()).update(hashed_files)

    if unresolved:
        bb.fatal(
            f"Unable to find SPDX provider(s) for: {', '.join(sorted(unresolved))}"
        )

    if not input_ids:
        return

    objset.new_scoped_relationship(
        [build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        sorted(input_ids),
    )
1043
1044
def create_rootfs_spdx(d):
    """Write the SPDX document describing the image root filesystem.

    The document ("<IMAGE_BASENAME>-<MACHINE>-rootfs") contains a rootfs
    package, the rootfs build that outputs it, the installed packages (read
    from the JSON file named by SPDX_ROOTFS_PACKAGES) as build inputs, and a
    ``contains`` relationship to every regular file found under IMAGE_ROOTFS.
    A rootfs file is linked to an existing package file element when both the
    SHA256 and the relative path match; otherwise a new file element is
    created for it.

    Args:
        d: datastore of the image recipe being processed.
    """
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    image_rootfs = d.getVar("IMAGE_ROOTFS")
    machine = d.getVar("MACHINE")

    with root_packages_file.open("r") as f:
        packages = json.load(f)

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-rootfs" % (image_basename, machine)
    )

    rootfs = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("rootfs", image_basename),
            creationInfo=objset.doc.creationInfo,
            name=image_basename,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, rootfs, "builtTime")

    rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs"))
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    # SHA256 -> set of elements from the installed packages with that hash
    files_by_hash = {}
    collect_build_package_inputs(d, objset, rootfs_build, packages, files_by_hash)

    files = set()
    for dirpath, dirnames, filenames in os.walk(image_rootfs, onerror=walk_error):
        # Sort in place so the walk order is deterministic
        dirnames.sort()
        filenames.sort()
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if fpath.is_symlink() or not fpath.is_file():
                continue

            relpath = str(fpath.relative_to(image_rootfs))
            h = bb.utils.sha256_file(fpath)

            for candidate in files_by_hash.get(h, ()):
                if (
                    isinstance(candidate, oe.spdx30.software_File)
                    and candidate.name == relpath
                ):
                    # Reuse the element that the package already describes
                    files.add(oe.sbom30.get_element_link_id(candidate))
                    break
            else:
                # No package file matches this hash and path
                files.add(
                    objset.new_file(
                        objset.new_spdxid("rootfs-file", relpath),
                        relpath,
                        fpath,
                    )
                )

    if files:
        objset.new_relationship(
            [rootfs],
            oe.spdx30.RelationshipType.contains,
            sorted(files),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)
1120
1121
def create_image_spdx(d):
    """Write the SPDX document describing the generated image artifacts.

    For each task entry in the JSON file named by IMAGE_OUTPUT_MANIFEST a
    build element is created, and every image the entry lists (resolved
    relative to IMGDEPLOYDIR) becomes an output of that build. An image that
    is a directory is expanded through add_package_files(); anything else is
    recorded as a single file with its sha256 hash. All builds then take the
    rootfs package from the "<IMAGE_BASENAME>-<MACHINE>-rootfs" document as
    their input, and the document is written to SPDXIMAGEWORK.
    """
    import oe.sbom30

    image_deploy_dir = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_path = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    spdx_work_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    with manifest_path.open("r") as manifest_file:
        manifest = json.load(manifest_file)

    # Callbacks handed to add_package_files() for directory images
    def image_file_spdxid(file_counter):
        return objset.new_spdxid("imagefile", str(file_counter))

    def empty_list(filepath):
        return []

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []
        for image in task["images"]:
            image_filename = image["filename"]
            image_path = image_deploy_dir / image_filename

            if not os.path.isdir(image_path):
                # Plain image file: a single root element with its checksum
                artifacts.append(
                    objset.add_root(
                        oe.spdx30.software_File(
                            _id=objset.new_spdxid("image", image_filename),
                            creationInfo=objset.doc.creationInfo,
                            name=image_filename,
                            verifiedUsing=[
                                oe.spdx30.Hash(
                                    algorithm=oe.spdx30.HashAlgorithm.sha256,
                                    hashValue=bb.utils.sha256_file(image_path),
                                )
                            ],
                        )
                    )
                )
                continue

            # Directory image: describe the files it contains
            artifacts.extend(
                add_package_files(
                    d,
                    objset,
                    image_path,
                    image_file_spdxid,
                    empty_list,
                    license_data=None,
                    ignore_dirs=[],
                    ignore_top_level_dirs=[],
                    archive=None,
                )
            )

        # The image type specific variable is listed ahead of the generic one
        typed_purpose_var = "SPDX_IMAGE_PURPOSE:%s" % imagetype
        for artifact in artifacts:
            set_purposes(d, artifact, typed_purpose_var, "SPDX_IMAGE_PURPOSE")
            set_timestamp_now(d, artifact, "builtTime")

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_doc_name = "%s-%s-rootfs" % (image_basename, machine)
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            rootfs_doc_name,
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        rootfs_link = oe.sbom30.get_element_link_id(rootfs_image)
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [rootfs_link],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", spdx_work_dir)
1221
1222
def create_image_sbom_spdx(d):
    """Assemble and write the top-level SBOM for an image.

    The SBOM's root elements are the rootfs package from the
    "<IMAGE_BASENAME>-<MACHINE>-rootfs" document followed by every root
    software_File of the "<IMAGE_BASENAME>-<MACHINE>-image" document. The
    SBOM is written to SPDXIMAGEDEPLOYDIR as "<IMAGE_NAME>.spdx.json"; when
    IMAGE_LINK_NAME is set and names a different path, a relative symlink
    "<IMAGE_LINK_NAME>.spdx.json" pointing at it is created alongside.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    suffix = ".spdx.json"
    spdx_path = imgdeploydir / (image_name + suffix)

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements = [oe.sbom30.get_element_link_id(rootfs_image)]

    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    root_elements.extend(
        oe.sbom30.get_element_link_id(image_file)
        for image_file in image_objset.foreach_root(oe.spdx30.software_File)
    )

    objset, _ = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    # Provide the SBOM under the image link name as well
    if image_link_name:
        link = imgdeploydir / (image_link_name + suffix)
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
1263
1264
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """Write the SPDX document describing an SDK root filesystem.

    Creates an "sdk-rootfs" package named "<toolchain_outputname>-<sdk_type>"
    together with the build that outputs it, passes the installed SDK package
    list to collect_build_package_inputs(), and writes the document to
    "sdk-rootfs.spdx.json" inside spdx_work_dir.

    sdk_type selects which package list is queried: the target one when it is
    "target", the other one for any different value. spdx_work_dir is joined
    with "/" and so is expected to be a pathlib.Path.
    """
    sdk_name = toolchain_outputname + "-" + sdk_type
    for_target = sdk_type == "target"
    sdk_packages = oe.sdk.sdk_list_installed_packages(d, for_target)

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    rootfs_package = oe.spdx30.software_Package(
        _id=objset.new_spdxid("sdk-rootfs", sdk_name),
        creationInfo=objset.doc.creationInfo,
        name=sdk_name,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    sdk_rootfs = objset.add_root(rootfs_package)
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    build_task = objset.new_task_build("sdk-rootfs", "sdk-rootfs")
    sdk_build = objset.add_root(build_task)
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    # The SDK rootfs package is the output of the sdk-rootfs build
    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    collect_build_package_inputs(d, objset, sdk_build, sdk_packages)

    objset.add_aliases()
    output_path = spdx_work_dir / "sdk-rootfs.spdx.json"
    oe.sbom30.write_jsonld_doc(d, objset, output_path)
1295
1296
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """Create and write the SBOM for the deployed SDK files.

    Loads "sdk-rootfs.spdx.json" from spdx_work_dir, adds an "sdk-populate"
    build that takes the SDK rootfs package as input and every regular,
    non-symlink file found under sdk_deploydir as output, then writes the
    SBOM to "<toolchain_outputname>.spdx.json" in sdk_deploydir.

    Both sdk_deploydir and spdx_work_dir are joined with "/" and so are
    expected to be pathlib.Path objects. bb.fatal() is called when the loaded
    document has no root software_Package; a warning is logged when
    sdk_deploydir contains no files.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()

    # NOTE: os.walk() lists symlinks to files in filenames (it only avoids
    # descending into symlinked directories), so they are skipped explicitly
    for dirpath, dirnames, filenames in os.walk(sdk_deploydir, onerror=walk_error):
        # Sort in place so the tree is visited in a stable order
        dirnames.sort()
        filenames.sort()
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if not fpath.is_file() or fpath.is_symlink():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))

            f = rootfs_objset.new_file(
                rootfs_objset.new_spdxid("sdk-installer", relpath),
                relpath,
                fpath,
            )
            set_timestamp_now(d, f, "builtTime")

            # Manifests and test data get a fixed purpose; everything else
            # takes its purposes from SPDX_SDK_PURPOSE
            if fn.endswith(".manifest"):
                f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest
            elif fn.endswith(".testdata.json"):
                f.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, f, "SPDX_SDK_PURPOSE")

            files.add(f)

    if files:
        # Pass a sorted list, like the other relationship call sites in this
        # file, rather than an unordered set
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(files),
        )
    else:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")

    objset, _ = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    oe.sbom30.write_jsonld_doc(
        d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json")
    )