summaryrefslogtreecommitdiffstats
path: root/meta/lib/oe/spdx30_tasks.py
diff options
context:
space:
mode:
Diffstat (limited to 'meta/lib/oe/spdx30_tasks.py')
-rw-r--r--meta/lib/oe/spdx30_tasks.py1368
1 files changed, 1368 insertions, 0 deletions
diff --git a/meta/lib/oe/spdx30_tasks.py b/meta/lib/oe/spdx30_tasks.py
new file mode 100644
index 0000000000..5d9f3168d9
--- /dev/null
+++ b/meta/lib/oe/spdx30_tasks.py
@@ -0,0 +1,1368 @@
1#
2# Copyright OpenEmbedded Contributors
3#
4# SPDX-License-Identifier: GPL-2.0-only
5#
6
7import json
8import oe.cve_check
9import oe.packagedata
10import oe.patch
11import oe.sbom30
12import oe.spdx30
13import oe.spdx_common
14import oe.sdk
15import os
16
17from contextlib import contextmanager
18from datetime import datetime, timezone
19from pathlib import Path
20
21
def walk_error(err):
    """Log an error raised while walking a directory tree.

    Used in this file as the ``onerror=`` callback of ``os.walk``; ``err`` is
    expected to expose a ``filename`` attribute (as ``OSError`` does).
    """
    message = f"ERROR walking {err.filename}: {err}"
    bb.error(message)
24
25
def set_timestamp_now(d, o, prop):
    """Set ``o.<prop>`` to the current UTC time, or remove the attribute.

    The timestamp is only written when ``SPDX_INCLUDE_TIMESTAMPS`` is exactly
    ``"1"``; for any other value the attribute is deleted from ``o``.
    """
    if d.getVar("SPDX_INCLUDE_TIMESTAMPS") != "1":
        # Deleting (rather than skipping) helps to validate that the property
        # actually exists, and also that it is not mandatory
        delattr(o, prop)
        return

    setattr(o, prop, datetime.now(timezone.utc))
33
34
def add_license_expression(d, objset, license_expression, license_data):
    """Convert an OE ``LICENSE`` string to an SPDX license expression element.

    The expression is tokenised on ``(``, ``)``, ``|`` and ``&``; operators
    are translated to ``AND``/``OR``, ``CLOSED`` becomes ``NONE``, and every
    other token is mapped through the ``SPDXLICENSEMAP`` varflags. Tokens that
    are not keys of ``license_data["licenses"]`` become ``LicenseRef-<name>``
    and get a ``SimpleLicensingText`` element holding the license text.

    Returns the element created by ``objset.new_license_expression`` (after
    an alias has been set on it).
    """
    # Tokens that translate directly, without any license lookup
    fixed_tokens = {
        "(": "(",
        ")": ")",
        "&": "AND",
        "|": "OR",
        "CLOSED": "NONE",
    }

    text_cache = {}
    license_text_map = {}

    def add_license_text(name):
        # Return the SimpleLicensingText element for `name`, creating and
        # filling it on first use.
        if name in text_cache:
            return text_cache[name]

        existing = objset.find_filter(
            oe.spdx30.simplelicensing_SimpleLicensingText,
            name=name,
        )
        if existing is not None:
            text_cache[name] = existing
            return existing

        lic = objset.add(
            oe.spdx30.simplelicensing_SimpleLicensingText(
                _id=objset.new_spdxid("license-text", name),
                creationInfo=objset.doc.creationInfo,
                name=name,
            )
        )
        objset.set_element_alias(lic)
        text_cache[name] = lic

        if name == "PD":
            lic.simplelicensing_licenseText = "Software released to the public domain"
            return lic

        # Search for the license in COMMON_LICENSE_DIR and LICENSE_PATH
        search_dirs = [d.getVar("COMMON_LICENSE_DIR")]
        search_dirs += (d.getVar("LICENSE_PATH") or "").split()
        for directory in search_dirs:
            candidate = Path(directory) / name
            try:
                with candidate.open(errors="replace") as f:
                    lic.simplelicensing_licenseText = f.read()
                    return lic
            except FileNotFoundError:
                pass

        # If it's not SPDX or PD, then NO_GENERIC_LICENSE must be set
        filename = d.getVarFlag("NO_GENERIC_LICENSE", name)
        if not filename:
            bb.fatal("Cannot find any text for license %s" % name)
        else:
            filename = d.expand("${S}/" + filename)
            with open(filename, errors="replace") as f:
                lic.simplelicensing_licenseText = f.read()
                return lic

    def convert(token):
        # Translate one token of the OE expression to its SPDX spelling
        if token in fixed_tokens:
            return fixed_tokens[token]

        mapped = d.getVarFlag("SPDXLICENSEMAP", token) or token
        if mapped in license_data["licenses"]:
            return mapped

        license_ref = "LicenseRef-" + token
        if license_ref not in license_text_map:
            license_text_map[license_ref] = oe.sbom30.get_element_link_id(
                add_license_text(token)
            )

        return license_ref

    # Pad every operator/parenthesis with spaces so a plain split() tokenises
    spaced = license_expression
    for symbol in ("(", ")", "|", "&"):
        spaced = spaced.replace(symbol, " %s " % symbol)

    spdx_license_expression = " ".join(convert(token) for token in spaced.split())

    o = objset.new_license_expression(
        spdx_license_expression, license_data, license_text_map
    )
    objset.set_element_alias(o)
    return o
134
135
def add_package_files(
    d,
    objset,
    topdir,
    get_spdxid,
    get_purposes,
    license_data=None,
    *,
    archive=None,
    ignore_dirs=[],
    ignore_top_level_dirs=[],
):
    """Create an SPDX file element for every regular file below ``topdir``.

    Parameters:
        d: datastore; ``SOURCE_DATE_EPOCH`` and
            ``SPDX_INCLUDE_COMPILED_SOURCES`` are read from it.
        objset: object set the new file elements are created in.
        topdir: directory to walk. If it does not exist an empty set is
            returned.
        get_spdxid: callable taking the running file counter (starting at 1)
            and returning the SPDX ID for the file.
        get_purposes: callable taking the file path and returning the list of
            purposes for the file.
        license_data: when not None, files whose purposes include ``source``
            are scanned for declared licenses.
        archive: optional open tar archive; each added file is also stored
            in it with root ownership and an mtime clamped to
            ``SOURCE_DATE_EPOCH``.
        ignore_dirs: directory names pruned at every level (only read, never
            mutated here).
        ignore_top_level_dirs: directory names pruned directly below
            ``topdir`` only (only read, never mutated here).

    Returns the set of created file elements. Symlinks and non-regular files
    are skipped; directories and files are visited in sorted order.
    """
    # Normalise to an int or None. An unset *or empty* variable must not
    # reach the mtime comparison below, where a leftover "" would raise
    # TypeError.
    raw_epoch = d.getVar("SOURCE_DATE_EPOCH")
    source_date_epoch = int(raw_epoch) if raw_epoch else None

    spdx_files = set()

    file_counter = 1
    if not os.path.exists(topdir):
        bb.note(f"Skip {topdir}")
        return spdx_files

    check_compiled_sources = d.getVar("SPDX_INCLUDE_COMPILED_SOURCES") == "1"
    if check_compiled_sources:
        compiled_sources, types = oe.spdx_common.get_compiled_sources(d)
        bb.debug(1, f"Total compiled files: {len(compiled_sources)}")

    for subdir, dirs, files in os.walk(topdir, onerror=walk_error):
        # Prune in place so os.walk does not descend into ignored directories
        dirs[:] = [name for name in dirs if name not in ignore_dirs]
        if subdir == str(topdir):
            dirs[:] = [name for name in dirs if name not in ignore_top_level_dirs]

        dirs.sort()
        files.sort()
        for file in files:
            filepath = Path(subdir) / file
            if filepath.is_symlink() or not filepath.is_file():
                continue

            filename = str(filepath.relative_to(topdir))
            file_purposes = get_purposes(filepath)

            # Check if file is compiled
            if check_compiled_sources:
                if not oe.spdx_common.is_compiled_source(filename, compiled_sources, types):
                    continue

            spdx_file = objset.new_file(
                get_spdxid(file_counter),
                filename,
                filepath,
                purposes=file_purposes,
            )
            spdx_files.add(spdx_file)

            if (
                oe.spdx30.software_SoftwarePurpose.source in file_purposes
                and license_data is not None
            ):
                objset.scan_declared_licenses(spdx_file, filepath, license_data)

            if archive is not None:
                with filepath.open("rb") as f:
                    info = archive.gettarinfo(fileobj=f)
                    info.name = filename
                    info.uid = 0
                    info.gid = 0
                    info.uname = "root"
                    info.gname = "root"

                    if source_date_epoch is not None and info.mtime > source_date_epoch:
                        info.mtime = source_date_epoch

                    archive.addfile(info, f)

            # Only files that were actually added consume a counter value
            file_counter += 1

    bb.debug(1, "Added %d files to %s" % (len(spdx_files), objset.doc._id))

    return spdx_files
217
218
def get_package_sources_from_debug(
    d, package, package_files, sources, source_hash_cache
):
    """Find the source file elements that a package's binaries were built from.

    Reads the extended package data for ``package`` and, for every packaged
    file that lists ``debugsrc`` entries, looks each debug source up under a
    fixed list of search directories, hashes the first regular file found and
    resolves that SHA256 through ``sources``.

    Parameters:
        d: datastore.
        package: package name passed to ``read_subpkgdata_extended``.
        package_files: SPDX file elements of the package; every file with
            debug sources must match one of them by name or alias.
        sources: mapping of SHA256 hex digest to source file element.
        source_hash_cache: mapping of path to digest (or None for paths that
            are not regular files); updated in place and reusable across
            calls.

    Returns a set of source file elements. The set is empty when no package
    data is available, so callers can always iterate the result.
    """

    def file_path_match(file_path, pkg_file):
        # Compare on the element name first, then on any recorded aliases
        if file_path.lstrip("/") == pkg_file.name.lstrip("/"):
            return True

        for e in pkg_file.extension:
            if isinstance(e, oe.sbom30.OEFileNameAliasExtension):
                for a in e.aliases:
                    if file_path.lstrip("/") == a.lstrip("/"):
                        return True

        return False

    debug_search_paths = [
        Path(d.getVar("SPDXWORK")),
        Path(d.getVar("PKGD")),
        Path(d.getVar("STAGING_DIR_TARGET")),
        Path(d.getVar("STAGING_DIR_NATIVE")),
        Path(d.getVar("STAGING_KERNEL_DIR")),
    ]

    pkg_data = oe.packagedata.read_subpkgdata_extended(package, d)

    if pkg_data is None:
        # Return an empty set rather than None: the caller in create_spdx()
        # iterates the result unconditionally.
        return set()

    dep_source_files = set()

    for file_path, file_data in pkg_data["files_info"].items():
        if "debugsrc" not in file_data:
            continue

        if not any(file_path_match(file_path, pkg_file) for pkg_file in package_files):
            bb.fatal(
                "No package file found for %s in %s; SPDX found: %s"
                % (str(file_path), package, " ".join(p.name for p in package_files))
            )
            continue

        for debugsrc in file_data["debugsrc"]:
            for search in debug_search_paths:
                if debugsrc.startswith("/usr/src/kernel"):
                    debugsrc_path = search / debugsrc.replace("/usr/src/kernel/", "")
                else:
                    debugsrc_path = search / debugsrc.lstrip("/")

                if debugsrc_path in source_hash_cache:
                    file_sha256 = source_hash_cache[debugsrc_path]
                    if file_sha256 is None:
                        # Known not to be a regular file; try the next root
                        continue
                else:
                    # We can only hash files below, skip directories, links, etc.
                    if not debugsrc_path.is_file():
                        source_hash_cache[debugsrc_path] = None
                        continue

                    file_sha256 = bb.utils.sha256_file(debugsrc_path)
                    source_hash_cache[debugsrc_path] = file_sha256

                if file_sha256 in sources:
                    source_file = sources[file_sha256]
                    dep_source_files.add(source_file)
                else:
                    bb.debug(
                        1,
                        "Debug source %s with SHA256 %s not found in any dependency"
                        % (str(debugsrc_path), file_sha256),
                    )
                # The first search path holding a regular file wins
                break
            else:
                bb.debug(1, "Debug source %s not found" % debugsrc)

    return dep_source_files
294
295
def collect_dep_objsets(d, build):
    """Load the recipe SPDX documents of this recipe's dependencies.

    Returns a ``(dep_objsets, dep_builds)`` tuple: the list of object sets of
    dependencies that are part of the task hash, and the set of root build
    elements of all dependencies. ``build`` is accepted but not used here.
    """
    linkable_objsets = []
    all_builds = set()

    for dep in oe.spdx_common.get_spdx_deps(d):
        bb.debug(1, "Fetching SPDX for dependency %s" % (dep.pn))
        found_build, found_objset = oe.sbom30.find_root_obj_in_jsonld(
            d, "recipes", "recipe-" + dep.pn, oe.spdx30.build_Build
        )

        # The build _can_ always be linked against (by alias)
        all_builds.add(found_build)

        # If the dependency is part of the taskhash, return it to be linked
        # against. Otherwise, it cannot be linked against because this recipe
        # will not be rebuilt if the dependency changes
        if dep.in_taskhash:
            linkable_objsets.append(found_objset)

    return linkable_objsets, all_builds
318
319
def index_sources_by_hash(sources, dest):
    """Index source file elements by SHA256 digest.

    Every ``software_File`` in ``sources`` whose primary purpose is
    ``source`` is stored in ``dest`` under its first SHA256 hash value. An
    existing entry for the same digest is kept. A source file without any
    SHA256 hash is reported through ``bb.fatal``.
    """
    for candidate in sources:
        if not isinstance(candidate, oe.spdx30.software_File):
            continue

        if candidate.software_primaryPurpose != oe.spdx30.software_SoftwarePurpose.source:
            continue

        sha256 = next(
            (
                v
                for v in candidate.verifiedUsing
                if v.algorithm == oe.spdx30.HashAlgorithm.sha256
            ),
            None,
        )
        if sha256 is None:
            bb.fatal(f"No SHA256 found for {candidate.name}")
            continue

        # First element seen for a digest wins
        dest.setdefault(sha256.hashValue, candidate)
335
336
def collect_dep_sources(dep_objsets, dest):
    """Index the build inputs of every non-native dependency by SHA256.

    For each object set, the targets of the ``hasInput`` relationships that
    originate from its root build are passed to ``index_sources_by_hash``,
    which fills ``dest``.
    """
    for dep_objset in dep_objsets:
        # Don't collect sources from native recipes as they
        # match non-native sources also.
        if dep_objset.is_native():
            continue

        bb.debug(1, "Fetching Sources for dependency %s" % (dep_objset.doc.name))

        root_build = dep_objset.find_root(oe.spdx30.build_Build)
        if not root_build:
            bb.fatal("Unable to find a build")

        for rel in dep_objset.foreach_type(oe.spdx30.Relationship):
            # Only the build's own hasInput relationships are of interest
            if (
                root_build is not rel.from_
                or rel.relationshipType != oe.spdx30.RelationshipType.hasInput
            ):
                continue

            index_sources_by_hash(rel.to, dest)
358
359
def add_download_files(d, objset):
    """Create SPDX elements for every entry of ``SRC_URI``.

    Local ``file`` entries become file elements (one per contained regular
    file when the entry is a directory; symlinks are skipped). Every other
    entry becomes a ``software_Package`` carrying its download location and,
    where the fetcher supports checksums, the expected sha256/sha1 hashes.
    Entries handled as patches by ``oe.patch.patch_path`` get the ``patch``
    purpose, all others ``source``.

    Returns the set of created elements.
    """
    inputs = set()

    urls = d.getVar("SRC_URI").split()
    fetch = bb.fetch2.Fetch(urls, d)

    # SPDX IDs use a 1-based index of the entry within SRC_URI
    for position, src_uri in enumerate(urls, start=1):
        fd = fetch.ud[src_uri]
        source_idx = str(position)

        file_name = os.path.basename(fetch.localpath(src_uri))
        primary_purpose = (
            oe.spdx30.software_SoftwarePurpose.patch
            if oe.patch.patch_path(src_uri, fetch, "", expand=False)
            else oe.spdx30.software_SoftwarePurpose.source
        )

        if fd.type != "file":
            dl = objset.add(
                oe.spdx30.software_Package(
                    _id=objset.new_spdxid("source", source_idx),
                    creationInfo=objset.doc.creationInfo,
                    name=file_name,
                    software_primaryPurpose=primary_purpose,
                    software_downloadLocation=oe.spdx_common.fetch_data_to_uri(
                        fd, fd.name
                    ),
                )
            )

            if fd.method.supports_checksum(fd):
                # TODO Need something better than hard coding this
                for checksum_id in ["sha256", "sha1"]:
                    expected_checksum = getattr(
                        fd, "%s_expected" % checksum_id, None
                    )
                    if expected_checksum is not None:
                        dl.verifiedUsing.append(
                            oe.spdx30.Hash(
                                algorithm=getattr(oe.spdx30.HashAlgorithm, checksum_id),
                                hashValue=expected_checksum,
                            )
                        )

            inputs.add(dl)
            continue

        if not os.path.isdir(fd.localpath):
            # A single local file
            inputs.add(
                objset.new_file(
                    objset.new_spdxid("source", source_idx),
                    file_name,
                    fd.localpath,
                    purposes=[primary_purpose],
                )
            )
            continue

        # A local directory: one element per contained file, numbered from 1
        walk_idx = 1
        for root, dirs, files in os.walk(fd.localpath, onerror=walk_error):
            dirs.sort()
            files.sort()
            for f in files:
                f_path = os.path.join(root, f)
                if os.path.islink(f_path):
                    # TODO: SPDX doesn't support symlinks yet
                    continue

                inputs.add(
                    objset.new_file(
                        objset.new_spdxid("source", source_idx, str(walk_idx)),
                        os.path.join(
                            file_name, os.path.relpath(f_path, fd.localpath)
                        ),
                        f_path,
                        purposes=[primary_purpose],
                    )
                )
                walk_idx += 1

    return inputs
442
443
def set_purposes(d, element, *var_names, force_purposes=[]):
    """Set the primary and additional software purposes of ``element``.

    The purpose list is ``force_purposes`` followed by the whitespace
    separated value of the first variable in ``var_names`` that has a
    non-empty value. The first purpose becomes the primary purpose, the rest
    the additional purposes. If no purpose is found a warning is emitted and
    ``element`` is left untouched.
    """
    purposes = force_purposes[:]

    # Only the first variable with a value contributes
    first_value = next((v for v in map(d.getVar, var_names) if v), None)
    if first_value:
        purposes.extend(first_value.split())

    if not purposes:
        bb.warn("No SPDX purposes found in %s" % " ".join(var_names))
        return

    primary, *additional = purposes
    element.software_primaryPurpose = getattr(
        oe.spdx30.software_SoftwarePurpose, primary
    )
    element.software_additionalPurpose = [
        getattr(oe.spdx30.software_SoftwarePurpose, name) for name in additional
    ]
463
464
def create_spdx(d):
    """Write the recipe SPDX document and one staging document per package.

    A recipe object set ("recipe-<PN>") is built containing the recipe build
    element, CVE elements (depending on SPDX_INCLUDE_VEX), download and
    optionally patched source files, license expressions and dependency
    links. For non-native recipes, one object set per packaged package is
    written to the "packages-staging" area. Finally the recipe object set is
    written to the "recipes" area.
    """

    def set_var_field(var, obj, name, package=None):
        # Copy a variable onto obj.<name>, preferring the package specific
        # "<var>:<package>" value and falling back to the plain variable.
        # Nothing is set when neither has a value.
        val = None
        if package:
            val = d.getVar("%s:%s" % (var, package))

        if not val:
            val = d.getVar(var)

        if val:
            setattr(obj, name, val)

    license_data = oe.spdx_common.load_spdx_license_data(d)

    deploydir = Path(d.getVar("SPDXDEPLOY"))
    # NOTE: deploy_dir_spdx and pkg_arch are not referenced again in this
    # function
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    spdx_workdir = Path(d.getVar("SPDXWORK"))
    include_sources = d.getVar("SPDX_INCLUDE_SOURCES") == "1"
    pkg_arch = d.getVar("SSTATE_PKGARCH")
    # Both "native" and "cross" recipes are treated as native here
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )
    include_vex = d.getVar("SPDX_INCLUDE_VEX")
    if not include_vex in ("none", "current", "all"):
        bb.fatal("SPDX_INCLUDE_VEX must be one of 'none', 'current', 'all'")

    build_objset = oe.sbom30.ObjectSet.new_objset(d, "recipe-" + d.getVar("PN"))

    build = build_objset.new_task_build("recipe", "recipe")
    build_objset.set_element_alias(build)

    build_objset.doc.rootElement.append(build)

    build_objset.set_is_native(is_native)

    # Record each requested variable as a "VAR=value" annotation on the build.
    # NOTE(review): new_annotation is not defined in the part of this file
    # visible here -- confirm it is defined or imported elsewhere.
    for var in (d.getVar("SPDX_CUSTOM_ANNOTATION_VARS") or "").split():
        new_annotation(
            d,
            build_objset,
            build,
            "%s=%s" % (var, d.getVar(var)),
            oe.spdx30.AnnotationType.other,
        )

    build_inputs = set()

    # Add CVEs
    # cve_by_status maps status -> {cve: (element, detail, description)}
    cve_by_status = {}
    if include_vex != "none":
        patched_cves = oe.cve_check.get_patched_cves(d)
        for cve, patched_cve in patched_cves.items():
            decoded_status = {
                "mapping": patched_cve["abbrev-status"],
                "detail": patched_cve["status"],
                "description": patched_cve.get("justification", None)
            }

            # If this CVE is fixed upstream, skip it unless all CVEs are
            # specified.
            if (
                include_vex != "all"
                and "detail" in decoded_status
                and decoded_status["detail"]
                in (
                    "fixed-version",
                    "cpe-stable-backport",
                )
            ):
                bb.debug(1, "Skipping %s since it is already fixed upstream" % cve)
                continue

            spdx_cve = build_objset.new_cve_vuln(cve)
            build_objset.set_element_alias(spdx_cve)

            cve_by_status.setdefault(decoded_status["mapping"], {})[cve] = (
                spdx_cve,
                decoded_status["detail"],
                decoded_status["description"],
            )

    cpe_ids = oe.cve_check.get_cpe_ids(d.getVar("CVE_PRODUCT"), d.getVar("CVE_VERSION"))

    # Downloads are build inputs and carry the recipe's declared license
    source_files = add_download_files(d, build_objset)
    build_inputs |= source_files

    recipe_spdx_license = add_license_expression(
        d, build_objset, d.getVar("LICENSE"), license_data
    )
    build_objset.new_relationship(
        source_files,
        oe.spdx30.RelationshipType.hasDeclaredLicense,
        [oe.sbom30.get_element_link_id(recipe_spdx_license)],
    )

    # SHA256 -> source file element; filled from this recipe's own sources
    # and, for non-native recipes, from its dependencies
    dep_sources = {}
    if oe.spdx_common.process_sources(d) and include_sources:
        bb.debug(1, "Adding source files to SPDX")
        oe.spdx_common.get_patched_src(d)

        files = add_package_files(
            d,
            build_objset,
            spdx_workdir,
            lambda file_counter: build_objset.new_spdxid(
                "sourcefile", str(file_counter)
            ),
            lambda filepath: [oe.spdx30.software_SoftwarePurpose.source],
            license_data,
            ignore_dirs=[".git"],
            ignore_top_level_dirs=["temp"],
            archive=None,
        )
        build_inputs |= files
        index_sources_by_hash(files, dep_sources)

    dep_objsets, dep_builds = collect_dep_objsets(d, build)
    if dep_builds:
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.dependsOn,
            oe.spdx30.LifecycleScopeType.build,
            sorted(oe.sbom30.get_element_link_id(b) for b in dep_builds),
        )

    debug_source_ids = set()
    # Shared across packages so each debug source path is hashed at most once
    source_hash_cache = {}

    # Write out the package SPDX data now. It is not complete as we cannot
    # write the runtime data, so write it to a staging area and a later task
    # will write out the final collection

    # TODO: Handle native recipe output
    if not is_native:
        bb.debug(1, "Collecting Dependency sources files")
        collect_dep_sources(dep_objsets, dep_sources)

        bb.build.exec_func("read_subpackage_metadata", d)

        pkgdest = Path(d.getVar("PKGDEST"))
        for package in d.getVar("PACKAGES").split():
            if not oe.packagedata.packaged(package, d):
                continue

            # PKG:<package> is the (possibly renamed) output package name
            pkg_name = d.getVar("PKG:%s" % package) or package

            bb.debug(1, "Creating SPDX for package %s" % pkg_name)

            pkg_objset = oe.sbom30.ObjectSet.new_objset(d, "package-" + pkg_name)

            spdx_package = pkg_objset.add_root(
                oe.spdx30.software_Package(
                    _id=pkg_objset.new_spdxid("package", pkg_name),
                    creationInfo=pkg_objset.doc.creationInfo,
                    name=pkg_name,
                    software_packageVersion=d.getVar("SPDX_PACKAGE_VERSION"),
                )
            )
            set_timestamp_now(d, spdx_package, "builtTime")

            # "install" is always the primary purpose; the variables can
            # only contribute additional purposes
            set_purposes(
                d,
                spdx_package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE:%s" % package,
                "SPDX_PACKAGE_ADDITIONAL_PURPOSE",
                force_purposes=["install"],
            )

            # The supplier agent is created in the recipe object set;
            # new_agent may return either an ID string or an element
            supplier = build_objset.new_agent("SPDX_PACKAGE_SUPPLIER")
            if supplier is not None:
                spdx_package.suppliedBy = (
                    supplier if isinstance(supplier, str) else supplier._id
                )

            set_var_field(
                "HOMEPAGE", spdx_package, "software_homePage", package=package
            )
            set_var_field("SUMMARY", spdx_package, "summary", package=package)
            set_var_field("DESCRIPTION", spdx_package, "description", package=package)

            if d.getVar("SPDX_PACKAGE_URL:%s" % package) or d.getVar("SPDX_PACKAGE_URL"):
                set_var_field(
                    "SPDX_PACKAGE_URL",
                    spdx_package,
                    "software_packageUrl",
                    package=package
                )

            pkg_objset.new_scoped_relationship(
                [oe.sbom30.get_element_link_id(build)],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                [spdx_package],
            )

            for cpe_id in cpe_ids:
                spdx_package.externalIdentifier.append(
                    oe.spdx30.ExternalIdentifier(
                        externalIdentifierType=oe.spdx30.ExternalIdentifierType.cpe23,
                        identifier=cpe_id,
                    )
                )

            # TODO: Generate a file for each actual IPK/DEB/RPM/TGZ file
            # generated and link it to the package
            # spdx_package_file = pkg_objset.add(oe.spdx30.software_File(
            #    _id=pkg_objset.new_spdxid("distribution", pkg_name),
            #    creationInfo=pkg_objset.doc.creationInfo,
            #    name=pkg_name,
            #    software_primaryPurpose=spdx_package.software_primaryPurpose,
            #    software_additionalPurpose=spdx_package.software_additionalPurpose,
            # ))
            # set_timestamp_now(d, spdx_package_file, "builtTime")

            ## TODO add hashes
            # pkg_objset.new_relationship(
            #    [spdx_package],
            #    oe.spdx30.RelationshipType.hasDistributionArtifact,
            #    [spdx_package_file],
            # )

            # NOTE: licenses live in the recipe collection and are referenced
            # by ID in the package collection(s). This helps reduce duplication
            # (since a lot of packages will have the same license), and also
            # prevents duplicate license SPDX IDs in the packages
            package_license = d.getVar("LICENSE:%s" % package)
            if package_license and package_license != d.getVar("LICENSE"):
                package_spdx_license = add_license_expression(
                    d, build_objset, package_license, license_data
                )
            else:
                package_spdx_license = recipe_spdx_license

            pkg_objset.new_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.hasConcludedLicense,
                [oe.sbom30.get_element_link_id(package_spdx_license)],
            )

            # NOTE: CVE Elements live in the recipe collection
            all_cves = set()
            for status, cves in cve_by_status.items():
                for cve, items in cves.items():
                    spdx_cve, detail, description = items
                    spdx_cve_id = oe.sbom30.get_element_link_id(spdx_cve)

                    all_cves.add(spdx_cve_id)

                    if status == "Patched":
                        pkg_objset.new_vex_patched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Unpatched":
                        pkg_objset.new_vex_unpatched_relationship(
                            [spdx_cve_id], [spdx_package]
                        )
                    elif status == "Ignored":
                        spdx_vex = pkg_objset.new_vex_ignored_relationship(
                            [spdx_cve_id],
                            [spdx_package],
                            impact_statement=description,
                        )

                        if detail in (
                            "ignored",
                            "cpe-incorrect",
                            "disputed",
                            "upstream-wontfix",
                        ):
                            # VEX doesn't have justifications for this
                            pass
                        elif detail in (
                            "not-applicable-config",
                            "not-applicable-platform",
                        ):
                            for v in spdx_vex:
                                v.security_justificationType = (
                                    oe.spdx30.security_VexJustificationType.vulnerableCodeNotPresent
                                )
                        else:
                            bb.fatal(f"Unknown detail '{detail}' for ignored {cve}")
                    elif status == "Unknown":
                        bb.note(f"Skipping {cve} with status 'Unknown'")
                    else:
                        bb.fatal(f"Unknown {cve} status '{status}'")

            if all_cves:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.hasAssociatedVulnerability,
                    sorted(list(all_cves)),
                )

            bb.debug(1, "Adding package files to SPDX for package %s" % pkg_name)
            package_files = add_package_files(
                d,
                pkg_objset,
                pkgdest / package,
                lambda file_counter: pkg_objset.new_spdxid(
                    "package", pkg_name, "file", str(file_counter)
                ),
                # TODO: Can we know the purpose here?
                lambda filepath: [],
                license_data,
                ignore_top_level_dirs=["CONTROL", "DEBIAN"],
                archive=None,
            )

            if package_files:
                pkg_objset.new_relationship(
                    [spdx_package],
                    oe.spdx30.RelationshipType.contains,
                    sorted(list(package_files)),
                )

            if include_sources:
                # Sources found via debug info are linked as build inputs of
                # the recipe build further down
                debug_sources = get_package_sources_from_debug(
                    d, package, package_files, dep_sources, source_hash_cache
                )
                debug_source_ids |= set(
                    oe.sbom30.get_element_link_id(d) for d in debug_sources
                )

            oe.sbom30.write_recipe_jsonld_doc(
                d, pkg_objset, "packages-staging", deploydir, create_spdx_id_links=False
            )

    if include_sources:
        bb.debug(1, "Adding sysroot files to SPDX")
        sysroot_files = add_package_files(
            d,
            build_objset,
            d.expand("${COMPONENTS_DIR}/${PACKAGE_ARCH}/${PN}"),
            lambda file_counter: build_objset.new_spdxid("sysroot", str(file_counter)),
            lambda filepath: [],
            license_data,
            archive=None,
        )

        if sysroot_files:
            build_objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                sorted(list(sysroot_files)),
            )

    if build_inputs or debug_source_ids:
        # Elements and link IDs are sorted separately and then concatenated
        build_objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(list(build_inputs)) + sorted(list(debug_source_ids)),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, build_objset, "recipes", deploydir)
820
821
def create_package_spdx(d):
    """Write the final per-package SPDX documents including runtime deps.

    For every packaged package of a non-native recipe, the staged package
    document ("packages-staging") is loaded, a runtime-scoped ``dependsOn``
    relationship to the SPDX packages providing its ``RDEPENDS`` is added,
    and the result is written to the "packages" area. A "common-package"
    object set supplying the creation info for the new documents is written
    last. Native and cross recipes return without writing anything.
    """
    deploydir = Path(d.getVar("SPDXRUNTIMEDEPLOY"))
    is_native = bb.data.inherits_class("native", d) or bb.data.inherits_class(
        "cross", d
    )

    providers = oe.spdx_common.collect_package_providers(d)
    pkg_arch = d.getVar("SSTATE_PKGARCH")

    if is_native:
        return

    bb.build.exec_func("read_subpackage_metadata", d)

    # Provider package name -> SPDX package element, shared by all packages
    dep_package_cache = {}

    # Any element common to all packages that need to be referenced by ID
    # should be written into this objset set
    common_objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-package-common" % d.getVar("PN")
    )

    for package in d.getVar("PACKAGES").split():
        # Evaluate variables with the package name as an override so that
        # e.g. RDEPENDS resolves to the package specific value
        localdata = bb.data.createCopy(d)
        pkg_name = d.getVar("PKG:%s" % package) or package
        localdata.setVar("PKG", pkg_name)
        localdata.setVar("OVERRIDES", d.getVar("OVERRIDES", False) + ":" + package)

        if not oe.packagedata.packaged(package, localdata):
            continue

        spdx_package, pkg_objset = oe.sbom30.load_obj_in_jsonld(
            d,
            pkg_arch,
            "packages-staging",
            "package-" + pkg_name,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )

        # We will write out a new collection, so link it to the new
        # creation info in the common package data. The old creation info
        # should still exist and be referenced by all the existing elements
        # in the package
        pkg_objset.creationInfo = pkg_objset.copy_creation_info(
            common_objset.doc.creationInfo
        )

        runtime_spdx_deps = set()

        deps = bb.utils.explode_dep_versions2(localdata.getVar("RDEPENDS") or "")
        seen_deps = set()
        for rdep in deps:
            if rdep in seen_deps:
                continue

            if rdep not in providers:
                continue

            # Resolve the runtime dependency to the package providing it
            dep, _ = providers[rdep]

            if not oe.packagedata.packaged(dep, localdata):
                continue

            dep_pkg_data = oe.packagedata.read_subpkgdata_dict(dep, d)
            dep_pkg = dep_pkg_data["PKG"]

            if dep in dep_package_cache:
                dep_spdx_package = dep_package_cache[dep]
            else:
                bb.debug(1, "Searching for %s" % dep_pkg)
                dep_spdx_package, _ = oe.sbom30.find_root_obj_in_jsonld(
                    d,
                    "packages-staging",
                    "package-" + dep_pkg,
                    oe.spdx30.software_Package,
                    software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
                )
                dep_package_cache[dep] = dep_spdx_package

            runtime_spdx_deps.add(dep_spdx_package)
            seen_deps.add(dep)

        if runtime_spdx_deps:
            # Set iteration order is not guaranteed; sort the link IDs so the
            # relationship targets are emitted in a stable order, as the
            # other relationships in this file do.
            pkg_objset.new_scoped_relationship(
                [spdx_package],
                oe.spdx30.RelationshipType.dependsOn,
                oe.spdx30.LifecycleScopeType.runtime,
                sorted(
                    oe.sbom30.get_element_link_id(dep) for dep in runtime_spdx_deps
                ),
            )

        oe.sbom30.write_recipe_jsonld_doc(d, pkg_objset, "packages", deploydir)

    oe.sbom30.write_recipe_jsonld_doc(d, common_objset, "common-package", deploydir)
918
919
def write_bitbake_spdx(d):
    """Write ``bitbake.spdx.json`` describing this bitbake invocation.

    ``PN`` and ``BB_TASKHASH`` are set to ``"bitbake"`` in ``d``. When
    ``SPDX_INCLUDE_BITBAKE_PARENT_BUILD`` is ``"1"`` a uniquely identified
    build element is added, optionally linked to the build host
    (``SPDX_BUILD_HOST``), the invoking agent (``SPDX_INVOKED_BY``) and the
    agent it acted for (``SPDX_ON_BEHALF_OF``). Otherwise those variables
    only trigger warnings. Every element gets an ID alias extension before
    the document is written to ``DEPLOY_DIR_SPDX``.
    """
    # Set PN to "bitbake" so that SPDX IDs can be generated
    d.setVar("PN", "bitbake")
    d.setVar("BB_TASKHASH", "bitbake")
    oe.spdx_common.load_spdx_license_data(d)

    output_path = Path(d.getVar("DEPLOY_DIR_SPDX")) / "bitbake.spdx.json"

    objset = oe.sbom30.ObjectSet.new_objset(d, "bitbake", False)

    host_import_key = d.getVar("SPDX_BUILD_HOST")
    invoked_by = objset.new_agent("SPDX_INVOKED_BY", add=False)
    on_behalf_of = objset.new_agent("SPDX_ON_BEHALF_OF", add=False)

    if d.getVar("SPDX_INCLUDE_BITBAKE_PARENT_BUILD") != "1":
        # Without a parent build none of these settings can be represented
        ignored_settings = (
            (
                host_import_key,
                "SPDX_BUILD_HOST has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                invoked_by,
                "SPDX_INVOKED_BY has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
            (
                on_behalf_of,
                "SPDX_ON_BEHALF_OF has no effect if SPDX_INCLUDE_BITBAKE_PARENT_BUILD is not set",
            ),
        )
        for value, warning in ignored_settings:
            if value:
                bb.warn(warning)
    else:
        # Since the Build objects are unique, we may as well set the creation
        # time to the current time instead of the fallback SDE
        objset.doc.creationInfo.created = datetime.now(timezone.utc)

        # Each invocation of bitbake should have a unique ID since it is a
        # unique build
        nonce = os.urandom(16).hex()

        build = objset.add_root(
            oe.spdx30.build_Build(
                _id=objset.new_spdxid(nonce, include_unihash=False),
                creationInfo=objset.doc.creationInfo,
                build_buildType=oe.sbom30.SPDX_BUILD_TYPE,
            )
        )
        set_timestamp_now(d, build, "build_buildStartTime")

        if host_import_key:
            objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.hasHost,
                oe.spdx30.LifecycleScopeType.build,
                [objset.new_import(host_import_key)],
            )

        if invoked_by:
            objset.add(invoked_by)
            invoked_by_spdx = objset.new_scoped_relationship(
                [build],
                oe.spdx30.RelationshipType.invokedBy,
                oe.spdx30.LifecycleScopeType.build,
                [invoked_by],
            )

            if on_behalf_of:
                # The delegation targets the invokedBy relationship(s)
                objset.add(on_behalf_of)
                objset.new_scoped_relationship(
                    [on_behalf_of],
                    oe.spdx30.RelationshipType.delegatedTo,
                    oe.spdx30.LifecycleScopeType.build,
                    invoked_by_spdx,
                )
        elif on_behalf_of:
            bb.warn("SPDX_ON_BEHALF_OF has no effect if SPDX_INVOKED_BY is not set")

    for element in objset.foreach_type(oe.spdx30.Element):
        element.extension.append(oe.sbom30.OEIdAliasExtension())

    oe.sbom30.write_jsonld_doc(d, objset, output_path)
1001
1002
def collect_build_package_inputs(d, objset, build, packages, files_by_hash=None):
    """Link the SPDX packages of ``packages`` as inputs of ``build``.

    Each key of ``packages`` is resolved through the package providers and
    its SPDX package is looked up in the "packages" area. If ``files_by_hash``
    is given, the per-SHA256 file sets of every package object set are merged
    into it. All names without a provider are reported together through
    ``bb.fatal``. The ``hasInput`` relationship is added to ``objset``.
    """
    import oe.sbom30

    providers = oe.spdx_common.collect_package_providers(d)

    input_ids = set()
    unresolved = set()

    for name in sorted(packages.keys()):
        if name not in providers:
            unresolved.add(name)
            continue

        provider_pkg, _hashfn = providers[name]

        # Copy all of the package SPDX files into the Sbom elements
        pkg_spdx, pkg_objset = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "packages",
            "package-" + provider_pkg,
            oe.spdx30.software_Package,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.install,
        )
        input_ids.add(oe.sbom30.get_element_link_id(pkg_spdx))

        if files_by_hash is None:
            continue

        for digest, file_elements in pkg_objset.by_sha256_hash.items():
            files_by_hash.setdefault(digest, set()).update(file_elements)

    if unresolved:
        bb.fatal(
            f"Unable to find SPDX provider(s) for: {', '.join(sorted(unresolved))}"
        )

    if input_ids:
        objset.new_scoped_relationship(
            [build],
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(input_ids),
        )
1044
1045
def create_rootfs_spdx(d):
    """Write the SPDX document describing the image root filesystem.

    The document contains a rootfs package element, the rootfs build that
    outputs it, the installed packages (read as JSON from
    ``SPDX_ROOTFS_PACKAGES``) as build inputs, and a ``contains``
    relationship to every regular file under ``IMAGE_ROOTFS``. A file whose
    SHA256 and relative path match a file element of an installed package is
    referenced by link ID; any other file gets a new file element.
    """
    deploy_dir_spdx = Path(d.getVar("DEPLOY_DIR_SPDX"))
    deploydir = Path(d.getVar("SPDXROOTFSDEPLOY"))
    root_packages_file = Path(d.getVar("SPDX_ROOTFS_PACKAGES"))
    image_basename = d.getVar("IMAGE_BASENAME")
    image_rootfs = d.getVar("IMAGE_ROOTFS")
    machine = d.getVar("MACHINE")

    with root_packages_file.open("r") as f:
        packages = json.load(f)

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-rootfs" % (image_basename, machine)
    )

    rootfs = objset.add_root(
        oe.spdx30.software_Package(
            _id=objset.new_spdxid("rootfs", image_basename),
            creationInfo=objset.doc.creationInfo,
            name=image_basename,
            software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
        )
    )
    set_timestamp_now(d, rootfs, "builtTime")

    rootfs_build = objset.add_root(objset.new_task_build("rootfs", "rootfs"))
    set_timestamp_now(d, rootfs_build, "build_buildEndTime")

    objset.new_scoped_relationship(
        [rootfs_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    # SHA256 -> set of file elements of the installed packages
    files_by_hash = {}
    collect_build_package_inputs(d, objset, rootfs_build, packages, files_by_hash)

    contained = set()
    for dirpath, dirnames, filenames in os.walk(image_rootfs, onerror=walk_error):
        dirnames.sort()
        filenames.sort()
        for entry in filenames:
            fpath = Path(dirpath) / entry
            if fpath.is_symlink() or not fpath.is_file():
                continue

            relpath = str(fpath.relative_to(image_rootfs))
            digest = bb.utils.sha256_file(fpath)

            # Prefer the element already describing this exact file
            for candidate in files_by_hash.get(digest, ()):
                if (
                    isinstance(candidate, oe.spdx30.software_File)
                    and candidate.name == relpath
                ):
                    contained.add(oe.sbom30.get_element_link_id(candidate))
                    break
            else:
                contained.add(
                    objset.new_file(
                        objset.new_spdxid("rootfs-file", relpath),
                        relpath,
                        fpath,
                    )
                )

    if contained:
        objset.new_relationship(
            [rootfs],
            oe.spdx30.RelationshipType.contains,
            sorted(contained),
        )

    oe.sbom30.write_recipe_jsonld_doc(d, objset, "rootfs", deploydir)
1121
1122
def create_image_spdx(d):
    """
    Write the SPDX object set describing the generated image artifacts.

    The JSON manifest named by IMAGE_OUTPUT_MANIFEST is read as a list of
    tasks, each providing "imagetype", "taskname" and a list of "images"
    with a "filename". For every task a build object is added, and each of
    its images under IMGDEPLOYDIR is recorded as an output of that build:
    directories are expanded through add_package_files(), anything else
    becomes a single file object with a SHA-256 hash. If any build was
    recorded, the rootfs package is linked in as the input of all builds.
    The document is written with oe.sbom30.write_recipe_jsonld_doc() to
    SPDXIMAGEWORK.
    """
    import oe.sbom30

    deploy_root = Path(d.getVar("IMGDEPLOYDIR"))
    manifest_file = Path(d.getVar("IMAGE_OUTPUT_MANIFEST"))
    out_dir = Path(d.getVar("SPDXIMAGEWORK"))

    image_basename = d.getVar("IMAGE_BASENAME")
    machine = d.getVar("MACHINE")

    objset = oe.sbom30.ObjectSet.new_objset(
        d, "%s-%s-image" % (image_basename, machine)
    )

    with manifest_file.open("r") as fh:
        manifest = json.load(fh)

    # Callbacks handed to add_package_files() for images that are directories
    def image_file_spdxid(file_counter):
        return objset.new_spdxid("imagefile", str(file_counter))

    def empty_list(filepath):
        return []

    builds = []
    for task in manifest:
        imagetype = task["imagetype"]
        taskname = task["taskname"]

        image_build = objset.add_root(
            objset.new_task_build(taskname, "image/%s" % imagetype)
        )
        set_timestamp_now(d, image_build, "build_buildEndTime")
        builds.append(image_build)

        artifacts = []
        for image in task["images"]:
            image_filename = image["filename"]
            image_path = deploy_root / image_filename

            if not os.path.isdir(image_path):
                # Single image file: describe it directly with its hash
                image_file = oe.spdx30.software_File(
                    _id=objset.new_spdxid("image", image_filename),
                    creationInfo=objset.doc.creationInfo,
                    name=image_filename,
                    verifiedUsing=[
                        oe.spdx30.Hash(
                            algorithm=oe.spdx30.HashAlgorithm.sha256,
                            hashValue=bb.utils.sha256_file(image_path),
                        )
                    ],
                )
                artifacts.append(objset.add_root(image_file))
                continue

            # Image deployed as a directory: add the files found in it
            artifacts.extend(
                add_package_files(
                    d,
                    objset,
                    image_path,
                    image_file_spdxid,
                    empty_list,
                    license_data=None,
                    ignore_dirs=[],
                    ignore_top_level_dirs=[],
                    archive=None,
                )
            )

        for artifact in artifacts:
            set_purposes(
                d,
                artifact,
                "SPDX_IMAGE_PURPOSE:%s" % imagetype,
                "SPDX_IMAGE_PURPOSE",
            )
            set_timestamp_now(d, artifact, "builtTime")

        if artifacts:
            objset.new_scoped_relationship(
                [image_build],
                oe.spdx30.RelationshipType.hasOutput,
                oe.spdx30.LifecycleScopeType.build,
                artifacts,
            )

    if builds:
        rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
            d,
            "rootfs",
            "%s-%s-rootfs" % (image_basename, machine),
            oe.spdx30.software_Package,
            # TODO: Should use a purpose to filter here?
        )
        # Every image build takes the rootfs package as its input
        objset.new_scoped_relationship(
            builds,
            oe.spdx30.RelationshipType.hasInput,
            oe.spdx30.LifecycleScopeType.build,
            [oe.sbom30.get_element_link_id(rootfs_image)],
        )

    objset.add_aliases()
    objset.link()
    oe.sbom30.write_recipe_jsonld_doc(d, objset, "image", out_dir)
1222
1223
def create_image_sbom_spdx(d):
    """
    Create the image SBOM and write it to SPDXIMAGEDEPLOYDIR.

    The root elements of the SBOM are the rootfs package (looked up in the
    "<IMAGE_BASENAME>-<MACHINE>-rootfs" document) followed by every root
    file of the "<IMAGE_BASENAME>-<MACHINE>-image" document. The SBOM is
    written as "<IMAGE_NAME>.spdx.json"; when IMAGE_LINK_NAME is set and
    yields a different path, a relative symlink
    "<IMAGE_LINK_NAME>.spdx.json" pointing at it is created alongside.
    """
    import oe.sbom30

    image_name = d.getVar("IMAGE_NAME")
    image_basename = d.getVar("IMAGE_BASENAME")
    image_link_name = d.getVar("IMAGE_LINK_NAME")
    imgdeploydir = Path(d.getVar("SPDXIMAGEDEPLOYDIR"))
    machine = d.getVar("MACHINE")

    sbom_suffix = ".spdx.json"
    spdx_path = imgdeploydir / (image_name + sbom_suffix)

    # TODO: Do we need to add the rootfs or are the image files sufficient?
    rootfs_image, _ = oe.sbom30.find_root_obj_in_jsonld(
        d,
        "rootfs",
        "%s-%s-rootfs" % (image_basename, machine),
        oe.spdx30.software_Package,
        # TODO: Should use a purpose here?
    )
    root_elements = [oe.sbom30.get_element_link_id(rootfs_image)]

    image_objset, _ = oe.sbom30.find_jsonld(
        d, "image", "%s-%s-image" % (image_basename, machine), required=True
    )
    root_elements.extend(
        oe.sbom30.get_element_link_id(image_file)
        for image_file in image_objset.foreach_root(oe.spdx30.software_File)
    )

    objset, _sbom = oe.sbom30.create_sbom(d, image_name, root_elements)

    oe.sbom30.write_jsonld_doc(d, objset, spdx_path)

    # Provide the SBOM under the image link name as well
    if image_link_name:
        link = imgdeploydir / (image_link_name + sbom_suffix)
        if link != spdx_path:
            link.symlink_to(os.path.relpath(spdx_path, link.parent))
1264
1265
def sdk_create_spdx(d, sdk_type, spdx_work_dir, toolchain_outputname):
    """
    Write the SPDX document describing an SDK root filesystem.

    The object set is named "<toolchain_outputname>-<sdk_type>". It gets an
    "sdk-rootfs" package and the build that outputs it as root objects, and
    the packages reported by oe.sdk.sdk_list_installed_packages() are
    recorded as inputs of that build. The document is written to
    "sdk-rootfs.spdx.json" inside spdx_work_dir.
    """
    sdk_name = toolchain_outputname + "-" + sdk_type
    is_target = sdk_type == "target"
    installed_packages = oe.sdk.sdk_list_installed_packages(d, is_target)

    objset = oe.sbom30.ObjectSet.new_objset(d, sdk_name)

    rootfs_pkg = oe.spdx30.software_Package(
        _id=objset.new_spdxid("sdk-rootfs", sdk_name),
        creationInfo=objset.doc.creationInfo,
        name=sdk_name,
        software_primaryPurpose=oe.spdx30.software_SoftwarePurpose.archive,
    )
    sdk_rootfs = objset.add_root(rootfs_pkg)
    set_timestamp_now(d, sdk_rootfs, "builtTime")

    build_task = objset.new_task_build("sdk-rootfs", "sdk-rootfs")
    sdk_build = objset.add_root(build_task)
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    # The SDK rootfs package is the output of the sdk-rootfs build
    objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasOutput,
        oe.spdx30.LifecycleScopeType.build,
        [sdk_rootfs],
    )

    collect_build_package_inputs(d, objset, sdk_build, installed_packages)

    objset.add_aliases()

    output_path = spdx_work_dir / "sdk-rootfs.spdx.json"
    oe.sbom30.write_jsonld_doc(d, objset, output_path)
1296
1297
def create_sdk_sbom(d, sdk_deploydir, spdx_work_dir, toolchain_outputname):
    """
    Create the SBOM for the SDK output files and write it to sdk_deploydir.

    Loads "sdk-rootfs.spdx.json" from spdx_work_dir, adds an "sdk-populate"
    build that takes the SDK rootfs package as input, and records every
    regular (non-symlink) file found under sdk_deploydir as an output of
    that build. The SBOM is written as "<toolchain_outputname>.spdx.json"
    in sdk_deploydir.

    Calls bb.fatal() if the loaded document has no root software_Package,
    and bb.warn() if no output files are found.
    """
    # Load the document written earlier
    rootfs_objset = oe.sbom30.load_jsonld(
        d, spdx_work_dir / "sdk-rootfs.spdx.json", required=True
    )

    # Create a new build for the SDK installer
    sdk_build = rootfs_objset.new_task_build("sdk-populate", "sdk-populate")
    set_timestamp_now(d, sdk_build, "build_buildEndTime")

    rootfs = rootfs_objset.find_root(oe.spdx30.software_Package)
    if rootfs is None:
        bb.fatal("Unable to find rootfs artifact")

    rootfs_objset.new_scoped_relationship(
        [sdk_build],
        oe.spdx30.RelationshipType.hasInput,
        oe.spdx30.LifecycleScopeType.build,
        [rootfs],
    )

    files = set()
    # NOTE(review): collected below but never read afterwards; possibly
    # meant to be the SBOM root elements - confirm before removing
    root_files = []

    # NOTE: os.walk() does not descend into symlinked directories by
    # default, but symlinks to files are still listed in filenames, so they
    # are skipped explicitly below
    for dirpath, dirnames, filenames in os.walk(sdk_deploydir, onerror=walk_error):
        # Sort in place so the walk order is stable
        dirnames.sort()
        filenames.sort()
        for fn in filenames:
            fpath = Path(dirpath) / fn
            if not fpath.is_file() or fpath.is_symlink():
                continue

            relpath = str(fpath.relative_to(sdk_deploydir))

            f = rootfs_objset.new_file(
                rootfs_objset.new_spdxid("sdk-installer", relpath),
                relpath,
                fpath,
            )
            set_timestamp_now(d, f, "builtTime")

            if fn.endswith(".manifest"):
                f.software_primaryPurpose = oe.spdx30.software_SoftwarePurpose.manifest
            elif fn.endswith(".testdata.json"):
                f.software_primaryPurpose = (
                    oe.spdx30.software_SoftwarePurpose.configuration
                )
            else:
                set_purposes(d, f, "SPDX_SDK_PURPOSE")
                root_files.append(f)

            files.add(f)

    if files:
        # Pass a sorted list rather than the set itself so the order of the
        # related elements does not depend on set iteration order
        rootfs_objset.new_scoped_relationship(
            [sdk_build],
            oe.spdx30.RelationshipType.hasOutput,
            oe.spdx30.LifecycleScopeType.build,
            sorted(files),
        )
    else:
        bb.warn(f"No SDK output files found in {sdk_deploydir}")

    objset, sbom = oe.sbom30.create_sbom(
        d, toolchain_outputname, sorted(files), [rootfs_objset]
    )

    oe.sbom30.write_jsonld_doc(
        d, objset, sdk_deploydir / (toolchain_outputname + ".spdx.json")
    )