diff options
| author | Luis Augenstein <luis.augenstein@tngtech.com> | 2026-05-18 08:20:55 +0200 |
|---|---|---|
| committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2026-05-22 13:14:41 +0200 |
| commit | a68a29a1cc3ae6c129acdf945964dea16c8a49dc (patch) | |
| tree | f42f59c81770b158ef2d7a728aea47337384cb2f | |
| parent | 06f4e57165caf3012876b211cb687ba802188aff (diff) | |
| download | linux-next-history-a68a29a1cc3ae6c129acdf945964dea16c8a49dc.tar.gz | |
scripts/sbom: add JSON-LD serialization
Add infrastructure to serialize an SPDX graph as a JSON-LD
document. NamespaceMaps in the SPDX document are converted
to custom prefixes in the @context field of the JSON-LD output.
The SBOM tool uses NamespaceMaps solely to shorten SPDX IDs,
avoiding repetition of full namespace URIs by using short prefixes.
Assisted-by: Cursor:claude-sonnet-4-5
Assisted-by: OpenCode:GLM-4-7
Co-developed-by: Maximilian Huber <maximilian.huber@tngtech.com>
Signed-off-by: Maximilian Huber <maximilian.huber@tngtech.com>
Signed-off-by: Luis Augenstein <luis.augenstein@tngtech.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Makefile | 3 |
| -rw-r--r-- | scripts/sbom/sbom.py | 56 |
| -rw-r--r-- | scripts/sbom/sbom/config.py | 56 |
| -rw-r--r-- | scripts/sbom/sbom/spdx_graph/__init__.py | 7 |
| -rw-r--r-- | scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py | 36 |
| -rw-r--r-- | scripts/sbom/sbom/spdx_graph/spdx_graph_model.py | 36 |
6 files changed, 193 insertions, 1 deletion
diff --git a/Makefile b/Makefile index 4c6133af55496..2443d4c824548 100644 --- a/Makefile +++ b/Makefile @@ -2212,7 +2212,8 @@ quiet_cmd_sbom = GEN $(sbom_targets) --src-tree $(abspath $(srctree)) \ --obj-tree $(abspath $(objtree)) \ --roots-file "$(tmp-target)" \ - --output-directory $(abspath $(objtree)); + --output-directory $(abspath $(objtree)) \ + --generate-spdx; PHONY += sbom sbom: $(notdir $(KBUILD_IMAGE)) include/generated/autoconf.h $(if $(CONFIG_MODULES),modules modules.order) $(call cmd,sbom) diff --git a/scripts/sbom/sbom.py b/scripts/sbom/sbom.py index d700e4f294f76..764175b9c8933 100644 --- a/scripts/sbom/sbom.py +++ b/scripts/sbom/sbom.py @@ -6,13 +6,18 @@ Compute software bill of materials in SPDX format describing a kernel build. """ +import json import logging import os import sys import time +import uuid import sbom.sbom_logging as sbom_logging from sbom.config import get_config from sbom.path_utils import is_relative_to +from sbom.spdx import JsonLdSpdxDocument, SpdxIdGenerator +from sbom.spdx.core import CreationInfo, SpdxDocument +from sbom.spdx_graph import SpdxIdGeneratorCollection, build_spdx_graphs from sbom.cmd_graph import CmdGraph @@ -71,6 +76,57 @@ def main(): f.write("\n".join(str(file_path) for file_path in used_files)) logging.debug(f"Successfully saved {used_files_path}") + if config.generate_spdx is False: + _exit_with_summary(config.write_output_on_error) + return + + # Build SPDX Documents + logging.debug("Start generating SPDX graph based on cmd graph") + start_time = time.time() + + # The real uuid will be generated based on the content of the SPDX graphs + # to ensure that the same SPDX document is always assigned the same uuid. 
+ PLACEHOLDER_UUID = "00000000-0000-0000-0000-000000000000" + spdx_id_base_namespace = f"{config.spdxId_prefix}{PLACEHOLDER_UUID}/" + spdx_id_generators = SpdxIdGeneratorCollection( + base=SpdxIdGenerator(prefix="p", namespace=spdx_id_base_namespace), + source=SpdxIdGenerator(prefix="s", namespace=f"{spdx_id_base_namespace}source/"), + build=SpdxIdGenerator(prefix="b", namespace=f"{spdx_id_base_namespace}build/"), + output=SpdxIdGenerator(prefix="o", namespace=f"{spdx_id_base_namespace}output/"), + ) + + spdx_graphs = build_spdx_graphs( + cmd_graph, + spdx_id_generators, + config, + ) + spdx_id_uuid = uuid.uuid5( + uuid.NAMESPACE_URL, + "".join( + json.dumps(element.to_dict()) for spdx_graph in spdx_graphs.values() for element in spdx_graph.to_list() + ), + ) + logging.debug(f"Generated SPDX graph in {time.time() - start_time} seconds") + + if not sbom_logging.has_errors() or config.write_output_on_error: + for kernel_sbom_kind, spdx_graph in spdx_graphs.items(): + spdx_graph_objects = spdx_graph.to_list() + # Add warning and error summary to creation info comment + creation_info = next(element for element in spdx_graph_objects if isinstance(element, CreationInfo)) + creation_info.comment = "\n".join([ + sbom_logging.summarize_warnings(), + sbom_logging.summarize_errors(), + ]).strip() + # Replace Placeholder uuid with real uuid for spdxIds + spdx_document = next(element for element in spdx_graph_objects if isinstance(element, SpdxDocument)) + for namespaceMap in spdx_document.namespaceMap: + namespaceMap.namespace = namespaceMap.namespace.replace(PLACEHOLDER_UUID, str(spdx_id_uuid)) + # Serialize SPDX graph to JSON-LD + spdx_doc = JsonLdSpdxDocument(graph=spdx_graph_objects) + save_path = os.path.join(config.output_directory, config.spdx_file_names[kernel_sbom_kind]) + spdx_doc.save(save_path, config.prettify_json) + logging.debug(f"Successfully saved {save_path}") + _exit_with_summary(config.write_output_on_error) diff --git a/scripts/sbom/sbom/config.py 
b/scripts/sbom/sbom/config.py index b8c1a2b404dfc..98c7d939364d7 100644 --- a/scripts/sbom/sbom/config.py +++ b/scripts/sbom/sbom/config.py @@ -3,11 +3,18 @@ import argparse from dataclasses import dataclass +from enum import Enum import os from typing import Any from sbom.path_utils import PathStr +class KernelSpdxDocumentKind(Enum): + SOURCE = "source" + BUILD = "build" + OUTPUT = "output" + + @dataclass class KernelSbomConfig: src_tree: PathStr @@ -19,6 +26,13 @@ class KernelSbomConfig: root_paths: list[PathStr] """List of paths to root outputs (relative to obj_tree) to base the SBOM on.""" + generate_spdx: bool + """Whether to generate SPDX SBOM documents. If False, no SPDX files are created.""" + + spdx_file_names: dict[KernelSpdxDocumentKind, str] + """If `generate_spdx` is True, defines the file names for each SPDX SBOM kind + (source, build, output) to store on disk.""" + generate_used_files: bool """Whether to generate a flat list of all source files used in the build. If False, no used-files document is created.""" @@ -38,6 +52,12 @@ class KernelSbomConfig: write_output_on_error: bool """Whether to write output documents even if errors occur.""" + spdxId_prefix: str + """Prefix to use for all SPDX element IDs.""" + + prettify_json: bool + """Whether to pretty-print generated SPDX JSON documents.""" + def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]: """ @@ -68,6 +88,15 @@ def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]: help="Path to a file containing the root paths (one per line). 
Cannot be used together with --roots.", ) parser.add_argument( + "--generate-spdx", + action="store_true", + default=False, + help=( + "Whether to create sbom-source.spdx.json, sbom-build.spdx.json and " + "sbom-output.spdx.json documents (default: False)" + ), + ) + parser.add_argument( "--generate-used-files", action="store_true", default=False, @@ -114,6 +143,20 @@ def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]: ), ) + # SPDX specific options + spdx_group = parser.add_argument_group("SPDX options", "Options for customizing SPDX document generation") + spdx_group.add_argument( + "--spdxId-prefix", + default="urn:spdx.dev:", + help="The prefix to use for all spdxId properties. (default: urn:spdx.dev:)", + ) + spdx_group.add_argument( + "--prettify-json", + action="store_true", + default=False, + help="Whether to pretty print the generated spdx.json documents (default: False)", + ) + args = vars(parser.parse_args()) return args @@ -144,6 +187,7 @@ def get_config() -> KernelSbomConfig: root_paths = args["roots"] _validate_path_arguments(parser, src_tree, obj_tree, root_paths) + generate_spdx = args["generate_spdx"] generate_used_files = args["generate_used_files"] output_directory = os.path.realpath(args["output_directory"]) debug = args["debug"] @@ -151,19 +195,31 @@ def get_config() -> KernelSbomConfig: fail_on_unknown_build_command = not args["do_not_fail_on_unknown_build_command"] write_output_on_error = args["write_output_on_error"] + spdxId_prefix = args["spdxId_prefix"] + prettify_json = args["prettify_json"] + # Hardcoded config + spdx_file_names = { + KernelSpdxDocumentKind.SOURCE: "sbom-source.spdx.json", + KernelSpdxDocumentKind.BUILD: "sbom-build.spdx.json", + KernelSpdxDocumentKind.OUTPUT: "sbom-output.spdx.json", + } used_files_file_name = "sbom.used-files.txt" return KernelSbomConfig( src_tree=src_tree, obj_tree=obj_tree, root_paths=root_paths, + generate_spdx=generate_spdx, + spdx_file_names=spdx_file_names, 
generate_used_files=generate_used_files, used_files_file_name=used_files_file_name, output_directory=output_directory, debug=debug, fail_on_unknown_build_command=fail_on_unknown_build_command, write_output_on_error=write_output_on_error, + spdxId_prefix=spdxId_prefix, + prettify_json=prettify_json, ) diff --git a/scripts/sbom/sbom/spdx_graph/__init__.py b/scripts/sbom/sbom/spdx_graph/__init__.py new file mode 100644 index 0000000000000..3557b1d51bf93 --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from .build_spdx_graphs import build_spdx_graphs +from .spdx_graph_model import SpdxIdGeneratorCollection + +__all__ = ["build_spdx_graphs", "SpdxIdGeneratorCollection"] diff --git a/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py new file mode 100644 index 0000000000000..bb3db4e423da4 --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + + +from typing import Protocol + +from sbom.config import KernelSpdxDocumentKind +from sbom.cmd_graph import CmdGraph +from sbom.path_utils import PathStr +from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection + + +class SpdxGraphConfig(Protocol): + obj_tree: PathStr + src_tree: PathStr + + +def build_spdx_graphs( + cmd_graph: CmdGraph, + spdx_id_generators: SpdxIdGeneratorCollection, + config: SpdxGraphConfig, +) -> dict[KernelSpdxDocumentKind, SpdxGraph]: + """ + Builds SPDX graphs (output, source, and build) based on a cmd dependency graph. + If the source and object trees are identical, no dedicated source graph can be created. + In that case the source files are added to the build graph instead. + + Args: + cmd_graph: The dependency graph of a kernel build. 
+ spdx_id_generators: Collection of SPDX ID generators. + config: Configuration options. + + Returns: + Dictionary of SPDX graphs + """ + return {} diff --git a/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py b/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py new file mode 100644 index 0000000000000..682194d4362a2 --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass +from sbom.spdx.core import CreationInfo, SoftwareAgent, SpdxDocument, SpdxObject +from sbom.spdx.software import Sbom +from sbom.spdx.spdxId import SpdxIdGenerator + + +@dataclass +class SpdxGraph: + """Represents the complete graph of a single SPDX document.""" + + spdx_document: SpdxDocument + agent: SoftwareAgent + creation_info: CreationInfo + sbom: Sbom + + def to_list(self) -> list[SpdxObject]: + return [ + self.spdx_document, + self.agent, + self.creation_info, + self.sbom, + *self.sbom.element, + ] + + +@dataclass +class SpdxIdGeneratorCollection: + """Holds SPDX ID generators for different document types to ensure globally unique SPDX IDs.""" + + base: SpdxIdGenerator + source: SpdxIdGenerator + build: SpdxIdGenerator + output: SpdxIdGenerator |
