aboutsummaryrefslogtreecommitdiffstats
path: root/scripts
diff options
authorLinus Torvalds <torvalds@linux-foundation.org>2026-06-22 12:06:22 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2026-06-22 12:06:22 -0700
commite4b4bfaa5090760925b98848aa3e0fc10b3c574f (patch)
tree1a67add78f7c9734602fa2816644a3db22ae86a0 /scripts
parent8a500fd09385a13ba598cda651f2e4ac40bfa578 (diff)
parent880bae5f1269b4d81bb2a254963e84377cd37bc1 (diff)
downloadath-e4b4bfaa5090760925b98848aa3e0fc10b3c574f.tar.gz
Merge tag 'spdx-7.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx
Pull SPDX updates from Greg KH: "Here is a "big" set of SPDX-like patches for 7.2-rc1. It is the addition of the ability for the kernel build process to generate a Software Bill of Materials (SBOM) in the SPDX format, that matches up exactly with just the files that are actually built for the specific kernel image generated. To generate a sbom, after the kernel has been built, just do: make sbom and marvel at the JSON file that is generated... This is needed by users for environments in which a SBOM is required (medical, automotive, anything shipped in the EU, etc.) and cuts down by a massive size the "naive" SBOM solution that many vendors have done by just including _all_ of the kernel files in the resulting document. This result is still a giant JSON file, that I am told parses properly, so we just have to trust that it is properly inclusive as attempting to parse that thing by hand is impossible. The scripts here are self-contained python scripts, no additional libraries or tools to create the SBOM are needed, which is important for many build systems. Overall it's just a bit over 4000 lines of "simple" python code, the most complex part is the regex matching lines, but those are nothing compared to what we maintain in scripts/checkpatch.pl today... The various parts where the tool touches the kbuild subsystem have been acked by the kbuild maintainer, so all should be good here. 
All of these patches have been in linux-next for weeks with no reported problems" * tag 'spdx-7.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx: scripts/sbom: add unit tests for SPDX-License-Identifier parsing scripts/sbom: add unit tests for command parsers scripts/sbom: add SPDX build graph scripts/sbom: add SPDX source graph scripts/sbom: add SPDX output graph scripts/sbom: collect file metadata scripts/sbom: add shared SPDX elements scripts/sbom: add JSON-LD serialization scripts/sbom: add SPDX classes scripts/sbom: add additional dependency sources for cmd graph scripts/sbom: add cmd graph generation scripts/sbom: add command parsers scripts/sbom: setup sbom logging scripts/sbom: integrate script in make process scripts/sbom: add documentation
Diffstat (limited to 'scripts')
-rw-r--r--scripts/sbom/sbom.py135
-rw-r--r--scripts/sbom/sbom/__init__.py0
-rw-r--r--scripts/sbom/sbom/cmd_graph/__init__.py7
-rw-r--r--scripts/sbom/sbom/cmd_graph/cmd_file.py162
-rw-r--r--scripts/sbom/sbom/cmd_graph/cmd_graph.py46
-rw-r--r--scripts/sbom/sbom/cmd_graph/cmd_graph_node.py142
-rw-r--r--scripts/sbom/sbom/cmd_graph/deps_parser.py52
-rw-r--r--scripts/sbom/sbom/cmd_graph/hardcoded_dependencies.py87
-rw-r--r--scripts/sbom/sbom/cmd_graph/incbin_parser.py42
-rw-r--r--scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py6
-rw-r--r--scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py516
-rw-r--r--scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py128
-rw-r--r--scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py67
-rw-r--r--scripts/sbom/sbom/cmd_graph/savedcmd_parser/tokenizer.py92
-rw-r--r--scripts/sbom/sbom/config.py320
-rw-r--r--scripts/sbom/sbom/environment.py192
-rw-r--r--scripts/sbom/sbom/path_utils.py22
-rw-r--r--scripts/sbom/sbom/sbom_logging.py94
-rw-r--r--scripts/sbom/sbom/spdx/__init__.py7
-rw-r--r--scripts/sbom/sbom/spdx/build.py17
-rw-r--r--scripts/sbom/sbom/spdx/core.py170
-rw-r--r--scripts/sbom/sbom/spdx/serialization.py62
-rw-r--r--scripts/sbom/sbom/spdx/simplelicensing.py20
-rw-r--r--scripts/sbom/sbom/spdx/software.py69
-rw-r--r--scripts/sbom/sbom/spdx/spdxId.py36
-rw-r--r--scripts/sbom/sbom/spdx_graph/__init__.py7
-rw-r--r--scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py83
-rw-r--r--scripts/sbom/sbom/spdx_graph/kernel_file.py315
-rw-r--r--scripts/sbom/sbom/spdx_graph/shared_spdx_elements.py32
-rw-r--r--scripts/sbom/sbom/spdx_graph/spdx_build_graph.py318
-rw-r--r--scripts/sbom/sbom/spdx_graph/spdx_graph_model.py36
-rw-r--r--scripts/sbom/sbom/spdx_graph/spdx_output_graph.py187
-rw-r--r--scripts/sbom/sbom/spdx_graph/spdx_source_graph.py130
-rw-r--r--scripts/sbom/tests/__init__.py0
-rw-r--r--scripts/sbom/tests/cmd_graph/__init__.py0
-rw-r--r--scripts/sbom/tests/cmd_graph/test_savedcmd_parser.py443
-rw-r--r--scripts/sbom/tests/spdx_graph/__init__.py0
-rw-r--r--scripts/sbom/tests/spdx_graph/test_kernel_file.py35
38 files changed, 4077 insertions, 0 deletions
diff --git a/scripts/sbom/sbom.py b/scripts/sbom/sbom.py
new file mode 100644
index 0000000000000..764175b9c8933
--- /dev/null
+++ b/scripts/sbom/sbom.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+"""
+Compute software bill of materials in SPDX format describing a kernel build.
+"""
+
+import json
+import logging
+import os
+import sys
+import time
+import uuid
+import sbom.sbom_logging as sbom_logging
+from sbom.config import get_config
+from sbom.path_utils import is_relative_to
+from sbom.spdx import JsonLdSpdxDocument, SpdxIdGenerator
+from sbom.spdx.core import CreationInfo, SpdxDocument
+from sbom.spdx_graph import SpdxIdGeneratorCollection, build_spdx_graphs
+from sbom.cmd_graph import CmdGraph
+
+
def _exit_with_summary(write_output_on_error: bool = False) -> None:
    """
    Log the collected warning and error summaries and exit with status 1 on errors.

    Args:
        write_output_on_error: When False and errors were collected, an additional hint
            about the `--write-output-on-error` option is logged before exiting.
    """
    warnings_text = sbom_logging.summarize_warnings()
    errors_text = sbom_logging.summarize_errors()

    if warnings_text:
        logging.warning(warnings_text)

    # Without an error summary there is nothing more to report and no reason to exit.
    if not errors_text:
        return

    logging.error(errors_text)
    if not write_output_on_error:
        logging.info(
            "Use --write-output-on-error to generate output documents even when errors occur. "
            "Note that in this case the generated documents may be incomplete."
        )
    sys.exit(1)
+
+
def main():
    """
    Entry point of the SBOM generation script.

    Reads the configuration, builds the cmd graph, optionally writes the list of
    used files, optionally builds and saves the SPDX documents and finally reports
    the collected warnings and errors.
    """
    config = get_config()

    # Configure logging
    log_level = logging.DEBUG if config.debug else logging.INFO
    logging.basicConfig(level=log_level, format="[%(levelname)s] %(message)s")

    # Build cmd graph
    logging.debug("Start building cmd graph")
    cmd_graph_start = time.time()
    cmd_graph = CmdGraph.create(config.root_paths, config)
    logging.debug(f"Built cmd graph in {time.time() - cmd_graph_start} seconds")

    # Save used files document
    if config.generate_used_files:
        if config.src_tree == config.obj_tree:
            logging.info(
                f"Extracting all files from the cmd graph to {config.used_files_file_name} "
                "instead of only source files because source files cannot be "
                "reliably classified when the source and object trees are identical.",
            )
            used_files = [os.path.relpath(node.absolute_path, config.src_tree) for node in cmd_graph]
            logging.debug(f"Found {len(used_files)} files in cmd graph.")
        else:
            # Keep only files that live in the source tree but not in the object tree.
            used_files = []
            for node in cmd_graph:
                if not is_relative_to(node.absolute_path, config.src_tree):
                    continue
                if is_relative_to(node.absolute_path, config.obj_tree):
                    continue
                used_files.append(os.path.relpath(node.absolute_path, config.src_tree))
            logging.debug(f"Found {len(used_files)} source files in cmd graph")

        if not sbom_logging.has_errors() or config.write_output_on_error:
            used_files_path = os.path.join(config.output_directory, config.used_files_file_name)
            with open(used_files_path, "w", encoding="utf-8") as used_files_file:
                used_files_file.write("\n".join(map(str, used_files)))
            logging.debug(f"Successfully saved {used_files_path}")

    if config.generate_spdx is False:
        _exit_with_summary(config.write_output_on_error)
        return

    # Build SPDX Documents
    logging.debug("Start generating SPDX graph based on cmd graph")
    spdx_start = time.time()

    # The real uuid will be generated based on the content of the SPDX graphs
    # to ensure that the same SPDX document is always assigned the same uuid.
    PLACEHOLDER_UUID = "00000000-0000-0000-0000-000000000000"
    spdx_id_base_namespace = f"{config.spdxId_prefix}{PLACEHOLDER_UUID}/"
    spdx_id_generators = SpdxIdGeneratorCollection(
        base=SpdxIdGenerator(prefix="p", namespace=spdx_id_base_namespace),
        source=SpdxIdGenerator(prefix="s", namespace=f"{spdx_id_base_namespace}source/"),
        build=SpdxIdGenerator(prefix="b", namespace=f"{spdx_id_base_namespace}build/"),
        output=SpdxIdGenerator(prefix="o", namespace=f"{spdx_id_base_namespace}output/"),
    )

    spdx_graphs = build_spdx_graphs(cmd_graph, spdx_id_generators, config)

    # Derive the uuid from the serialized elements of all graphs.
    serialized_elements = "".join(
        json.dumps(element.to_dict())
        for spdx_graph in spdx_graphs.values()
        for element in spdx_graph.to_list()
    )
    spdx_id_uuid = uuid.uuid5(uuid.NAMESPACE_URL, serialized_elements)
    logging.debug(f"Generated SPDX graph in {time.time() - spdx_start} seconds")

    if not sbom_logging.has_errors() or config.write_output_on_error:
        for kernel_sbom_kind, spdx_graph in spdx_graphs.items():
            spdx_graph_objects = spdx_graph.to_list()

            # Add warning and error summary to creation info comment
            creation_info = next(element for element in spdx_graph_objects if isinstance(element, CreationInfo))
            summaries = [sbom_logging.summarize_warnings(), sbom_logging.summarize_errors()]
            creation_info.comment = "\n".join(summaries).strip()

            # Replace Placeholder uuid with real uuid for spdxIds
            spdx_document = next(element for element in spdx_graph_objects if isinstance(element, SpdxDocument))
            for namespace_map in spdx_document.namespaceMap:
                namespace_map.namespace = namespace_map.namespace.replace(PLACEHOLDER_UUID, str(spdx_id_uuid))

            # Serialize SPDX graph to JSON-LD
            save_path = os.path.join(config.output_directory, config.spdx_file_names[kernel_sbom_kind])
            JsonLdSpdxDocument(graph=spdx_graph_objects).save(save_path, config.prettify_json)
            logging.debug(f"Successfully saved {save_path}")

    _exit_with_summary(config.write_output_on_error)


# Run main only when executed as a script
if __name__ == "__main__":
    main()
diff --git a/scripts/sbom/sbom/__init__.py b/scripts/sbom/sbom/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
--- /dev/null
+++ b/scripts/sbom/sbom/__init__.py
diff --git a/scripts/sbom/sbom/cmd_graph/__init__.py b/scripts/sbom/sbom/cmd_graph/__init__.py
new file mode 100644
index 0000000000000..9d661a5c3d93f
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from .cmd_graph import CmdGraph
+from .cmd_graph_node import CmdGraphNode, CmdGraphNodeConfig
+
+__all__ = ["CmdGraph", "CmdGraphNode", "CmdGraphNodeConfig"]
diff --git a/scripts/sbom/sbom/cmd_graph/cmd_file.py b/scripts/sbom/sbom/cmd_graph/cmd_file.py
new file mode 100644
index 0000000000000..dcd63e284a38c
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/cmd_file.py
@@ -0,0 +1,162 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import os
+import re
+from dataclasses import dataclass, field
+from sbom.cmd_graph.deps_parser import parse_cmd_file_deps
+from sbom.cmd_graph.savedcmd_parser import parse_inputs_from_commands
+import sbom.sbom_logging as sbom_logging
+from sbom.path_utils import PathStr
+
SAVEDCMD_PATTERN = re.compile(r"^(saved)?cmd_.*?:=\s*(?P<full_command>.+)$")
SOURCE_PATTERN = re.compile(r"^source.*?:=\s*(?P<source_file>.+)$")


@dataclass
class CmdFile:
    """
    Parsed content of a `.cmd` file.

    Attributes:
        cmd_file_path: path of the `.cmd` file that was parsed.
        savedcmd: command text found after `:=` in the first line.
        source: value of the `source_` entry, if the file has one.
        deps: raw dependency strings as listed in the file.
        make_rules: remaining lines following the dependency list.
    """

    cmd_file_path: PathStr
    savedcmd: str
    source: PathStr | None = None
    deps: list[str] = field(default_factory=list)
    make_rules: list[str] = field(default_factory=list)

    @classmethod
    def create(cls, cmd_file_path: PathStr) -> "CmdFile | None":
        r"""
        Parses a .cmd file.
        .cmd files are assumed to have one of the following structures:
        1. Full Cmd File
            (saved)?cmd_<output> := <command>
            source_<output> := <main_input>
            deps_<output> := \
              <dependencies>
            <output> := $(deps_<output>)
            $(deps_<output>):

        2. Command Only Cmd File
            (saved)?cmd_<output> := <command>

        3. Single Dependency Cmd File
            (saved)?cmd_<output> := <command>
            <output> : <dependency>

        Empty lines and lines starting with '#' are ignored.

        Args:
            cmd_file_path: absolute path to a .cmd file

        Returns:
            The parsed cmd file, or None if the first line holds no command or the
            dependency line of a two line file is malformed. If a longer file has no
            'source_' entry, only the command is kept.
        """
        lines: list[str] = []
        with open(cmd_file_path, "rt", encoding="utf-8") as cmd_file:
            for raw_line in cmd_file:
                stripped_line = raw_line.strip()
                if stripped_line != "" and not raw_line.startswith("#"):
                    lines.append(stripped_line)

        # savedcmd
        first_line = lines[0] if lines else ""
        command_match = SAVEDCMD_PATTERN.match(first_line)
        if command_match is None:
            sbom_logging.error(
                "Skip parsing '{cmd_file_path}' because no 'savedcmd_' command was found.", cmd_file_path=cmd_file_path
            )
            return None
        savedcmd = command_match.group("full_command")

        line_count = len(lines)

        # Command Only Cmd File
        if line_count == 1:
            return CmdFile(cmd_file_path, savedcmd)

        # Single Dependency Cmd File
        if line_count == 2:
            output_and_dependency = lines[1].split(":", 1)
            if len(output_and_dependency) != 2:
                sbom_logging.error(
                    "Skip parsing '{cmd_file_path}'. Expected dependency line '<output>: <dependency>' but got {second_line}",
                    cmd_file_path=cmd_file_path,
                    second_line=lines[1],
                )
                return None
            return CmdFile(cmd_file_path, savedcmd, deps=[output_and_dependency[1].strip()])

        # Full Cmd File
        # source
        source_match = SOURCE_PATTERN.match(lines[1])
        if source_match is None:
            sbom_logging.error(
                "Skip parsing '{cmd_file_path}' because no 'source_' entry was found.", cmd_file_path=cmd_file_path
            )
            return CmdFile(cmd_file_path, savedcmd)
        source = source_match.group("source_file")

        # deps
        # lines[2] includes the variable assignment but no actual dependency, so we need to start at lines[3].
        deps: list[str] = []
        rules_start = 3
        while rules_start < line_count and lines[rules_start].endswith("\\"):
            # Drop the trailing line continuation character.
            deps.append(lines[rules_start][:-1].strip())
            rules_start += 1

        # make_rules
        make_rules = lines[rules_start:]

        return CmdFile(cmd_file_path, savedcmd, source, deps, make_rules)

    def get_dependencies(
        self: "CmdFile", target_path: PathStr, obj_tree: PathStr, fail_on_unknown_build_command: bool
    ) -> list[PathStr]:
        """
        Parses all dependencies required to build a target file from its cmd file.

        Args:
            target_path: path to the target file relative to `obj_tree`.
            obj_tree: absolute path to the object tree.
            fail_on_unknown_build_command: Whether to fail if an unknown build command is encountered.

        Returns:
            list[PathStr]: dependency file paths relative to `obj_tree`, without duplicates
            and in the order of their first occurrence.
        """
        input_files: list[PathStr] = list(
            map(str, parse_inputs_from_commands(self.savedcmd, fail_on_unknown_build_command))
        )
        if self.deps:
            input_files.extend(map(str, parse_cmd_file_deps(self.deps)))
        input_files = _expand_resolve_files(input_files, obj_tree)

        # A dict keeps insertion order, so it doubles as an ordered set.
        unique_dependencies: dict[PathStr, None] = {}
        for input_file in input_files:
            # input files are either absolute or relative to the object tree
            if os.path.isabs(input_file):
                input_file = os.path.relpath(input_file, obj_tree)
            # Skip target file to prevent cycles. This is necessary because some multi stage commands
            # first create an output and then pass it as input to the next command, e.g., objcopy.
            if input_file == target_path:
                continue
            unique_dependencies[input_file] = None
        return list(unique_dependencies)
+
+
+def _expand_resolve_files(input_files: list[PathStr], obj_tree: PathStr) -> list[PathStr]:
+ """
+ Expands resolve files which may reference additional files via '@' notation.
+
+ Args:
+ input_files (list[PathStr]): List of file paths relative to the object tree, where paths starting with '@' refer to files
+ containing further file paths, each on a separate line.
+ obj_tree: Absolute path to the root of the object tree.
+
+ Returns:
+ list[PathStr]: Flattened list of all input file paths, with any nested '@' file references resolved recursively.
+ """
+ expanded_input_files: list[PathStr] = []
+ for input_file in input_files:
+ if not input_file.startswith("@"):
+ expanded_input_files.append(input_file)
+ continue
+ resolve_file_path = os.path.join(obj_tree, input_file.removeprefix("@"))
+ if not os.path.exists(resolve_file_path):
+ sbom_logging.error(
+ "Skip resolving '{resolve_file_path}' because the response file does not exist.",
+ resolve_file_path=resolve_file_path,
+ )
+ continue
+ with open(resolve_file_path, "rt", encoding="utf-8") as f:
+ resolve_file_content = [line_stripped for line in f.readlines() if (line_stripped := line.strip())]
+ expanded_input_files += _expand_resolve_files(resolve_file_content, obj_tree)
+ return expanded_input_files
diff --git a/scripts/sbom/sbom/cmd_graph/cmd_graph.py b/scripts/sbom/sbom/cmd_graph/cmd_graph.py
new file mode 100644
index 0000000000000..2f57965237f44
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/cmd_graph.py
@@ -0,0 +1,46 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from collections import deque
+from dataclasses import dataclass, field
+from typing import Iterator
+
+from sbom.cmd_graph.cmd_graph_node import CmdGraphNode, CmdGraphNodeConfig
+from sbom.path_utils import PathStr
+
+
@dataclass
class CmdGraph:
    """Directed acyclic graph of build dependencies primarily inferred from .cmd files produced during kernel builds"""

    roots: list[CmdGraphNode] = field(default_factory=list)

    @classmethod
    def create(cls, root_paths: list[PathStr], config: CmdGraphNodeConfig) -> "CmdGraph":
        """
        Builds the dependency graph for all `root_paths`.
        All roots share one node cache, so a file is represented by a single node.

        Args:
            root_paths (list[PathStr]): List of paths to root outputs relative to obj_tree
            config (CmdGraphNodeConfig): Configuration options

        Returns:
            CmdGraph: A graph of all build dependencies for the given root files.
        """
        shared_cache: dict[PathStr, CmdGraphNode] = {}
        roots: list[CmdGraphNode] = []
        for root_path in root_paths:
            roots.append(CmdGraphNode.create(root_path, config, shared_cache))
        return CmdGraph(roots)

    def __iter__(self) -> Iterator[CmdGraphNode]:
        """Yield every node once, visiting the graph breadth-first starting at the roots."""
        seen_paths: set[PathStr] = set()
        pending: deque[CmdGraphNode] = deque(self.roots)
        while pending:
            current = pending.popleft()
            if current.absolute_path not in seen_paths:
                seen_paths.add(current.absolute_path)
                pending.extend(current.children)
                yield current
diff --git a/scripts/sbom/sbom/cmd_graph/cmd_graph_node.py b/scripts/sbom/sbom/cmd_graph/cmd_graph_node.py
new file mode 100644
index 0000000000000..61f3a8140cea3
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/cmd_graph_node.py
@@ -0,0 +1,142 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from itertools import chain
+import logging
+import os
+from typing import Iterator, Protocol
+
+from sbom import sbom_logging
+from sbom.cmd_graph.cmd_file import CmdFile
+from sbom.cmd_graph.hardcoded_dependencies import get_hardcoded_dependencies
+from sbom.cmd_graph.incbin_parser import parse_incbin_statements
+from sbom.path_utils import PathStr, has_link, is_relative_to
+
+
@dataclass
class IncbinDependency:
    """A dependency node together with the `.incbin` statement it was created from."""

    # Graph node built for the path of the `.incbin` statement.
    node: "CmdGraphNode"
    # Text of the `.incbin` statement that led to this dependency.
    full_statement: str
+
+
class CmdGraphNodeConfig(Protocol):
    """Configuration attributes that are read while building cmd graph nodes."""

    # Base directory used to resolve target paths.
    obj_tree: PathStr
    # Source tree; used together with obj_tree when looking up hardcoded dependencies.
    src_tree: PathStr
    # Passed on to the .cmd file dependency parsing.
    fail_on_unknown_build_command: bool
+
+
@dataclass
class CmdGraphNode:
    """
    A node in the cmd graph representing a single file and its dependencies.

    Attributes:
        absolute_path: Absolute path to the file this node represents.
        cmd_file: Parsed .cmd file describing how the file at absolute_path was built,
            or None if not available.
        cmd_file_dependencies: Child nodes found via the .cmd file.
        incbin_dependencies: Child nodes found via `.incbin` statements.
        hardcoded_dependencies: Child nodes taken from the hardcoded dependency table.
    """

    absolute_path: PathStr
    cmd_file: CmdFile | None = None
    cmd_file_dependencies: list["CmdGraphNode"] = field(default_factory=list)
    incbin_dependencies: list[IncbinDependency] = field(default_factory=list)
    hardcoded_dependencies: list["CmdGraphNode"] = field(default_factory=list)

    @property
    def children(self) -> Iterator["CmdGraphNode"]:
        """Yield each child node once, ordered: cmd file, incbin, then hardcoded dependencies."""
        yielded_paths: set[PathStr] = set()
        incbin_nodes = (dependency.node for dependency in self.incbin_dependencies)
        for child in chain(self.cmd_file_dependencies, incbin_nodes, self.hardcoded_dependencies):
            if child.absolute_path in yielded_paths:
                continue
            yielded_paths.add(child.absolute_path)
            yield child

    @classmethod
    def create(
        cls,
        target_path: PathStr,
        config: CmdGraphNodeConfig,
        cache: dict[PathStr, "CmdGraphNode"] | None = None,
        depth: int = 0,
    ) -> "CmdGraphNode":
        """
        Recursively builds a dependency graph starting from `target_path`.
        Dependencies are mainly discovered by parsing the `.<target_path.name>.cmd` file.

        Args:
            target_path: Path to the target file relative to obj_tree.
            config: Config options
            cache: Tracks processed nodes to prevent cycles.
            depth: Internal parameter to track the current recursion depth.

        Returns:
            CmdGraphNode: cmd graph node representing the target file
        """
        if cache is None:
            cache = {}

        # Resolve links if the path contains any, otherwise only normalize the path.
        joined_path = os.path.join(config.obj_tree, target_path)
        if has_link(joined_path):
            target_path_absolute = os.path.realpath(joined_path)
        else:
            target_path_absolute = os.path.normpath(joined_path)

        if target_path_absolute in cache:
            return cache[target_path_absolute]

        if depth == 0:
            logging.debug(f"Build node: {target_path}")

        cmd_file_path = _to_cmd_path(target_path_absolute)
        if os.path.exists(cmd_file_path):
            cmd_file = CmdFile.create(cmd_file_path)
        else:
            cmd_file = None
        node = CmdGraphNode(target_path_absolute, cmd_file)
        # Register the node before descending so that recursive lookups find it.
        cache[target_path_absolute] = node

        if not os.path.exists(target_path_absolute):
            # Missing files inside the object or source tree are errors, all others only warnings.
            inside_known_tree = is_relative_to(target_path_absolute, config.obj_tree) or is_relative_to(
                target_path_absolute, config.src_tree
            )
            report = sbom_logging.error if inside_known_tree else sbom_logging.warning
            report(
                "Skip parsing '{target_path_absolute}' because file does not exist",
                target_path_absolute=target_path_absolute,
            )
            return node

        # Search for dependencies to add to the graph as child nodes.
        # Child paths are always relative to the output tree.
        def _build_child_node(child_path: PathStr) -> "CmdGraphNode":
            return CmdGraphNode.create(child_path, config, cache, depth + 1)

        hardcoded_nodes: list["CmdGraphNode"] = []
        for hardcoded_path in get_hardcoded_dependencies(target_path_absolute, config.obj_tree, config.src_tree):
            hardcoded_nodes.append(_build_child_node(hardcoded_path))
        node.hardcoded_dependencies = hardcoded_nodes

        if cmd_file is not None:
            cmd_file_nodes: list["CmdGraphNode"] = []
            for dependency_path in cmd_file.get_dependencies(
                target_path, config.obj_tree, config.fail_on_unknown_build_command
            ):
                cmd_file_nodes.append(_build_child_node(dependency_path))
            node.cmd_file_dependencies = cmd_file_nodes

        if node.absolute_path.endswith(".S"):
            incbin_nodes: list[IncbinDependency] = []
            for statement in parse_incbin_statements(node.absolute_path):
                incbin_nodes.append(
                    IncbinDependency(
                        node=_build_child_node(statement.path),
                        full_statement=statement.full_statement,
                    )
                )
            node.incbin_dependencies = incbin_nodes

        return node
+
+
+def _to_cmd_path(path: PathStr) -> PathStr:
+ name = os.path.basename(path)
+ return path.removesuffix(name) + f".{name}.cmd"
diff --git a/scripts/sbom/sbom/cmd_graph/deps_parser.py b/scripts/sbom/sbom/cmd_graph/deps_parser.py
new file mode 100644
index 0000000000000..6a2d92f0778ce
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/deps_parser.py
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import re
+import sbom.sbom_logging as sbom_logging
+from sbom.path_utils import PathStr
+
# Dependencies on config files.
# Example match: "$(wildcard include/config/CONFIG_SOMETHING)"
CONFIG_PATTERN = re.compile(r"\$\(wildcard (include/config/[^)]+)\)")

# Dependencies on the objtool binary.
# Example match: "$(wildcard ./tools/objtool/objtool)"
OBJTOOL_PATTERN = re.compile(r"\$\(wildcard \./tools/objtool/objtool\)")

# Any Makefile wildcard reference.
# Example match: "$(wildcard path/to/file)"
WILDCARD_PATTERN = re.compile(r"\$\(wildcard (?P<path>[^)]+)\)")

# Ordinary paths:
# - ^(\/)?: Optionally starts with a '/'
# - (([\w\-\.,+~=@ ]*)\/)*: Zero or more directory levels
# - [\w\-\.,+~=@ ]+$: Path component (file or directory)
# Example matches: "/foo/bar.c", "dir1/dir2/file.txt", "plainfile"
VALID_PATH_PATTERN = re.compile(r"^(\/)?(([\w\-\.,+~=@ ]*)\/)*[\w\-\.,+~=@ ]+$")


def parse_cmd_file_deps(deps: list[str]) -> list[PathStr]:
    """
    Extract input file paths from the dependency strings of a .cmd file.

    Config and objtool wildcard references are ignored, other wildcard references
    contribute the wrapped path, and ordinary paths are taken as they are. Anything
    else is reported as an error and skipped.

    Args:
        deps: List of dependency strings as found in `.cmd` files.

    Returns:
        input_files: List of input file paths
    """
    input_files: list[PathStr] = []
    for raw_dep in deps:
        dep = raw_dep.strip()

        # config paths like include/config/<CONFIG_NAME> should not be included in the graph
        if CONFIG_PATTERN.match(dep) or OBJTOOL_PATTERN.match(dep):
            continue

        wildcard_match = WILDCARD_PATTERN.match(dep)
        if wildcard_match is not None:
            input_files.append(wildcard_match.group("path"))
        elif VALID_PATH_PATTERN.match(dep):
            input_files.append(dep)
        else:
            sbom_logging.error("Skip parsing dependency {dep} because of unrecognized format", dep=dep)
    return input_files
diff --git a/scripts/sbom/sbom/cmd_graph/hardcoded_dependencies.py b/scripts/sbom/sbom/cmd_graph/hardcoded_dependencies.py
new file mode 100644
index 0000000000000..2eb04d30f4e67
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/hardcoded_dependencies.py
@@ -0,0 +1,87 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import os
+from typing import Callable
+import sbom.sbom_logging as sbom_logging
+from sbom.path_utils import PathStr, is_relative_to
+from sbom.environment import Environment
+
# Maps file paths to the list of dependencies required to build them
# which are not tracked by the .cmd dependency mechanism.
# Paths are relative to either the source tree or the object tree.
HARDCODED_DEPENDENCIES: dict[str, list[str]] = {
    # defined in linux/Kbuild
    "include/generated/rq-offsets.h": ["kernel/sched/rq-offsets.s"],
    "kernel/sched/rq-offsets.s": ["include/generated/asm-offsets.h"],
    "include/generated/bounds.h": ["kernel/bounds.s"],
    "include/generated/asm-offsets.h": ["arch/{arch}/kernel/asm-offsets.s"],
}


def get_hardcoded_dependencies(path: PathStr, obj_tree: PathStr, src_tree: PathStr) -> list[PathStr]:
    """
    Some files in the kernel build process are not tracked by the .cmd dependency mechanism.
    Parsing these dependencies programmatically is too complex for the scope of this project.
    Therefore, this function provides manually defined dependencies to be added to the build graph.

    Args:
        path: absolute path to a file within the src tree or object tree.
        obj_tree: absolute Path to the base directory of the object tree.
        src_tree: absolute Path to the `linux` source directory.

    Returns:
        list[PathStr]: A list of dependency file paths (relative to the object tree) required to build
        the file at the given path. Dependencies found in neither tree are reported as errors and left out.
    """
    # Look the file up by its path relative to the tree it belongs to; the object tree takes precedence.
    if is_relative_to(path, obj_tree):
        path = os.path.relpath(path, obj_tree)
    elif is_relative_to(path, src_tree):
        path = os.path.relpath(path, src_tree)

    dependency_templates = HARDCODED_DEPENDENCIES.get(path)
    if dependency_templates is None:
        return []

    template_variables: dict[str, Callable[[], str | None]] = {
        "arch": lambda: _get_arch(path),
    }

    dependencies: list[PathStr] = []
    for dependency_template in dependency_templates:
        dependency = _evaluate_template(dependency_template, template_variables)
        if dependency is None:
            continue

        if os.path.exists(os.path.join(obj_tree, dependency)):
            dependencies.append(dependency)
            continue

        dependency_absolute = os.path.join(src_tree, dependency)
        if os.path.exists(dependency_absolute):
            dependencies.append(os.path.relpath(dependency_absolute, obj_tree))
            continue

        sbom_logging.error(
            "Skip hardcoded dependency '{dependency}' for '{path}' because the dependency lies neither in the src tree nor the object tree.",
            dependency=dependency,
            path=path,
        )

    return dependencies
+
+
+def _evaluate_template(template: str, variables: dict[str, Callable[[], str | None]]) -> str | None:
+ for key, value_function in variables.items():
+ template_key = "{" + key + "}"
+ if template_key in template:
+ value = value_function()
+ if value is None:
+ return None
+ template = template.replace(template_key, value)
+ return template
+
+
def _get_arch(path: PathStr):
    """Return the value of `Environment.SRCARCH()`, or log an error for `path` and return None if it is unset."""
    srcarch = Environment.SRCARCH()
    if srcarch is not None:
        return srcarch

    sbom_logging.error(
        "Skipped architecture specific hardcoded dependency for '{path}' because the SRCARCH environment variable was not set.",
        path=path,
    )
    return None
diff --git a/scripts/sbom/sbom/cmd_graph/incbin_parser.py b/scripts/sbom/sbom/cmd_graph/incbin_parser.py
new file mode 100644
index 0000000000000..ca289c2b8888b
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/incbin_parser.py
@@ -0,0 +1,42 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+import re
+
+from sbom.path_utils import PathStr
+
# Regex pattern for matching `.incbin "<path>"` statements.
INCBIN_PATTERN = re.compile(r'\s*\.incbin\s+"(?P<path>[^"]+)"')


@dataclass
class IncbinStatement:
    """
    A parsed `.incbin "<path>"` directive.

    Attributes:
        path: path to the file referenced by the `.incbin` directive.
        full_statement: Full `.incbin "<path>"` statement as it appeared in the file,
            without surrounding whitespace.
    """

    path: PathStr
    full_statement: str


def parse_incbin_statements(absolute_path: PathStr) -> list[IncbinStatement]:
    """
    Collects all `.incbin` directives of an `.S` assembly file.

    Args:
        absolute_path: Absolute path to the `.S` assembly file.

    Returns:
        list[IncbinStatement]: Parsed `.incbin` statements in the order they occur in the file.
    """
    with open(absolute_path, "rt", encoding="utf-8") as assembly_file:
        assembly = assembly_file.read()

    statements: list[IncbinStatement] = []
    for found in INCBIN_PATTERN.finditer(assembly):
        # The pattern also consumes leading whitespace, so strip it from the statement.
        statements.append(
            IncbinStatement(
                path=found.group("path"),
                full_statement=found.group(0).strip(),
            )
        )
    return statements
diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py
new file mode 100644
index 0000000000000..d13876af4dfda
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from sbom.cmd_graph.savedcmd_parser.savedcmd_parser import parse_inputs_from_commands
+
+__all__ = ["parse_inputs_from_commands"]
diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py
new file mode 100644
index 0000000000000..a48040b2c13c8
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py
@@ -0,0 +1,516 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import re
+import shlex
+from typing import Callable, Iterator
+
+import sbom.sbom_logging as sbom_logging
+from sbom.environment import Environment
+from sbom.cmd_graph.savedcmd_parser.command_splitter import IfBlock, split_commands
+from sbom.cmd_graph.savedcmd_parser.tokenizer import (
+ CmdParsingError,
+ Option,
+ Positional,
+ tokenize_single_command,
+ tokenize_single_command_positionals_only,
+)
+from sbom.path_utils import PathStr
+
# A command parser receives one command string and returns the input file paths it references.
CommandParser = Callable[[str], list[PathStr]]
# A registry entry pairs a compiled pattern with the parser to use for commands matching it.
CommandParserRegistryEntry = tuple[re.Pattern[str], CommandParser]
+
+
+def _parse_dd_command(command: str) -> list[PathStr]:
+ match = re.match(r"dd.*?if=(\S+)", command)
+ if match:
+ return [match.group(1)]
+ return []
+
+
def _parse_cat_command(command: str) -> list[PathStr]:
    """
    Return the files passed to a `cat` command.

    The command is expected to tokenize to `["cat", input1, input2, ...]`;
    everything after the program name is reported as an input.
    """
    tokens = tokenize_single_command_positionals_only(command)
    # Slicing yields a fresh list, so the tokenizer result is not shared with the caller.
    return tokens[1:]
+
+
def _parse_compound_command(command: str) -> list[PathStr]:
    """
    Parse a `( ... ) > out` or `{ ...; } > out` compound command.

    The text between the grouping characters is split into inner commands and
    each one is handed to the first inner parser whose pattern matches it.
    Inner commands that are if-blocks, have no matching parser, or fail to
    parse are logged as errors and skipped.

    Raises:
        CmdParsingError: If the command is not a group followed by `>`.
    """

    def _parse_bin2c_redirect(c: str) -> list[PathStr]:
        # bin2c reads from stdin: take the text between '<' and '>' as its input file.
        source = c.split("<")[1].split(">")[0].strip()
        return [] if source == "/dev/null" else [source]

    # Order matters: the first pattern that matches an inner command wins.
    inner_parsers: list[CommandParserRegistryEntry] = [
        (re.compile(r"dd\b"), _parse_dd_command),
        (re.compile(r"cat.*?\|"), lambda c: _parse_cat_command(c.split("|")[0])),
        (re.compile(r"cat\b[^|>]*$"), _parse_cat_command),
        (re.compile(r"echo\b"), _parse_noop),
        (re.compile(r"\S+="), _parse_noop),
        (re.compile(r"printf\b"), _parse_noop),
        (re.compile(r"sed\b"), _parse_sed_command),
        (re.compile(r"(.*/)scripts/bin2c\s*<"), _parse_bin2c_redirect),
        (re.compile(r"^:$"), _parse_noop),
    ]

    group = re.match(r"\s*[\(\{](.*)[\)\}]\s*>", command, re.DOTALL)
    if group is None:
        raise CmdParsingError("No inner commands found for compound command")

    collected: list[PathStr] = []
    for inner_command in split_commands(group.group(1)):
        if isinstance(inner_command, IfBlock):
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because IfBlock is not supported",
                inner_command=inner_command,
            )
            continue

        handler = None
        for pattern, candidate in inner_parsers:
            if pattern.match(inner_command):
                handler = candidate
                break
        if handler is None:
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because no matching parser was found",
                inner_command=inner_command,
            )
            continue

        try:
            collected.extend(handler(inner_command))
        except (CmdParsingError, IndexError) as e:
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because of command parsing error: {error_message}",
                inner_command=inner_command,
                error_message=str(e),
            )
    return collected
+
+
def _parse_objcopy_command(command: str) -> list[PathStr]:
    """
    Return the input file of an `objcopy` command.

    `-S` and `-w` are treated as flags without a value. The positionals are
    expected to be `['objcopy', input_file]` or `['objcopy', input_file, output_file]`.
    """
    tokens = tokenize_single_command(command, flag_options=["-S", "-w"])
    positional_values = [token.value for token in tokens if isinstance(token, Positional)]
    input_file = positional_values[1]
    return [input_file]
+
+
+def _parse_link_vmlinux_command(command: str) -> list[PathStr]:
+ """
+ For simplicity we do not parse the `scripts/link-vmlinux.sh` script.
+ Instead the `vmlinux.a` dependency is just hardcoded for now.
+ """
+ return ["vmlinux.a"]
+
+
def _parse_cp_command(command: str) -> list[PathStr]:
    """
    Return the source files of a `cp` command.

    The command is expected to tokenize to `["cp", input1, ..., destination]`;
    the program name and the final destination are dropped.
    """
    tokens = tokenize_single_command_positionals_only(command)
    sources = tokens[1:-1]
    return sources
+
+
+def _parse_noop(command: str) -> list[PathStr]:
+ """
+ No-op parser for commands with no input files (e.g., 'rm', 'true').
+ Returns an empty list.
+ """
+ return []
+
+
def _parse_ar_command(command: str) -> list[PathStr]:
    """
    Return the members added to an archive by an `ar` command.

    The command is expected to tokenize to `['ar', flags, output, input1, input2, ...]`.
    Members are only reported when the flags contain `r` (add files to the
    archive); otherwise there are no relevant inputs and the result is empty.
    """
    args = tokenize_single_command_positionals_only(command)
    adds_members = "r" in args[1]
    return args[3:] if adds_members else []
+
+
def _parse_ar_piped_xargs_command(command: str) -> list[PathStr]:
    """
    Return the archive members of a `printf ... | xargs ar ...` pipeline.

    Only the `printf` stage is inspected. It is expected to tokenize to
    `['printf', '{prefix_path}%s ', input1, input2, ...]`; each input is
    reported with the prefix prepended.
    """
    printf_stage, _xargs_stage = command.split("|", 1)
    args = tokenize_single_command_positionals_only(printf_stage.strip())
    prefix = args[1].removesuffix("%s ")
    prefixed_inputs: list[PathStr] = []
    for name in args[2:]:
        prefixed_inputs.append(f"{prefix}{name}")
    return prefixed_inputs
+
+
+def _parse_gcc_or_clang_command(command: str) -> list[PathStr]:
+ parts = shlex.split(command)
+ # compile mode: expect last positional argument ending in a source file extension to be the input file
+ for part in reversed(parts):
+ if not part.startswith("-") and any(part.endswith(suffix) for suffix in [".c", ".S", ".dts"]):
+ return [part]
+
+ # linking mode: expect all .o files to be the inputs
+ return [p for p in parts if p.endswith(".o")]
+
+
+def _parse_rustc_command(command: str) -> list[PathStr]:
+ parts = shlex.split(command)
+ # expect last positional argument ending in `.rs` to be the input file
+ for part in reversed(parts):
+ if not part.startswith("-") and part.endswith(".rs"):
+ return [part]
+ raise CmdParsingError("Could not find .rs input source file")
+
+
+def _parse_rustdoc_command(command: str) -> list[PathStr]:
+ parts = shlex.split(command)
+ # expect last positional argument ending in `.rs` to be the input file
+ for part in reversed(parts):
+ if not part.startswith("-") and part.endswith(".rs"):
+ return [part]
+ raise CmdParsingError("Could not find .rs input source file")
+
+
def _parse_syscallhdr_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `syscallhdr.sh` invocation.

    `--emit-nr` is treated as a flag without a value. The positionals are
    expected to be `["sh", path/to/syscallhdr.sh, input, output]`.
    """
    tokens = tokenize_single_command(command.strip(), flag_options=["--emit-nr"])
    positional_values = [token.value for token in tokens if isinstance(token, Positional)]
    table_file = positional_values[2]
    return [table_file]
+
+
def _parse_syscalltbl_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `syscalltbl.sh` invocation.

    The positionals are expected to be `["sh", path/to/syscalltbl.sh, input, output]`.
    """
    positional_values: list[str] = []
    for token in tokenize_single_command(command.strip()):
        if isinstance(token, Positional):
            positional_values.append(token.value)
    return [positional_values[2]]
+
+
def _parse_mkcapflags_command(command: str) -> list[PathStr]:
    """
    Return the two input files of a `mkcapflags.sh` invocation.

    The command is expected to tokenize to
    `["sh", path/to/mkcapflags.sh, output, input1, input2]`.
    """
    args = tokenize_single_command_positionals_only(command)
    first_input, second_input = args[3], args[4]
    return [first_input, second_input]
+
+
def _parse_orc_hash_command(command: str) -> list[PathStr]:
    """
    Return the input file of an `orc_hash.sh` invocation.

    The command is expected to tokenize to
    `["sh", path/to/orc_hash.sh, '<', input, '>', output]`, so the input is
    the token following the `<` redirection.
    """
    args = tokenize_single_command_positionals_only(command)
    redirected_input = args[3]
    return [redirected_input]
+
+
def _parse_xen_hypercalls_command(command: str) -> list[PathStr]:
    """
    Return the input files of a `xen-hypercalls.sh` invocation.

    The command is expected to tokenize to
    `["sh", path/to/xen-hypercalls.sh, output, input1, input2, ...]`.
    """
    args = tokenize_single_command_positionals_only(command)
    inputs = args[3:]
    return inputs
+
+
def _parse_gen_initramfs_command(command: str) -> list[PathStr]:
    """
    Return the input files of a `gen_initramfs.sh` invocation.

    Options are ignored. The positionals are expected to be
    `["sh", path/to/gen_initramfs.sh, input1, input2, ...]`.
    """
    positional_values: list[str] = []
    for token in tokenize_single_command(command):
        if isinstance(token, Positional):
            positional_values.append(token.value)
    return positional_values[2:]
+
+
def _parse_vdso2c_command(command: str) -> list[PathStr]:
    """
    Return the two input files of a `vdso2c` invocation.

    The command is expected to tokenize to
    `['vdso2c', raw_input, stripped_input, output]`.
    """
    args = tokenize_single_command_positionals_only(command)
    raw_input, stripped_input = args[1], args[2]
    return [raw_input, stripped_input]
+
+
def _parse_vdsomunge_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `vdsomunge` invocation.

    The command is expected to tokenize to `['vdsomunge', input, output]`.
    """
    args = tokenize_single_command_positionals_only(command)
    input_file = args[1]
    return [input_file]
+
+
def _parse_ld_command(command: str) -> list[PathStr]:
    """
    Return the input files of an `ld` invocation.

    The listed linker options are treated as flags without a value, so the
    token following them is not consumed as their argument. After removing
    options, the positionals are expected to be `["ld", input1, input2, ...]`.
    """
    valueless_flags = [
        "-shared",
        "--no-undefined",
        "--eh-frame-hdr",
        "-Bsymbolic",
        "-r",
        "--no-ld-generated-unwind-info",
        "--no-dynamic-linker",
        "-pie",
        "--no-dynamic-linker--whole-archive",
        "--whole-archive",
        "--no-whole-archive",
        "--start-group",
        "--end-group",
    ]
    tokens = tokenize_single_command(command=command.strip(), flag_options=valueless_flags)
    positional_values = [token.value for token in tokens if isinstance(token, Positional)]
    return positional_values[1:]
+
+
+def _parse_sed_command(command: str) -> list[PathStr]:
+ command_parts = shlex.split(command)
+ # expect command parts to be ["sed", *, input]
+ input = command_parts[-1]
+ if input == "/dev/null":
+ return []
+ return [input]
+
+
def _parse_awk(command: str) -> list[PathStr]:
    """
    Return the input files of an `awk` invocation.

    With a `-f` option the program comes from a script file and the
    positionals are `["awk", input1, input2, ...]`. Without it the first
    argument is the inline program: `["awk", inline_program, input1, ...]`.
    """
    uses_script_file = False
    positional_values: list[str] = []
    for token in tokenize_single_command(command):
        if isinstance(token, Positional):
            positional_values.append(token.value)
        elif isinstance(token, Option) and token.name == "-f":
            uses_script_file = True

    first_input_index = 1 if uses_script_file else 2
    return positional_values[first_input_index:]
+
+
def _parse_nm_piped_command(command: str) -> list[PathStr]:
    """
    Return the input files of an `nm ... | ...` pipeline.

    Only the stage before the first `|` is inspected. `-p` and
    `--defined-only` are treated as flags without a value; the remaining
    positionals are expected to be `["nm", input1, input2, ...]`.
    """
    nm_stage, _downstream = command.split("|", 1)
    tokens = tokenize_single_command(
        command=nm_stage.strip(),
        flag_options=["-p", "--defined-only"],
    )
    positional_values = [token.value for token in tokens if isinstance(token, Positional)]
    return positional_values[1:]
+
+
+def _parse_pnm_to_logo_command(command: str) -> list[PathStr]:
+ command_parts = shlex.split(command)
+ # expect command parts to be ["pnmtologo", <options>, input]
+ return [command_parts[-1]]
+
+
def _parse_relacheck(command: str) -> list[PathStr]:
    """
    Return the input file of a `relacheck` invocation.

    The command is expected to tokenize to `["relacheck", input, log_reference]`.
    """
    args = tokenize_single_command_positionals_only(command)
    checked_file = args[1]
    return [checked_file]
+
+
+def _parse_gen_hyprel_command(command: str) -> list[PathStr]:
+ gen_hyprel_command, _ = command.split(">", 1)
+ command_parts = shlex.split(gen_hyprel_command)
+ # expect command_parts to be ["gen-hyprel", input]
+ return [command_parts[1]]
+
+
def _parse_perl_command(command: str) -> list[PathStr]:
    """
    Return the script passed to a `perl` invocation.

    The command is expected to tokenize to `["perl", input]` (positionals only).
    """
    args = tokenize_single_command_positionals_only(command.strip())
    script = args[1]
    return [script]
+
+
def _parse_strip_command(command: str) -> list[PathStr]:
    """
    Return the input files of a `strip` invocation.

    `--strip-debug` is treated as a flag without a value. The positionals are
    expected to be `["strip", input1, input2, ...]`.
    """
    positional_values: list[str] = []
    for token in tokenize_single_command(command, flag_options=["--strip-debug"]):
        if isinstance(token, Positional):
            positional_values.append(token.value)
    return positional_values[1:]
+
+
def _parse_mkpiggy_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `mkpiggy input > output` invocation.

    Only the part before the first `>` is tokenized; it is expected to be
    `["mkpiggy", input]`.
    """
    invocation, _redirect_target = command.split(">", 1)
    args = tokenize_single_command_positionals_only(invocation)
    input_file = args[1]
    return [input_file]
+
+
+def _parse_relocs_command(command: str) -> list[PathStr]:
+ if ">" not in command:
+ # Only consider relocs commands that redirect output to a file.
+ # If there's no redirection, we assume it produces no output file and therefore has no input we care about.
+ return []
+ relocs_command, _ = command.split(">", 1)
+ command_parts = shlex.split(relocs_command)
+ # expect command_parts to be ["relocs", options, input]
+ return [command_parts[-1]]
+
+
def _parse_mk_elfconfig_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `mk_elfconfig < input > output` invocation.

    The command is expected to tokenize to
    `["mk_elfconfig", "<", input, ">", output]`, so the input is the token
    following the `<` redirection.
    """
    args = tokenize_single_command_positionals_only(command)
    redirected_input = args[2]
    return [redirected_input]
+
+
+def _parse_flex_command(command: str) -> list[PathStr]:
+ parts = shlex.split(command)
+ # expect last positional argument ending in `.l` to be the input file
+ for part in reversed(parts):
+ if not part.startswith("-") and part.endswith(".l"):
+ return [part]
+ raise CmdParsingError("Could not find .l input source file in command")
+
+
+def _parse_bison_command(command: str) -> list[PathStr]:
+ parts = shlex.split(command)
+ # expect last positional argument ending in `.y` to be the input file
+ for part in reversed(parts):
+ if not part.startswith("-") and part.endswith(".y"):
+ return [part]
+ raise CmdParsingError("Could not find input .y input source file in command")
+
+
def _parse_tools_build_command(command: str) -> list[PathStr]:
    """
    Return the input files of a `tools/build` invocation.

    The command is expected to tokenize to
    `["tools/build", "input1", "input2", "input3", "output"]`; the program
    name and the trailing output are dropped.
    """
    args = tokenize_single_command_positionals_only(command)
    inputs = args[1:-1]
    return inputs
+
+
+def _parse_extract_cert_command(command: str) -> list[PathStr]:
+ command_parts = shlex.split(command)
+ # expect command parts to be [path/to/extract-cert, input, output]
+ input = command_parts[1]
+ if not input:
+ return []
+ return [input]
+
+
def _parse_dtc_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `dtc` invocation.

    Every `-Wno-*` argument found in the command is passed to the tokenizer as
    a flag without a value. The remaining positionals are expected to be
    `[path/to/dtc, input]`.
    """
    warning_suppressions: list[str] = []
    for word in shlex.split(command):
        if word.startswith("-Wno-"):
            warning_suppressions.append(word)

    tokens = tokenize_single_command(command, flag_options=warning_suppressions)
    positional_values = [token.value for token in tokens if isinstance(token, Positional)]
    return [positional_values[1]]
+
+
+def _parse_bindgen_command(command: str) -> list[PathStr]:
+ command_parts = shlex.split(command)
+ header_file_input_paths = [part for part in command_parts if part.endswith(".h")]
+ return header_file_input_paths
+
+
+def _parse_gen_header(command: str) -> list[PathStr]:
+ command_parts = shlex.split(command)
+ # expect command parts to be ["python3", path/to/gen_headers.py, ..., "--xml", input]
+ i = next((i for i, token in enumerate(command_parts) if token == "--xml"), None)
+ if i is None:
+ raise CmdParsingError(f"Expected --xml input file in gen_headers command but got {command}")
+ return [command_parts[i + 1]]
+
def _parse_mkuboot_command(command: str) -> list[PathStr]:
    """
    Return the data file of a `mkuboot.sh` invocation.

    mkuboot.sh passes all arguments to mkimage, where `-d` specifies the
    data/input image file. The value of the first `-d` option that carries a
    value is reported.

    Raises:
        CmdParsingError: If no `-d` option with a value is present.
    """
    data_file = next(
        (
            token.value
            for token in tokenize_single_command(command)
            if isinstance(token, Option) and token.name == "-d" and token.value is not None
        ),
        None,
    )
    if data_file is None:
        raise CmdParsingError("Could not find -d (data file) option in mkuboot.sh command")
    return [data_file]
+
+
def _parse_syscallnr_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `syscallnr.sh` invocation.

    The positionals are expected to be `["sh", path/to/syscallnr.sh, input, output]`.
    """
    positional_values: list[str] = []
    for token in tokenize_single_command(command.strip()):
        if isinstance(token, Positional):
            positional_values.append(token.value)
    return [positional_values[2]]
+
+
def _parse_gen_kernel_hwcaps_command(command: str) -> list[PathStr]:
    """
    Return the input file of a `gen-kernel-hwcaps.sh` invocation.

    `-e` is treated as a flag without a value. The positionals are expected to
    be `["sh", path/to/gen-kernel-hwcaps.sh, input]`.
    """
    tokens = tokenize_single_command(command.strip(), flag_options=["-e"])
    positional_values = [token.value for token in tokens if isinstance(token, Positional)]
    input_file = positional_values[2]
    return [input_file]
+
+
class CommandParserRegistry:
    """
    Registry mapping command patterns to their input-file parsers.

    The registry is an ordered list of `(pattern, parser)` entries and can be
    iterated directly. Order is significant for consumers that pick the first
    entry whose pattern matches a command.
    """

    def __init__(self, entries: list[CommandParserRegistryEntry]) -> None:
        # The list is stored as given (not copied).
        self._entries = entries

    def __iter__(self) -> Iterator[CommandParserRegistryEntry]:
        # Iterate over the entries in registration order.
        return iter(self._entries)

    @staticmethod
    def create() -> "CommandParserRegistry":
        """
        Build the registry with all known command parsers.

        The patterns for the toolchain programs (compiler, linker, ar, nm,
        objcopy, strip) accept either a default name pattern or the literal
        value returned by the corresponding `Environment` accessor.
        """

        def env_or_default_pattern(env_value: str | None, default_pattern: str) -> str:
            # No (or blank) environment value: only the default pattern applies.
            if env_value is None or not env_value.strip():
                return default_pattern
            # Otherwise accept the escaped literal environment value OR the default pattern.
            return rf"(?:{re.escape(env_value.strip())}|{default_pattern})"

        # NOTE(review): Environment.* presumably return the matching build variables (CC, LD, ...) — confirm.
        cc_pattern = env_or_default_pattern(Environment.CC(), r"([^\s]+-)?(gcc|clang)")
        ld_pattern = env_or_default_pattern(Environment.LD(), r"([^\s]+-)?ld")
        ar_pattern = env_or_default_pattern(Environment.AR(), r"([^\s]+-)?ar")
        nm_pattern = env_or_default_pattern(Environment.NM(), r"([^\s]+-)?nm")
        objcopy_pattern = env_or_default_pattern(Environment.OBJCOPY(), r"([^\s]+-)?objcopy")
        strip_pattern = env_or_default_pattern(Environment.STRIP(), r"([^\s]+-)?strip")

        entries: list[CommandParserRegistryEntry] = [
            # Compound commands
            (re.compile(r"\(.*?\)\s*>", re.DOTALL), _parse_compound_command),
            (re.compile(r"\{.*?\}\s*>", re.DOTALL), _parse_compound_command),
            # Standard Unix utilities and system tools
            (re.compile(r"^rm\b"), _parse_noop),
            (re.compile(r"^mkdir\b"), _parse_noop),
            (re.compile(r"^touch\b"), _parse_noop),
            (re.compile(r"^cp\b"), _parse_cp_command),
            (re.compile(r"^truncate\b"), _parse_noop),
            # cat with a pipe or redirection: only the part before the first '|' / '>' is parsed
            (re.compile(r"^cat\b.*?[\|>]"), lambda c: _parse_cat_command(c.split("|")[0].split(">")[0])),
            (re.compile(r"^echo[^|]*$"), _parse_noop),
            # sed with redirected output is parsed for its input; any other sed is ignored
            (re.compile(r"^sed.*?>"), lambda c: _parse_sed_command(c.split(">")[0])),
            (re.compile(r"^sed\b"), _parse_noop),
            # awk reading via '<' redirection: the text between '<' and '>' is the input
            (re.compile(r"^awk.*?<.*?>"), lambda c: [c.split("<")[1].split(">")[0]]),
            (re.compile(r"^awk.*?>"), lambda c: _parse_awk(c.split(">")[0])),
            (re.compile(r"^(/bin/)?true\b"), _parse_noop),
            (re.compile(r"^(/bin/)?false\b"), _parse_noop),
            (re.compile(r"^openssl\s+req.*?-new.*?-keyout"), _parse_noop),
            # Compilers and code generators
            # (C/LLVM toolchain, Rust, Flex/Bison, Bindgen, Perl, etc.)
            # For the toolchain entries below, the program name actually used is first
            # rewritten to its canonical name before the command is handed to the parser.
            (
                re.compile(rf"^{cc_pattern}\b"),
                lambda command: _parse_gcc_or_clang_command(re.sub(rf"^{cc_pattern}\b", "gcc", command, count=1)),
            ),
            (
                re.compile(rf"^{ld_pattern}\b"),
                lambda command: _parse_ld_command(re.sub(rf"^{ld_pattern}\b", "ld", command, count=1)),
            ),
            (
                re.compile(rf"^printf\b.*\| xargs {ar_pattern}\b"),
                lambda command: _parse_ar_piped_xargs_command(
                    re.sub(rf"xargs {ar_pattern}\b", "xargs ar", command, count=1)
                ),
            ),
            (
                re.compile(rf"^{ar_pattern}\b"),
                lambda command: _parse_ar_command(re.sub(rf"^{ar_pattern}\b", "ar", command, count=1)),
            ),
            (
                re.compile(rf"^{nm_pattern}\b.*?\|"),
                lambda command: _parse_nm_piped_command(re.sub(rf"^{nm_pattern}\b", "nm", command, count=1)),
            ),
            (
                re.compile(rf"^{objcopy_pattern}\b"),
                lambda command: _parse_objcopy_command(re.sub(rf"^{objcopy_pattern}\b", "objcopy", command, count=1)),
            ),
            (
                re.compile(rf"^{strip_pattern}\b"),
                lambda command: _parse_strip_command(re.sub(rf"^{strip_pattern}\b", "strip", command, count=1)),
            ),
            (re.compile(r".*?rustc\b"), _parse_rustc_command),
            (re.compile(r".*?rustdoc\b"), _parse_rustdoc_command),
            (re.compile(r"^flex\b"), _parse_flex_command),
            (re.compile(r"^bison\b"), _parse_bison_command),
            (re.compile(r"^bindgen\b"), _parse_bindgen_command),
            (re.compile(r"^perl\b"), _parse_perl_command),
            # Kernel-specific build scripts and tools
            (re.compile(r"^(.*/)?link-vmlinux\.sh\b"), _parse_link_vmlinux_command),
            (re.compile(r"sh (.*/)?syscallhdr\.sh\b"), _parse_syscallhdr_command),
            (re.compile(r"sh (.*/)?syscalltbl\.sh\b"), _parse_syscalltbl_command),
            (re.compile(r"sh (.*/)?mkcapflags\.sh\b"), _parse_mkcapflags_command),
            (re.compile(r"sh (.*/)?orc_hash\.sh\b"), _parse_orc_hash_command),
            (re.compile(r"sh (.*/)?xen-hypercalls\.sh\b"), _parse_xen_hypercalls_command),
            (re.compile(r"sh (.*/)?gen_initramfs\.sh\b"), _parse_gen_initramfs_command),
            (re.compile(r"sh (.*/)?checkundef\.sh\b"), _parse_noop),
            (re.compile(r"(bash|sh) (.*/)?mkuboot\.sh\b"), _parse_mkuboot_command),
            (re.compile(r"sh (.*/)?syscallnr\.sh\b"), _parse_syscallnr_command),
            (re.compile(r"(/bin/)?sh (.*/)?gen-kernel-hwcaps\.sh\b"), lambda c: _parse_gen_kernel_hwcaps_command(c.split(">")[0])),
            (re.compile(r"(.*/)?vdso2c\b"), _parse_vdso2c_command),
            (re.compile(r"(.*/)?vdsomunge\b"), _parse_vdsomunge_command),
            (re.compile(r"^(.*/)?mkpiggy.*?>"), _parse_mkpiggy_command),
            (re.compile(r"^(.*/)?relocs\b"), _parse_relocs_command),
            (re.compile(r"^(.*/)?mk_elfconfig.*?<.*?>"), _parse_mk_elfconfig_command),
            (re.compile(r"^(.*/)?tools/build\b"), _parse_tools_build_command),
            (re.compile(r"^(.*/)?certs/extract-cert"), _parse_extract_cert_command),
            (re.compile(r"^(.*/)?scripts/dtc/dtc\b"), _parse_dtc_command),
            (re.compile(r"^(.*/)?pnmtologo\b"), _parse_pnm_to_logo_command),
            (re.compile(r"^(.*/)?kernel/pi/relacheck"), _parse_relacheck),
            (re.compile(r"^(.*/)?gen-hyprel\b"), _parse_gen_hyprel_command),
            # mkregtable: the second space-separated word is the input
            (re.compile(r"^drivers/gpu/drm/radeon/mkregtable"), lambda c: [c.split(" ")[1]]),
            (re.compile(r"(.*/)?genheaders\b"), _parse_noop),
            (re.compile(r"^(.*/)?mkcpustr\s+>"), _parse_noop),
            (re.compile(r"^(.*/)polgen\b"), _parse_noop),
            (re.compile(r"make -f .*/arch/x86/Makefile\.postlink"), _parse_noop),
            (re.compile(r"^(.*/)?raid6/mktables\s+>"), _parse_noop),
            (re.compile(r"^(.*/)?objtool\b"), _parse_noop),
            (re.compile(r"^(.*/)?module/gen_test_kallsyms.sh"), _parse_noop),
            (re.compile(r"^(.*/)?gen_header.py"), _parse_gen_header),
            (re.compile(r"^(.*/)?scripts/rustdoc_test_gen"), _parse_noop),
        ]
        return CommandParserRegistry(entries)
diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py
new file mode 100644
index 0000000000000..4749f4bd669ea
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import re
+from dataclasses import dataclass
+
+
# If Block pattern to match a simple, single-level if-then-fi block. Nested If blocks are not supported.
# The pattern is anchored at the start of the string and exposes two groups:
# group 1 is the condition and group 2 is the body of the `then` branch.
# There is no provision for an `else` branch, and `.` does not match newlines here.
IF_BLOCK_PATTERN = re.compile(
    r"""
    ^if(.*?);\s* # Match 'if <condition>;' (non-greedy)
    then(.*?);\s* # Match 'then <body>;' (non-greedy)
    fi\b # Match 'fi'
    """,
    re.VERBOSE,
)
+
+
@dataclass
class IfBlock:
    """A shell `if <condition>; then <statement>; fi` block split into its two parts."""

    # Text of the condition between `if` and the following `;`.
    condition: str
    # Text of the body between `then` and the `;` preceding `fi`.
    then_statement: str
+
+
+def _unwrap_outer_parentheses(s: str) -> str:
+ s = s.strip()
+ if not (s.startswith("(") and s.endswith(")")):
+ return s
+
+ count = 0
+ for i, char in enumerate(s):
+ if char == "(":
+ count += 1
+ elif char == ")":
+ count -= 1
+ # If count is 0 before the end, outer parentheses don't match
+ if count == 0 and i != len(s) - 1:
+ return s
+
+ # outer parentheses do match, unwrap once
+ return _unwrap_outer_parentheses(s[1:-1])
+
+
+def _find_first_top_level_command_separator(
+ commands: str, separators: list[str] = [";", "&&"]
+) -> tuple[int | None, int | None]:
+ def is_escaped(index: int) -> bool:
+ preceding = commands[:index]
+ return (len(preceding) - len(preceding.rstrip("\\"))) % 2 == 1
+
+ in_single_quote = False
+ in_double_quote = False
+ in_curly_braces = 0
+ in_braces = 0
+ for i, char in enumerate(commands):
+ if char == "'" and not in_double_quote and not is_escaped(i):
+ # Toggle single quote state (unless inside double quotes or escaped)
+ in_single_quote = not in_single_quote
+ elif char == '"' and not in_single_quote and not is_escaped(i):
+ # Toggle double quote state (unless inside single quotes or escaped)
+ in_double_quote = not in_double_quote
+
+ if in_single_quote or in_double_quote:
+ continue
+
+ # Toggle braces state
+ if char == "{":
+ in_curly_braces += 1
+ if char == "}":
+ in_curly_braces -= 1
+
+ if char == "(":
+ in_braces += 1
+ if char == ")":
+ in_braces -= 1
+
+ if in_curly_braces > 0 or in_braces > 0:
+ continue
+
+ # return found separator position and separator length
+ for separator in separators:
+ if commands[i : i + len(separator)] == separator:
+ return i, len(separator)
+
+ return None, None
+
+
def split_commands(commands: str) -> list[str | IfBlock]:
    """
    Break a shell command line into its individual commands.

    Parentheses enclosing the whole input are removed first. The input is then
    consumed from the left:
    - a leading if-then-fi block becomes an `IfBlock`,
    - otherwise the text up to the next top-level separator (`;` or `&&`)
      becomes one command,
    - whatever remains when no separator is left is the final command.

    Order is preserved and surrounding whitespace is trimmed.

    Args:
        commands (str): The raw command string.

    Returns:
        list[str | IfBlock]: A list of single commands or `IfBlock` objects.
    """
    parts: list[str | IfBlock] = []
    pending = _unwrap_outer_parentheses(commands)
    while pending:
        pending = pending.strip()

        # Leading if-block
        if_match = IF_BLOCK_PATTERN.match(pending)
        if if_match is not None:
            parts.append(IfBlock(if_match.group(1).strip(), if_match.group(2).strip()))
            pending = pending.removeprefix(if_match.group(0)).lstrip("; \n")
            continue

        position, length = _find_first_top_level_command_separator(pending)
        if position is None or length is None:
            # No separator left: the rest is the last command.
            parts.append(pending)
            break

        parts.append(pending[:position].strip())
        pending = pending[position + length :].strip()

    return parts
diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py
new file mode 100644
index 0000000000000..6a7ea4787aa16
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import sbom.sbom_logging as sbom_logging
+from sbom.cmd_graph.savedcmd_parser.command_splitter import IfBlock, split_commands
+from sbom.cmd_graph.savedcmd_parser.command_parser_registry import CommandParserRegistry
+from sbom.cmd_graph.savedcmd_parser.tokenizer import CmdParsingError
+from sbom.path_utils import PathStr
+
# Registry used when a caller does not pass one explicitly. It is built once, when this module is imported.
DEFAULT_COMMAND_PARSER_REGISTRY = CommandParserRegistry.create()
+
+
def parse_inputs_from_commands(
    commands: str,
    fail_on_unknown_build_command: bool,
    registry: CommandParserRegistry | None = None,
) -> list[PathStr]:
    """
    Collect the input files referenced by a command-line expression.

    The expression is split into single commands. Each one is handed to the
    first registry parser whose pattern matches it. Commands without a
    matching parser, or whose parser fails, are reported and skipped. The
    `then` branch of an if-block is parsed too, but only to report when it
    references input files; those files are not added to the result.

    Args:
        commands (str): Command line expression to parse.
        fail_on_unknown_build_command (bool): If True, problems are logged as errors; otherwise as warnings.
        registry (CommandParserRegistry | None): Registry of single command parsers. Defaults to the module-level registry.

    Returns:
        list[PathStr]: Input file paths, stripped of surrounding whitespace and trailing slashes.
    """
    report = sbom_logging.error if fail_on_unknown_build_command else sbom_logging.warning
    active_registry = DEFAULT_COMMAND_PARSER_REGISTRY if registry is None else registry

    collected: list[PathStr] = []
    for single_command in split_commands(commands):
        if isinstance(single_command, IfBlock):
            then_inputs = parse_inputs_from_commands(
                single_command.then_statement, fail_on_unknown_build_command, active_registry
            )
            if then_inputs:
                report(
                    "Skipped parsing command {then_statement} because input files in IfBlock 'then' statement are not supported",
                    then_statement=single_command.then_statement,
                )
            continue

        matched_parser = None
        for pattern, candidate in active_registry:
            if pattern.match(single_command):
                matched_parser = candidate
                break
        if matched_parser is None:
            report(
                "Skipped parsing command {single_command} because no matching parser was found",
                single_command=single_command,
            )
            continue

        try:
            collected.extend(matched_parser(single_command))
        except (CmdParsingError, IndexError) as e:
            report(
                "Skipped parsing command {single_command} because of command parsing error: {error_message}",
                single_command=single_command,
                error_message=str(e),
            )

    return [path.strip().rstrip("/") for path in collected]
diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/tokenizer.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/tokenizer.py
new file mode 100644
index 0000000000000..1bf081f40be78
--- /dev/null
+++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/tokenizer.py
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import re
+import shlex
+from dataclasses import dataclass
+from typing import Union
+
+
class CmdParsingError(Exception):
    """Raised when a command line cannot be parsed into the expected shape."""

    pass
+
+
@dataclass
class Option:
    """A command-line option (a token starting with `-`), with an optional value."""

    # Option name as written on the command line, including its leading dash(es).
    name: str
    # Value attached to the option, or None for a flag without a value.
    value: str | None = None
+
+
@dataclass
class Positional:
    """A command-line token that does not start with `-` (program name or positional argument)."""

    # The token text after shell-style splitting.
    value: str
+
+
# Matches a `$$(...)` block whose content has no parentheses of its own, so nested
# subcommands are not matched as a whole. Group 1 is the text between the parentheses.
_SUBCOMMAND_PATTERN = re.compile(r"\$\$\(([^()]*)\)")
"""Pattern to match $$(...) blocks"""
+
+
def tokenize_single_command(command: str, flag_options: list[str] | None = None) -> list[Union[Option, Positional]]:
    """
    Parse a shell command into a list of Options and Positionals.
    - Positional: the command and any positional arguments.
    - Options: handles flags and options with values provided as space-separated, or equals-sign
      (e.g., '--opt val', '--opt=val', '--flag').

    A token starting with `-` is classified in this order:
    1. listed in `flag_options`: flag without value,
    2. contains `=`: split at the first `=` into name and value,
    3. followed by another token starting with `-`: flag without value,
    4. followed by any other token: that token is consumed as the value.

    Args:
        command: Command line string.
        flag_options: Options that are flags without values (e.g., '--verbose').

    Returns:
        List of `Option` and `Positional` objects in command order.

    Raises:
        CmdParsingError: If the last token is an option that is not a known
            flag and carries no `=` value.
    """

    # Wrap all $$(...) blocks in double quotes to prevent shlex from splitting them.
    command_with_protected_subcommands = _SUBCOMMAND_PATTERN.sub(lambda m: f'"$$({m.group(1)})"', command)
    tokens = shlex.split(command_with_protected_subcommands)
    known_flags = frozenset(flag_options or ())

    parsed: list[Option | Positional] = []
    i = 0
    while i < len(tokens):
        token = tokens[i]

        # Positional
        if not token.startswith("-"):
            parsed.append(Positional(token))
            i += 1
            continue

        # Explicitly declared flag (--flag)
        if token in known_flags:
            parsed.append(Option(name=token))
            i += 1
            continue

        # Option with equals sign (--opt=val). Checked before looking at the next
        # token so that '--opt=val --other' is not mistaken for a valueless flag.
        if "=" in token:
            name, value = token.split("=", 1)
            parsed.append(Option(name=name, value=value))
            i += 1
            continue

        next_token = tokens[i + 1] if i + 1 < len(tokens) else None
        if next_token is None:
            raise CmdParsingError(f"Unrecognized token: {token} in command {command}")

        if next_token.startswith("-"):
            # Option without value, followed by another option (--flag --other)
            parsed.append(Option(name=token))
            i += 1
        else:
            # Option with space-separated value (--opt val)
            parsed.append(Option(name=token, value=next_token))
            i += 2

    return parsed
+
+
def tokenize_single_command_positionals_only(command: str) -> list[str]:
    """
    Tokenize a command that must consist of positional arguments only.

    Returns:
        The positional values in command order.

    Raises:
        CmdParsingError: If the command contains any option, or if the
            underlying tokenizer rejects it.
    """
    values: list[str] = []
    for token in tokenize_single_command(command):
        if not isinstance(token, Positional):
            raise CmdParsingError(
                f"Invalid command format: expected positional arguments only but got options in command {command}."
            )
        values.append(token.value)
    return values
diff --git a/scripts/sbom/sbom/config.py b/scripts/sbom/sbom/config.py
new file mode 100644
index 0000000000000..6811f782943eb
--- /dev/null
+++ b/scripts/sbom/sbom/config.py
@@ -0,0 +1,320 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import argparse
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from enum import Enum
+import os
+from typing import Any
+from sbom.path_utils import PathStr
+
+
+class KernelSpdxDocumentKind(Enum):
+ """Kinds of SPDX documents (source, build, output); used as keys of `KernelSbomConfig.spdx_file_names`."""
+ SOURCE = "source"
+ BUILD = "build"
+ OUTPUT = "output"
+
+
+@dataclass
+class KernelSbomConfig:
+ """Settings for SBOM generation, assembled by `get_config` from the command line and hardcoded file names."""
+
+ src_tree: PathStr
+ """Absolute path to the Linux kernel source directory."""
+
+ obj_tree: PathStr
+ """Absolute path to the build output directory."""
+
+ root_paths: list[PathStr]
+ """List of paths to root outputs (relative to obj_tree) to base the SBOM on."""
+
+ generate_spdx: bool
+ """Whether to generate SPDX SBOM documents. If False, no SPDX files are created."""
+
+ spdx_file_names: dict[KernelSpdxDocumentKind, str]
+ """If `generate_spdx` is True, defines the file names for each SPDX SBOM kind
+ (source, build, output) to store on disk."""
+
+ generate_used_files: bool
+ """Whether to generate a flat list of all source files used in the build.
+ If False, no used-files document is created."""
+
+ used_files_file_name: str
+ """If `generate_used_files` is True, specifies the file name for the used-files document."""
+
+ output_directory: PathStr
+ """Path to the directory where the generated output documents will be saved."""
+
+ debug: bool
+ """Whether to enable debug logging."""
+
+ fail_on_unknown_build_command: bool
+ """Whether to fail if an unknown build command is encountered in a .cmd file."""
+
+ write_output_on_error: bool
+ """Whether to write output documents even if errors occur."""
+
+ created: datetime
+ """Datetime to use for the SPDX created property of the CreationInfo element."""
+
+ spdxId_prefix: str
+ """Prefix to use for all SPDX element IDs."""
+
+ build_type: str
+ """SPDX buildType property to use for all Build elements."""
+
+ build_id: str | None
+ """SPDX buildId property to use for all Build elements."""
+
+ package_license: str
+ """License expression applied to all SPDX Packages."""
+
+ package_version: str | None
+ """Version string applied to all SPDX Packages."""
+
+ package_copyright_text: str | None
+ """Copyright text applied to all SPDX Packages."""
+
+ prettify_json: bool
+ """Whether to pretty-print generated SPDX JSON documents."""
+
+
+def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]:
+ """
+ Register all command-line options on the given parser and parse the command line.
+
+ Args:
+ parser: Argument parser to which the options are added.
+
+ Returns:
+ Dictionary of parsed arguments.
+ """
+ parser.add_argument(
+ "--src-tree",
+ default="../linux",
+ help="Path to the kernel source tree (default: ../linux)",
+ )
+ parser.add_argument(
+ "--obj-tree",
+ default="../linux/kernel_build",
+ help="Path to the build output directory (default: ../linux/kernel_build)",
+ )
+ # Exactly one of --roots and --roots-file has to be provided (required, mutually exclusive group).
+ group = parser.add_mutually_exclusive_group(required=True)
+ group.add_argument(
+ "--roots",
+ nargs="+",
+ help="Space-separated list of paths relative to obj-tree for which the SBOM will be created.\n"
+ "Cannot be used together with --roots-file.",
+ )
+ group.add_argument(
+ "--roots-file",
+ help="Path to a file containing the root paths (one per line). Cannot be used together with --roots.",
+ )
+ parser.add_argument(
+ "--generate-spdx",
+ action="store_true",
+ default=False,
+ help=(
+ "Whether to create sbom-source.spdx.json, sbom-build.spdx.json and "
+ "sbom-output.spdx.json documents (default: False)"
+ ),
+ )
+ parser.add_argument(
+ "--generate-used-files",
+ action="store_true",
+ default=False,
+ help=(
+ "Whether to create the sbom.used-files.txt file, a flat list of all "
+ "source files used for the kernel build.\n"
+ "If src-tree and obj-tree are equal it is not possible to reliably "
+ "classify source files.\n"
+ "In this case sbom.used-files.txt will contain all files used for the "
+ "kernel build including all build artifacts. (default: False)"
+ ),
+ )
+ parser.add_argument(
+ "--output-directory",
+ default=".",
+ help="Path to the directory where the generated output documents will be stored (default: .)",
+ )
+ parser.add_argument(
+ "--debug",
+ action="store_true",
+ default=False,
+ help="Enable debug logs (default: False)",
+ )
+
+ # Error handling settings
+ # NOTE: this flag is negated by get_config to obtain `fail_on_unknown_build_command`.
+ parser.add_argument(
+ "--do-not-fail-on-unknown-build-command",
+ action="store_true",
+ default=False,
+ help=(
+ "Whether to fail if an unknown build command is encountered in a .cmd file.\n"
+ "If set to True, errors are logged as warnings instead. (default: False)"
+ ),
+ )
+ parser.add_argument(
+ "--write-output-on-error",
+ action="store_true",
+ default=False,
+ help=(
+ "Write output documents even if errors occur. The resulting documents "
+ "may be incomplete.\n"
+ "A summary of warnings and errors can be found in the 'comment' property "
+ "of the CreationInfo element. (default: False)"
+ ),
+ )
+
+ # SPDX specific options
+ spdx_group = parser.add_argument_group("SPDX options", "Options for customizing SPDX document generation")
+ spdx_group.add_argument(
+ "--spdxId-prefix",
+ default="urn:spdx.dev:",
+ help="The prefix to use for all spdxId properties. (default: urn:spdx.dev:)",
+ )
+ spdx_group.add_argument(
+ "--build-type",
+ default="urn:spdx.dev:Kbuild",
+ help="The SPDX buildType property to use for all Build elements. (default: urn:spdx.dev:Kbuild)",
+ )
+ spdx_group.add_argument(
+ "--build-id",
+ default=None,
+ help="The SPDX buildId property to use for all Build elements.\n"
+ "If not provided the spdxId of the high level Build element is used as the buildId. (default: None)",
+ )
+ spdx_group.add_argument(
+ "--package-license",
+ default="NOASSERTION",
+ help=(
+ "The SPDX licenseExpression property to use for the LicenseExpression "
+ "linked to all SPDX Package elements. (default: NOASSERTION)"
+ ),
+ )
+ spdx_group.add_argument(
+ "--package-version",
+ default=None,
+ help="The SPDX packageVersion property to use for all SPDX Package elements. (default: None)",
+ )
+ spdx_group.add_argument(
+ "--package-copyright-text",
+ default=None,
+ help=(
+ "The SPDX copyrightText property to use for all SPDX Package elements.\n"
+ "If not specified, and if a COPYING file exists in the source tree,\n"
+ "the package-copyright-text is set to the content of this file. "
+ "(default: None)"
+ ),
+ )
+ spdx_group.add_argument(
+ "--prettify-json",
+ action="store_true",
+ default=False,
+ help="Whether to pretty print the generated spdx.json documents (default: False)",
+ )
+
+ # Convert the argparse namespace into a plain dict (keys such as "src_tree", "roots_file").
+ args = vars(parser.parse_args())
+ return args
+
+
+def get_config() -> KernelSbomConfig:
+    """
+    Parse command-line arguments and construct the configuration object.
+
+    Root paths are taken from --roots or, if given, read from --roots-file
+    (one path per line; surrounding whitespace is stripped and blank lines are
+    ignored). Invalid path arguments are reported through `parser.error`.
+
+    Returns:
+        KernelSbomConfig: Configuration object with all settings for SBOM generation.
+    """
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawTextHelpFormatter,
+        description="Generate SPDX SBOM documents for kernel builds",
+    )
+    args = _parse_cli_arguments(parser)
+
+    # Extract and validate cli arguments
+    src_tree = os.path.realpath(args["src_tree"])
+    obj_tree = os.path.realpath(args["obj_tree"])
+    if args["roots_file"]:
+        with open(args["roots_file"], "rt", encoding="utf-8") as f:
+            # Skip blank lines: an empty root path would resolve to obj_tree itself
+            # and fail validation with a misleading "is not a file" error.
+            root_paths = [root for line in f if (root := line.strip())]
+        if not root_paths:
+            parser.error("--roots-file must contain at least one path")
+    else:
+        root_paths = args["roots"]
+    _validate_path_arguments(parser, src_tree, obj_tree, root_paths)
+
+    generate_spdx = args["generate_spdx"]
+    generate_used_files = args["generate_used_files"]
+    output_directory = os.path.realpath(args["output_directory"])
+    debug = args["debug"]
+
+    # The CLI flag is phrased negatively; the config stores the positive form.
+    fail_on_unknown_build_command = not args["do_not_fail_on_unknown_build_command"]
+    write_output_on_error = args["write_output_on_error"]
+
+    # Use the most recent modification time of the root artifacts as creation timestamp (UTC).
+    created = datetime.fromtimestamp(
+        max(os.path.getmtime(os.path.join(obj_tree, root_path)) for root_path in root_paths),
+        tz=timezone.utc,
+    )
+    spdxId_prefix = args["spdxId_prefix"]
+    build_type = args["build_type"]
+    build_id = args["build_id"]
+    package_license = args["package_license"]
+    package_version = args["package_version"]
+    package_copyright_text: str | None = None
+    if args["package_copyright_text"] is not None:
+        package_copyright_text = args["package_copyright_text"]
+    elif os.path.isfile(copying_path := os.path.join(src_tree, "COPYING")):
+        # Fall back to the content of the COPYING file of the source tree.
+        with open(copying_path, "r", encoding="utf-8") as f:
+            package_copyright_text = f.read()
+    prettify_json = args["prettify_json"]
+
+    # Hardcoded config
+    spdx_file_names = {
+        KernelSpdxDocumentKind.SOURCE: "sbom-source.spdx.json",
+        KernelSpdxDocumentKind.BUILD: "sbom-build.spdx.json",
+        KernelSpdxDocumentKind.OUTPUT: "sbom-output.spdx.json",
+    }
+    used_files_file_name = "sbom.used-files.txt"
+
+    return KernelSbomConfig(
+        src_tree=src_tree,
+        obj_tree=obj_tree,
+        root_paths=root_paths,
+        generate_spdx=generate_spdx,
+        spdx_file_names=spdx_file_names,
+        generate_used_files=generate_used_files,
+        used_files_file_name=used_files_file_name,
+        output_directory=output_directory,
+        debug=debug,
+        fail_on_unknown_build_command=fail_on_unknown_build_command,
+        write_output_on_error=write_output_on_error,
+        created=created,
+        spdxId_prefix=spdxId_prefix,
+        build_type=build_type,
+        build_id=build_id,
+        package_license=package_license,
+        package_version=package_version,
+        package_copyright_text=package_copyright_text,
+        prettify_json=prettify_json,
+    )
+
+
+def _validate_path_arguments(
+    parser: argparse.ArgumentParser,
+    src_tree: PathStr,
+    obj_tree: PathStr,
+    root_paths: list[PathStr],
+) -> None:
+    """
+    Check that both trees exist and that every root path points to a file.
+
+    The first failing check is reported through `parser.error`.
+
+    Args:
+        parser: The argument parser, used to emit well-formatted error messages.
+        src_tree: Absolute path to the source tree.
+        obj_tree: Absolute path to the object tree.
+        root_paths: List of root paths relative to obj_tree.
+    """
+    src_tree_exists = os.path.exists(src_tree)
+    if not src_tree_exists:
+        parser.error(f"--src-tree {src_tree} does not exist")
+
+    obj_tree_exists = os.path.exists(obj_tree)
+    if not obj_tree_exists:
+        parser.error(f"--obj-tree {obj_tree} does not exist")
+
+    for root_path in root_paths:
+        # Root paths are given relative to the object tree.
+        root_path_absolute = os.path.join(obj_tree, root_path)
+        if os.path.isfile(root_path_absolute):
+            continue
+        parser.error(f"path to root artifact {root_path_absolute} is not a file")
diff --git a/scripts/sbom/sbom/environment.py b/scripts/sbom/sbom/environment.py
new file mode 100644
index 0000000000000..4304066fe974e
--- /dev/null
+++ b/scripts/sbom/sbom/environment.py
@@ -0,0 +1,192 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import os
+
+# Names of the environment variables that Environment.KERNEL_BUILD_VARIABLES() looks up;
+# variables that are not listed here are never part of its result.
+KERNEL_BUILD_VARIABLES_ALLOWLIST = [
+ "AFLAGS_KERNEL",
+ "AFLAGS_MODULE",
+ "AR",
+ "ARCH",
+ "ARCH_CORE",
+ "ARCH_DRIVERS",
+ "ARCH_LIB",
+ "AWK",
+ "BASH",
+ "BINDGEN",
+ "BITS",
+ "CC",
+ "CC_FLAGS_FPU",
+ "CC_FLAGS_NO_FPU",
+ "CFLAGS_GCOV",
+ "CFLAGS_KERNEL",
+ "CFLAGS_MODULE",
+ "CHECK",
+ "CHECKFLAGS",
+ "CLIPPY_CONF_DIR",
+ "CONFIG_SHELL",
+ "CPP",
+ "CROSS_COMPILE",
+ "CURDIR",
+ "GNUMAKEFLAGS",
+ "HOSTCC",
+ "HOSTCXX",
+ "HOSTPKG_CONFIG",
+ "HOSTRUSTC",
+ "INSTALLKERNEL",
+ "INSTALL_DTBS_PATH",
+ "INSTALL_HDR_PATH",
+ "INSTALL_PATH",
+ "KBUILD_AFLAGS",
+ "KBUILD_AFLAGS_KERNEL",
+ "KBUILD_AFLAGS_MODULE",
+ "KBUILD_BUILTIN",
+ "KBUILD_CFLAGS",
+ "KBUILD_CFLAGS_KERNEL",
+ "KBUILD_CFLAGS_MODULE",
+ "KBUILD_CHECKSRC",
+ "KBUILD_CLIPPY",
+ "KBUILD_CPPFLAGS",
+ "KBUILD_EXTMOD",
+ "KBUILD_EXTRA_WARN",
+ "KBUILD_HOSTCFLAGS",
+ "KBUILD_HOSTCXXFLAGS",
+ "KBUILD_HOSTLDFLAGS",
+ "KBUILD_HOSTLDLIBS",
+ "KBUILD_HOSTRUSTFLAGS",
+ "KBUILD_IMAGE",
+ "KBUILD_LDFLAGS",
+ "KBUILD_LDFLAGS_MODULE",
+ "KBUILD_LDS",
+ "KBUILD_MODULES",
+ "KBUILD_PROCMACROLDFLAGS",
+ "KBUILD_RUSTFLAGS",
+ "KBUILD_RUSTFLAGS_KERNEL",
+ "KBUILD_RUSTFLAGS_MODULE",
+ "KBUILD_USERCFLAGS",
+ "KBUILD_USERLDFLAGS",
+ "KBUILD_VERBOSE",
+ "KBUILD_VMLINUX_LIBS",
+ "KBZIP2",
+ "KCONFIG_CONFIG",
+ "KERNELDOC",
+ "KERNELRELEASE",
+ "KERNELVERSION",
+ "KGZIP",
+ "KLZOP",
+ "LC_COLLATE",
+ "LC_NUMERIC",
+ "LD",
+ "LDFLAGS_MODULE",
+ "LEX",
+ "LINUXINCLUDE",
+ "LZ4",
+ "LZMA",
+ "MAKE",
+ "MAKEFILES",
+ "MAKEFILE_LIST",
+ "MAKEFLAGS",
+ "MAKELEVEL",
+ "MAKEOVERRIDES",
+ "MAKE_COMMAND",
+ "MAKE_HOST",
+ "MAKE_TERMERR",
+ "MAKE_TERMOUT",
+ "MAKE_VERSION",
+ "MFLAGS",
+ "MODLIB",
+ "NM",
+ "NOSTDINC_FLAGS",
+ "O",
+ "OBJCOPY",
+ "OBJCOPYFLAGS",
+ "OBJDUMP",
+ "PAHOLE",
+ "PATCHLEVEL",
+ "PERL",
+ "PYTHON3",
+ "Q",
+ "RCS_FIND_IGNORE",
+ "READELF",
+ "REALMODE_CFLAGS",
+ "RESOLVE_BTFIDS",
+ "RETHUNK_CFLAGS",
+ "RETHUNK_RUSTFLAGS",
+ "RETPOLINE_CFLAGS",
+ "RETPOLINE_RUSTFLAGS",
+ "RETPOLINE_VDSO_CFLAGS",
+ "RUSTC",
+ "RUSTC_BOOTSTRAP",
+ "RUSTC_OR_CLIPPY",
+ "RUSTC_OR_CLIPPY_QUIET",
+ "RUSTDOC",
+ "RUSTFLAGS_KERNEL",
+ "RUSTFLAGS_MODULE",
+ "RUSTFMT",
+ "SRCARCH",
+ "STRIP",
+ "SUBLEVEL",
+ "SUFFIXES",
+ "TAR",
+ "UTS_MACHINE",
+ "VERSION",
+ "VPATH",
+ "XZ",
+ "YACC",
+ "ZSTD",
+ "building_out_of_srctree",
+ "cross_compiling",
+ "objtree",
+ "quiet",
+ "rust_common_flags",
+ "srcroot",
+ "srctree",
+ "sub_make_done",
+ "subdir",
+]
+
+
+class Environment:
+    """
+    Read-only view on the environment variables of the kernel build.
+
+    Every accessor queries the process environment at call time.
+    """
+
+    @classmethod
+    def KERNEL_BUILD_VARIABLES(cls) -> dict[str, str]:
+        """Return all allowlisted variables that are set to a non-blank value, stripped of surrounding whitespace."""
+        variables: dict[str, str] = {}
+        for name in KERNEL_BUILD_VARIABLES_ALLOWLIST:
+            raw_value = os.environ.get(name)
+            if raw_value is None:
+                continue
+            stripped_value = raw_value.strip()
+            if stripped_value:
+                variables[name] = stripped_value
+        return variables
+
+    @classmethod
+    def ARCH(cls) -> str | None:
+        """Value of the ARCH environment variable, or None if unset."""
+        return os.environ.get("ARCH")
+
+    @classmethod
+    def SRCARCH(cls) -> str | None:
+        """Value of the SRCARCH environment variable, or None if unset."""
+        return os.environ.get("SRCARCH")
+
+    @classmethod
+    def CC(cls) -> str | None:
+        """Value of the CC environment variable, or None if unset."""
+        return os.environ.get("CC")
+
+    @classmethod
+    def LD(cls) -> str | None:
+        """Value of the LD environment variable, or None if unset."""
+        return os.environ.get("LD")
+
+    @classmethod
+    def AR(cls) -> str | None:
+        """Value of the AR environment variable, or None if unset."""
+        return os.environ.get("AR")
+
+    @classmethod
+    def NM(cls) -> str | None:
+        """Value of the NM environment variable, or None if unset."""
+        return os.environ.get("NM")
+
+    @classmethod
+    def OBJCOPY(cls) -> str | None:
+        """Value of the OBJCOPY environment variable, or None if unset."""
+        return os.environ.get("OBJCOPY")
+
+    @classmethod
+    def STRIP(cls) -> str | None:
+        """Value of the STRIP environment variable, or None if unset."""
+        return os.environ.get("STRIP")
diff --git a/scripts/sbom/sbom/path_utils.py b/scripts/sbom/sbom/path_utils.py
new file mode 100644
index 0000000000000..29820046dc884
--- /dev/null
+++ b/scripts/sbom/sbom/path_utils.py
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import os
+from functools import lru_cache
+
+PathStr = str
+"""Filesystem path represented as a plain string for better performance than pathlib.Path."""
+
+
+def is_relative_to(path: PathStr, base: PathStr) -> bool:
+    """
+    Check whether `path` is located at or below `base`.
+
+    The comparison is purely lexical; symlinks are not resolved.
+
+    Args:
+        path: Path to test.
+        base: Directory that `path` is expected to be equal to or nested in.
+
+    Returns:
+        True if the longest common sub-path of both paths is `base`.
+
+    Raises:
+        ValueError: Propagated from `os.path.commonpath`, e.g. when absolute and
+            relative paths are mixed.
+    """
+    # commonpath returns a normalized path (no trailing separator, no "." components),
+    # so base has to be normalized as well before comparing.
+    return os.path.commonpath([path, base]) == os.path.normpath(base)
+
+@lru_cache(maxsize=None)
+def has_link(path: PathStr) -> bool:
+    """
+    Tell whether `path` itself or one of its ancestor directories is a symlink.
+
+    The check walks up the directory hierarchy recursively; every visited path
+    is cached, so shared ancestors are only inspected once.
+    """
+    if os.path.islink(path):
+        return True
+    parent = os.path.dirname(path)
+    # dirname() returns its input unchanged once the top of the hierarchy is reached.
+    return parent != path and has_link(parent)
diff --git a/scripts/sbom/sbom/sbom_logging.py b/scripts/sbom/sbom/sbom_logging.py
new file mode 100644
index 0000000000000..fbc53cc77ef44
--- /dev/null
+++ b/scripts/sbom/sbom/sbom_logging.py
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import logging
+import inspect
+from typing import Literal
+
+
+MessageTemplate = str
+
+
+class MessageLogger:
+    """Logger that emits only the first few messages per template and keeps them for a final summary."""
+
+    _messages: dict[MessageTemplate, list[str]]
+    _message_counts: dict[MessageTemplate, int]
+    # Maximum number of repeated messages of the same type to log before suppressing further output.
+    _repeated_logs_limit: int
+
+    def __init__(self, level: Literal["error", "warning"], repeated_logs_limit: int = 3) -> None:
+        self._repeated_logs_limit = repeated_logs_limit
+        self._message_counts = {}
+        self._messages = {}
+        self._level = level
+
+    def log(self, template: MessageTemplate, /, **kwargs: str) -> None:
+        """
+        Render `template` by substituting every `{name}` placeholder with the matching
+        keyword argument and log it, unless the per-template limit is already reached.
+        Example: `log("Missing {path}", path=str(p))`.
+        """
+        rendered = template
+        for placeholder, replacement in kwargs.items():
+            rendered = rendered.replace("{" + placeholder + "}", replacement)
+
+        kept_messages = self._messages.setdefault(template, [])
+        occurrences = self._message_counts.get(template, 0) + 1
+        self._message_counts[template] = occurrences
+        if occurrences > self._repeated_logs_limit:
+            # Only counted; neither logged nor kept for the summary.
+            return
+
+        if self._level == "error":
+            logging.error(rendered)
+        elif self._level == "warning":
+            logging.warning(rendered)
+        kept_messages.append(rendered)
+
+    def get_summary(self) -> str:
+        """Return the kept messages, one per line, followed per template by the number of suppressed repetitions."""
+        if not self._messages:
+            return ""
+        summary: list[str] = [f"Summarize {self._level}s:"]
+        for template, messages in self._messages.items():
+            summary.extend(messages)
+            n_suppressed_messages = self._message_counts[template] - self._repeated_logs_limit
+            if n_suppressed_messages <= 0:
+                continue
+            instances = "instance" if n_suppressed_messages == 1 else "instances"
+            summary.append(f"... (Found {n_suppressed_messages} more {instances} of this {self._level})")
+        return "\n".join(summary)
+
+    def has_messages(self) -> bool:
+        """Return True once at least one message has been logged."""
+        return bool(self._message_counts)
+
+
+_warning_logger: MessageLogger
+_error_logger: MessageLogger
+
+
+def warning(msg_template: MessageTemplate, /, **kwargs: str) -> None:
+ """Log a warning through the module-level warning logger; `kwargs` fill the `{name}` placeholders of the template."""
+ _warning_logger.log(msg_template, **kwargs)
+
+
+def error(msg_template: MessageTemplate, /, **kwargs: str) -> None:
+    """
+    Log an error through the module-level error logger, prefixed with the caller's location.
+
+    File name, line number and function name of the calling code become part of the
+    template, so the suppression of repeated messages is applied per call site.
+    """
+    # The frame preceding this function's own frame belongs to the caller.
+    own_frame = inspect.currentframe()
+    caller_frame = own_frame.f_back if own_frame else None
+    if caller_frame:
+        info = inspect.getframeinfo(caller_frame)
+        if info:
+            msg_template = f'File "{info.filename}", line {info.lineno}, in {info.function}\n{msg_template}'
+    _error_logger.log(msg_template, **kwargs)
+
+
+def summarize_warnings() -> str:
+ """Return the summary of all warnings logged so far (empty string if there are none)."""
+ return _warning_logger.get_summary()
+
+
+def summarize_errors() -> str:
+ """Return the summary of all errors logged so far (empty string if there are none)."""
+ return _error_logger.get_summary()
+
+
+def has_errors() -> bool:
+ """Return True if at least one error has been logged since the loggers were (re)initialized."""
+ return _error_logger.has_messages()
+
+
+def init() -> None:
+ """(Re)create the module-level warning and error loggers; previously collected messages are dropped."""
+ global _warning_logger, _error_logger
+ _warning_logger = MessageLogger("warning")
+ _error_logger = MessageLogger("error")
+
+
+init()
diff --git a/scripts/sbom/sbom/spdx/__init__.py b/scripts/sbom/sbom/spdx/__init__.py
new file mode 100644
index 0000000000000..4097b59f8f172
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from .spdxId import SpdxId, SpdxIdGenerator
+from .serialization import JsonLdSpdxDocument
+
+__all__ = ["JsonLdSpdxDocument", "SpdxId", "SpdxIdGenerator"]
diff --git a/scripts/sbom/sbom/spdx/build.py b/scripts/sbom/sbom/spdx/build.py
new file mode 100644
index 0000000000000..a39ec9c09b16f
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/build.py
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from sbom.spdx.core import DictionaryEntry, Element, Hash
+
+
+@dataclass(kw_only=True)
+class Build(Element):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Build/Classes/Build/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="build_Build")
+ # Required keyword-only properties.
+ build_buildType: str
+ build_buildId: str
+ # Optional list properties; each instance gets its own empty list by default.
+ build_environment: list[DictionaryEntry] = field(default_factory=list)
+ build_configSourceUri: list[str] = field(default_factory=list)
+ build_configSourceDigest: list[Hash] = field(default_factory=list)
diff --git a/scripts/sbom/sbom/spdx/core.py b/scripts/sbom/sbom/spdx/core.py
new file mode 100644
index 0000000000000..7eb376a1cd883
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/core.py
@@ -0,0 +1,170 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+
+from typing import Any, Literal
+from sbom.spdx.spdxId import SpdxId
+
+# Version of the SPDX specification; used as default of CreationInfo.specVersion.
+SPDX_SPEC_VERSION = "3.0.1"
+
+# Literal aliases restricting the string values that type checkers accept for the respective properties.
+ExternalIdentifierType = Literal["email", "gitoid", "urlScheme"]
+HashAlgorithm = Literal["sha256", "sha512"]
+ProfileIdentifierType = Literal["core", "software", "build", "lite", "simpleLicensing"]
+RelationshipType = Literal[
+ "contains",
+ "generates",
+ "hasDeclaredLicense",
+ "hasInput",
+ "hasOutput",
+ "ancestorOf",
+ "hasDistributionArtifact",
+ "dependsOn",
+]
+RelationshipCompleteness = Literal["complete", "incomplete", "noAssertion"]
+
+
+@dataclass
+class SpdxObject:
+    """Base class of all SPDX model classes; provides the conversion to plain dictionaries."""
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Convert this object into a dictionary, one entry per dataclass field.
+
+        Fields that are None, an empty list or an empty string are left out.
+        Referenced `Element` objects (single or as list) are represented by their
+        spdxId; all other values are converted via their own `to_dict` if they have one.
+        """
+
+        def _serialize(item: Any):
+            if hasattr(item, "to_dict"):
+                return item.to_dict()
+            return item
+
+        result: dict[str, Any] = {}
+        for field_name in self.__dataclass_fields__:
+            value = getattr(self, field_name)
+            if value is None or value == [] or value == "":
+                continue
+
+            if isinstance(value, Element):
+                result[field_name] = value.spdxId
+                continue
+            if not isinstance(value, list):
+                result[field_name] = _serialize(value)
+                continue
+
+            # The type of the first item decides how the whole list is represented.
+            if len(value) > 0 and isinstance(value[0], Element):
+                result[field_name] = [member.spdxId for member in value]
+            else:
+                result[field_name] = [_serialize(member) for member in value]
+        return result
+
+
+# Base class without fields of its own; Hash derives from it.
+@dataclass(kw_only=True)
+class IntegrityMethod(SpdxObject):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/IntegrityMethod/"""
+
+
+@dataclass(kw_only=True)
+class Hash(IntegrityMethod):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Hash/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="Hash")
+ # Both properties are required keyword arguments.
+ hashValue: str
+ algorithm: HashAlgorithm
+
+
+@dataclass(kw_only=True)
+class Element(SpdxObject):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Element/"""
+
+ # Type tag; subclasses override the default with their own type name.
+ type: str = field(init=False, default="Element")
+ # Required identifier; SpdxObject.to_dict uses it to represent referenced elements.
+ spdxId: SpdxId
+ # Same literal as the default `id` of CreationInfo.
+ creationInfo: str = "_:creationinfo"
+ name: str | None = None
+ verifiedUsing: list[Hash] = field(default_factory=list)
+ comment: str | None = None
+
+
+@dataclass(kw_only=True)
+class ExternalMap(SpdxObject):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ExternalMap/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="ExternalMap")
+ # Required keyword argument.
+ externalSpdxId: SpdxId
+
+
+@dataclass(kw_only=True)
+class NamespaceMap(SpdxObject):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/NamespaceMap/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="NamespaceMap")
+ # Required keyword arguments: the short prefix and the namespace it stands for.
+ prefix: str
+ namespace: str
+
+
+@dataclass(kw_only=True)
+class ElementCollection(Element):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ElementCollection/"""
+
+ # Type tag; subclasses override the default with their own type name.
+ type: str = field(init=False, default="ElementCollection")
+ # Lists of Element objects; SpdxObject.to_dict represents such lists by the elements' spdxIds.
+ element: list[Element] = field(default_factory=list)
+ rootElement: list[Element] = field(default_factory=list)
+ profileConformance: list[ProfileIdentifierType] = field(default_factory=list)
+
+
+@dataclass(kw_only=True)
+class SpdxDocument(ElementCollection):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SpdxDocument/"""
+
+    type: str = field(init=False, default="SpdxDocument")
+    # Trailing underscore because 'import' is a reserved keyword.
+    import_: list[ExternalMap] = field(default_factory=list)
+    namespaceMap: list[NamespaceMap] = field(default_factory=list)
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize like the base class, but emit the `import_` field under the key "import"."""
+        renamed: dict[str, Any] = {}
+        for key, value in super().to_dict().items():
+            renamed["import" if key == "import_" else key] = value
+        return renamed
+
+
+@dataclass(kw_only=True)
+class Agent(Element):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Agent/"""
+
+ # Only the type tag differs from Element; no additional fields.
+ type: str = field(init=False, default="Agent")
+
+
+@dataclass(kw_only=True)
+class SoftwareAgent(Agent):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SoftwareAgent/"""
+
+ # Only the type tag differs from Agent; no additional fields.
+ type: str = field(init=False, default="SoftwareAgent")
+
+
+@dataclass(kw_only=True)
+class CreationInfo(SpdxObject):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/CreationInfo/"""
+
+    type: str = field(init=False, default="CreationInfo")
+    # Same literal as the default `creationInfo` of Element.
+    id: SpdxId = "_:creationinfo"
+    specVersion: str = SPDX_SPEC_VERSION
+    createdBy: list[Agent]
+    created: str
+    comment: str | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize like the base class, but emit the `id` field under the key "@id"."""
+        renamed: dict[str, Any] = {}
+        for key, value in super().to_dict().items():
+            renamed["@id" if key == "id" else key] = value
+        return renamed
+
+
+@dataclass(kw_only=True)
+class Relationship(Element):
+    """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Relationship/"""
+
+    type: str = field(init=False, default="Relationship")
+    relationshipType: RelationshipType
+    from_: Element  # underscore because 'from' is a reserved keyword
+    to: list[Element]
+    completeness: RelationshipCompleteness | None = None
+
+    def to_dict(self) -> dict[str, Any]:
+        """Serialize like the base class, but emit the `from_` field under the key "from"."""
+        renamed: dict[str, Any] = {}
+        for key, value in super().to_dict().items():
+            renamed["from" if key == "from_" else key] = value
+        return renamed
+
+
+@dataclass(kw_only=True)
+class Artifact(Element):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Artifact/"""
+
+ # Only the type tag differs from Element; no additional fields.
+ type: str = field(init=False, default="Artifact")
+
+
+@dataclass(kw_only=True)
+class DictionaryEntry(SpdxObject):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/DictionaryEntry/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="DictionaryEntry")
+ # Required keyword arguments. NOTE: SpdxObject.to_dict omits `value` if it is an empty string.
+ key: str
+ value: str
diff --git a/scripts/sbom/sbom/spdx/serialization.py b/scripts/sbom/sbom/spdx/serialization.py
new file mode 100644
index 0000000000000..b4df7d368d467
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/serialization.py
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import json
+from typing import Any
+from sbom.path_utils import PathStr
+from sbom.spdx.core import SPDX_SPEC_VERSION, SpdxDocument, SpdxObject
+
+
+class JsonLdSpdxDocument:
+    """SPDX document in JSON-LD form: a graph of SPDX objects that can be serialized to JSON."""
+
+    graph: list[SpdxObject]
+
+    def __init__(self, graph: list[SpdxObject]) -> None:
+        """
+        Create a JSON-LD SPDX document from a graph of SPDX objects.
+        The graph must contain a single SpdxDocument element.
+
+        Args:
+            graph: List of SPDX objects representing the complete SPDX document.
+        """
+        self.graph = graph
+
+    @property
+    def context(self) -> list[str | dict[str, str]]:
+        """JSON-LD context: the SPDX context URL plus the prefix-to-namespace map of the first SpdxDocument."""
+        documents = (item for item in self.graph if isinstance(item, SpdxDocument))
+        spdx_document = next(documents)
+        namespaces: dict[str, str] = {}
+        for ns in spdx_document.namespaceMap:
+            namespaces[ns.prefix] = ns.namespace
+        return [
+            f"https://spdx.org/rdf/{SPDX_SPEC_VERSION}/spdx-context.jsonld",
+            namespaces,
+        ]
+
+    def to_dict(self) -> dict[str, Any]:
+        """
+        Build the dictionary representation used for JSON serialization.
+
+        Returns:
+            Dictionary with @context and @graph keys following JSON-LD format.
+        """
+        context = self.context
+        graph_entries: list[dict[str, Any]] = []
+        for item in self.graph:
+            entry = item.to_dict()
+            if isinstance(item, SpdxDocument):
+                # The namespace map is already part of the context.
+                entry.pop("namespaceMap", None)
+            graph_entries.append(entry)
+        return {
+            "@context": context,
+            "@graph": graph_entries,
+        }
+
+    def save(self, path: PathStr, prettify: bool) -> None:
+        """
+        Write the SPDX document to a JSON file.
+
+        Args:
+            path: File path where the document will be saved.
+            prettify: Whether to pretty-print the JSON with indentation.
+        """
+        dump_options: dict[str, Any] = {"indent": 2} if prettify else {"separators": (",", ":")}
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump(self.to_dict(), f, **dump_options)
diff --git a/scripts/sbom/sbom/spdx/simplelicensing.py b/scripts/sbom/sbom/spdx/simplelicensing.py
new file mode 100644
index 0000000000000..750ddd24ad895
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/simplelicensing.py
@@ -0,0 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from sbom.spdx.core import Element
+
+
+@dataclass(kw_only=True)
+class AnyLicenseInfo(Element):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/SimpleLicensing/Classes/AnyLicenseInfo/"""
+
+ # Only the type tag differs from Element; no additional fields.
+ type: str = field(init=False, default="simplelicensing_AnyLicenseInfo")
+
+
+@dataclass(kw_only=True)
+class LicenseExpression(AnyLicenseInfo):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/SimpleLicensing/Classes/LicenseExpression/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="simplelicensing_LicenseExpression")
+ # Required keyword argument.
+ simplelicensing_licenseExpression: str
diff --git a/scripts/sbom/sbom/spdx/software.py b/scripts/sbom/sbom/spdx/software.py
new file mode 100644
index 0000000000000..2f46de7c31679
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/software.py
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass, field
+from typing import Literal
+from sbom.spdx.core import Artifact, ElementCollection, IntegrityMethod
+
+
+# Literal aliases restricting the string values that type checkers accept for the respective properties.
+SbomType = Literal["source", "build"]
+FileKindType = Literal["file", "directory"]
+SoftwarePurpose = Literal[
+ "source",
+ "archive",
+ "library",
+ "file",
+ "data",
+ "configuration",
+ "executable",
+ "module",
+ "application",
+ "documentation",
+ "other",
+]
+ContentIdentifierType = Literal["gitoid", "swhid"]
+
+
+@dataclass(kw_only=True)
+class Sbom(ElementCollection):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Sbom/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="software_Sbom")
+ # Optional; each instance gets its own empty list by default.
+ software_sbomType: list[SbomType] = field(default_factory=list)
+
+
+@dataclass(kw_only=True)
+class ContentIdentifier(IntegrityMethod):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/ContentIdentifier/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="software_ContentIdentifier")
+ # Both properties are required keyword arguments.
+ software_contentIdentifierType: ContentIdentifierType
+ software_contentIdentifierValue: str
+
+
+@dataclass(kw_only=True)
+class SoftwareArtifact(Artifact):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/SoftwareArtifact/"""
+
+ # Type tag; the subclasses Package and File override the default.
+ type: str = field(init=False, default="software_Artifact")
+ # All properties below are optional.
+ software_primaryPurpose: SoftwarePurpose | None = None
+ software_copyrightText: str | None = None
+ software_contentIdentifier: list[ContentIdentifier] = field(default_factory=list)
+
+
+@dataclass(kw_only=True)
+class Package(SoftwareArtifact):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Package/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="software_Package")
+ # Redeclared without default: `name` is a required keyword argument for packages.
+ name: str # type: ignore
+ software_packageVersion: str | None = None
+
+
+@dataclass(kw_only=True)
+class File(SoftwareArtifact):
+ """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/File/"""
+
+ # Fixed type tag; excluded from __init__ (init=False).
+ type: str = field(init=False, default="software_File")
+ # Redeclared without default: `name` is a required keyword argument for files.
+ name: str # type: ignore
+ software_fileKind: FileKindType | None = None
diff --git a/scripts/sbom/sbom/spdx/spdxId.py b/scripts/sbom/sbom/spdx/spdxId.py
new file mode 100644
index 0000000000000..589e85c5f7064
--- /dev/null
+++ b/scripts/sbom/sbom/spdx/spdxId.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from itertools import count
+from typing import Iterator
+
+SpdxId = str
+
+
+class SpdxIdGenerator:
+    """Produces SPDX IDs that share a namespace (or prefix) and end in a running number starting at 0."""
+
+    _namespace: str
+    _prefix: str | None = None
+    _counter: Iterator[int]
+
+    def __init__(self, namespace: str, prefix: str | None = None) -> None:
+        """
+        Set up the generator for one namespace.
+
+        Args:
+            namespace: The full namespace to use for generated IDs.
+            prefix: Optional. If provided, generated IDs will use this prefix instead of the full namespace.
+        """
+        self._counter = count(0)
+        self._prefix = prefix
+        self._namespace = namespace
+
+    def generate(self) -> SpdxId:
+        """Return the next ID: "<prefix>:<n>" if a non-empty prefix is set, otherwise "<namespace><n>"."""
+        number = next(self._counter)
+        if self._prefix:
+            return f"{self._prefix}:{number}"
+        return f"{self._namespace}{number}"
+
+    @property
+    def prefix(self) -> str | None:
+        """Prefix passed to the constructor, if any."""
+        return self._prefix
+
+    @property
+    def namespace(self) -> str:
+        """Full namespace passed to the constructor."""
+        return self._namespace
diff --git a/scripts/sbom/sbom/spdx_graph/__init__.py b/scripts/sbom/sbom/spdx_graph/__init__.py
new file mode 100644
index 0000000000000..3557b1d51bf93
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/__init__.py
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from .build_spdx_graphs import build_spdx_graphs
+from .spdx_graph_model import SpdxIdGeneratorCollection
+
+__all__ = ["build_spdx_graphs", "SpdxIdGeneratorCollection"]
diff --git a/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py
new file mode 100644
index 0000000000000..ee24e9eaf603c
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from datetime import datetime
+from typing import Protocol
+
+import logging
+from sbom.config import KernelSpdxDocumentKind
+from sbom.cmd_graph import CmdGraph
+from sbom.path_utils import PathStr
+from sbom.spdx_graph.kernel_file import KernelFileCollection
+from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection
+from sbom.spdx_graph.shared_spdx_elements import SharedSpdxElements
+from sbom.spdx_graph.spdx_source_graph import SpdxSourceGraph
+from sbom.spdx_graph.spdx_build_graph import SpdxBuildGraph
+from sbom.spdx_graph.spdx_output_graph import SpdxOutputGraph
+
+
+# Structural (duck-typed) view of the configuration that build_spdx_graphs
+# reads directly or hands on to SpdxOutputGraph.create.
+class SpdxGraphConfig(Protocol):
+ # obj_tree / src_tree: passed to KernelFileCollection.create to classify files.
+ obj_tree: PathStr
+ src_tree: PathStr
+ # created: passed to SharedSpdxElements.create for the creation info.
+ created: datetime
+ # The remaining attributes are not read by build_spdx_graphs itself; the
+ # whole config object is forwarded to SpdxOutputGraph.create.
+ build_type: str
+ build_id: str | None
+ package_license: str
+ package_version: str | None
+ package_copyright_text: str | None
+
+
+def build_spdx_graphs(
+    cmd_graph: CmdGraph,
+    spdx_id_generators: SpdxIdGeneratorCollection,
+    config: SpdxGraphConfig,
+) -> dict[KernelSpdxDocumentKind, SpdxGraph]:
+    """
+    Derive the SPDX graphs of a kernel build from its cmd dependency graph.
+
+    An output graph and a build graph are always produced. A separate source
+    graph is produced only when the file collection contains source-tree files;
+    otherwise the build graph carries the source files itself.
+
+    Args:
+        cmd_graph: The dependency graph of a kernel build.
+        spdx_id_generators: Collection of SPDX ID generators.
+        config: Configuration options.
+
+    Returns:
+        The created graphs keyed by document kind, in the order
+        OUTPUT, SOURCE (if any), BUILD.
+    """
+    # IDs are allocated sequentially, so the creation order below is part of
+    # the result: shared elements, file collection, output, source, build.
+    shared = SharedSpdxElements.create(spdx_id_generators.base, config.created)
+    files = KernelFileCollection.create(cmd_graph, config.obj_tree, config.src_tree, spdx_id_generators)
+
+    output_graph = SpdxOutputGraph.create(
+        root_files=list(files.output.values()),
+        shared_elements=shared,
+        spdx_id_generators=spdx_id_generators,
+        config=config,
+    )
+    graphs: dict[KernelSpdxDocumentKind, SpdxGraph] = {KernelSpdxDocumentKind.OUTPUT: output_graph}
+
+    if files.source:
+        graphs[KernelSpdxDocumentKind.SOURCE] = SpdxSourceGraph.create(
+            source_files=list(files.source.values()),
+            external_files=list(files.external.values()),
+            shared_elements=shared,
+            spdx_id_generators=spdx_id_generators,
+        )
+    else:
+        logging.info(
+            "Skipped creating a dedicated source SBOM because source files cannot be "
+            "reliably classified when the source and object trees are identical. "
+            "Added source files to the build SBOM instead."
+        )
+
+    graphs[KernelSpdxDocumentKind.BUILD] = SpdxBuildGraph.create(
+        cmd_graph,
+        files,
+        shared,
+        output_graph.high_level_build_element,
+        spdx_id_generators,
+    )
+    return graphs
diff --git a/scripts/sbom/sbom/spdx_graph/kernel_file.py b/scripts/sbom/sbom/spdx_graph/kernel_file.py
new file mode 100644
index 0000000000000..505f25f66ebba
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/kernel_file.py
@@ -0,0 +1,315 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+from enum import Enum
+import hashlib
+import os
+import re
+from sbom.cmd_graph import CmdGraph
+from sbom.path_utils import PathStr, is_relative_to
+from sbom.spdx import SpdxId, SpdxIdGenerator
+from sbom.spdx.core import Hash
+from sbom.spdx.software import ContentIdentifier, File, SoftwarePurpose
+import sbom.sbom_logging as sbom_logging
+from sbom.spdx_graph.spdx_graph_model import SpdxIdGeneratorCollection
+
+
+# Classification assigned by KernelFile.create from the file's position
+# relative to the configured source and object trees.
+class KernelFileLocation(Enum):
+ """Represents the location of a file relative to the source/object trees."""
+
+ SOURCE_TREE = "source_tree"
+ """File is located in the source tree."""
+ OBJ_TREE = "obj_tree"
+ """File is located in the object tree."""
+ EXTERNAL = "external"
+ """File is located outside both source and object trees."""
+ # KernelFile.create assigns BOTH when the file is under the source tree and
+ # src_tree == obj_tree.
+ BOTH = "both"
+ """File is located in a folder that is both source and object tree."""
+
+
+@dataclass
+class KernelFile:
+    """Kernel-specific description of one file, from which its SPDX File element is built on demand."""
+
+    absolute_path: PathStr
+    """Absolute path of the file."""
+    file_location: KernelFileLocation
+    """Location of the file relative to the source/object trees."""
+    name: str
+    """Name of the file element. Should be relative to the source tree if
+    file_location equals SOURCE_TREE and relative to the object tree if
+    file_location equals OBJ_TREE. If file_location equals EXTERNAL, the
+    absolute path is used."""
+    license_identifier: str | None
+    """SPDX license ID if file_location equals SOURCE_TREE or BOTH; otherwise None."""
+    spdx_id_generator: SpdxIdGenerator
+    """Generator for the SPDX ID of the file element."""
+
+    _spdx_file_element: File | None = None
+
+    @classmethod
+    def create(
+        cls,
+        absolute_path: PathStr,
+        obj_tree: PathStr,
+        src_tree: PathStr,
+        spdx_id_generators: SpdxIdGeneratorCollection,
+        is_output: bool,
+    ) -> "KernelFile":
+        """
+        Classify a file against the source/object trees and pick its element
+        name, its ID generator and (for source files) its license identifier.
+
+        Args:
+            absolute_path: Absolute path of the file.
+            obj_tree: Path of the object tree.
+            src_tree: Path of the source tree.
+            spdx_id_generators: Collection of SPDX ID generators to choose from.
+            is_output: Whether a file inside the object tree takes its ID from
+                the output generator instead of the build generator.
+
+        Returns:
+            The KernelFile; its SPDX File element is not created yet.
+        """
+        in_obj_tree = is_relative_to(absolute_path, obj_tree)
+        in_src_tree = is_relative_to(absolute_path, src_tree)
+        trees_coincide = src_tree == obj_tree
+
+        if not (in_src_tree or in_obj_tree):
+            # Outside both trees: keep the absolute path as the element name.
+            location = KernelFileLocation.EXTERNAL
+            element_name = str(absolute_path)
+            id_generator = spdx_id_generators.build if trees_coincide else spdx_id_generators.source
+        elif (in_src_tree and trees_coincide) or in_obj_tree:
+            # Inside the object tree (which may double as the source tree).
+            if in_src_tree and trees_coincide:
+                location = KernelFileLocation.BOTH
+            else:
+                location = KernelFileLocation.OBJ_TREE
+            element_name = os.path.relpath(absolute_path, obj_tree)
+            id_generator = spdx_id_generators.output if is_output else spdx_id_generators.build
+        else:
+            # Only inside a source tree that is distinct from the object tree.
+            location = KernelFileLocation.SOURCE_TREE
+            element_name = os.path.relpath(absolute_path, src_tree)
+            id_generator = spdx_id_generators.source
+
+        # Only files that may be sources are scanned for a license tag.
+        license_id = None
+        if location in (KernelFileLocation.SOURCE_TREE, KernelFileLocation.BOTH):
+            license_id = _parse_spdx_license_identifier(absolute_path)
+
+        return KernelFile(
+            absolute_path=absolute_path,
+            file_location=location,
+            name=element_name,
+            license_identifier=license_id,
+            spdx_id_generator=id_generator,
+        )
+
+    @property
+    def spdx_file_element(self) -> File:
+        """SPDX File element of this file; built (and its SPDX ID allocated) on first access, then reused."""
+        if self._spdx_file_element is not None:
+            return self._spdx_file_element
+        element = _build_file_element(
+            self.absolute_path,
+            self.name,
+            self.spdx_id_generator.generate(),
+            self.file_location,
+        )
+        self._spdx_file_element = element
+        return element
+
+
+@dataclass
+class KernelFileCollection:
+    """Kernel files of a build, grouped by role and keyed by absolute path."""
+
+    source: dict[PathStr, KernelFile]
+    build: dict[PathStr, KernelFile]
+    output: dict[PathStr, KernelFile]
+    external: dict[PathStr, KernelFile]
+
+    @classmethod
+    def create(
+        cls,
+        cmd_graph: CmdGraph,
+        obj_tree: PathStr,
+        src_tree: PathStr,
+        spdx_id_generators: SpdxIdGeneratorCollection,
+    ) -> "KernelFileCollection":
+        """
+        Create a KernelFile for every node of the cmd graph and sort it into a group.
+
+        Root nodes always go to `output`. Every other node goes to `source`
+        (SOURCE_TREE), `external` (EXTERNAL) or, for all remaining locations, `build`.
+        """
+        source_files: dict[PathStr, KernelFile] = {}
+        build_files: dict[PathStr, KernelFile] = {}
+        output_files: dict[PathStr, KernelFile] = {}
+        external_files: dict[PathStr, KernelFile] = {}
+
+        root_paths = {root.absolute_path for root in cmd_graph.roots}
+        for node in cmd_graph:
+            is_root = node.absolute_path in root_paths
+            kernel_file = KernelFile.create(
+                node.absolute_path,
+                obj_tree,
+                src_tree,
+                spdx_id_generators,
+                is_root,
+            )
+
+            # Being a root takes precedence over the file's location.
+            if is_root:
+                group = output_files
+            elif kernel_file.file_location == KernelFileLocation.SOURCE_TREE:
+                group = source_files
+            elif kernel_file.file_location == KernelFileLocation.EXTERNAL:
+                group = external_files
+            else:
+                group = build_files
+            group[kernel_file.absolute_path] = kernel_file
+
+        return KernelFileCollection(source_files, build_files, output_files, external_files)
+
+    def to_dict(self) -> dict[PathStr, KernelFile]:
+        """Return all files in one new dict, merged in the order source, build, output, external."""
+        merged: dict[PathStr, KernelFile] = {}
+        for group in (self.source, self.build, self.output, self.external):
+            merged.update(group)
+        return merged
+
+
+def _build_file_element(absolute_path: PathStr, name: str, spdx_id: SpdxId, file_location: KernelFileLocation) -> File:
+    """
+    Build the SPDX File element for one file.
+
+    When the file exists, the element carries its SHA-256 hash and its gitoid
+    content identifier. When it does not exist, both lists stay empty and the
+    problem is reported: as a warning for EXTERNAL files, as an error otherwise.
+    """
+    hashes: list[Hash] = []
+    content_ids: list[ContentIdentifier] = []
+
+    if not os.path.isfile(absolute_path):
+        report = sbom_logging.warning if file_location == KernelFileLocation.EXTERNAL else sbom_logging.error
+        report(
+            "Cannot compute hash for {absolute_path} because file does not exist.",
+            absolute_path=absolute_path,
+        )
+    else:
+        hashes = [Hash(algorithm="sha256", hashValue=_sha256(absolute_path))]
+        content_ids = [
+            ContentIdentifier(
+                software_contentIdentifierType="gitoid",
+                software_contentIdentifierValue=_git_blob_oid(absolute_path),
+            )
+        ]
+
+    # The primary purpose is inferred from the path alone, so it is set even for missing files.
+    purpose = _get_primary_purpose(absolute_path)
+
+    return File(
+        spdxId=spdx_id,
+        name=name,
+        verifiedUsing=hashes,
+        software_primaryPurpose=purpose,
+        software_contentIdentifier=content_ids,
+    )
+
+
+def _sha256(file_path: PathStr, chunk_size: int = 1 << 20) -> str:
+    """Return the SHA-256 hex digest of a file's contents, hashing it chunk_size bytes at a time."""
+    digest = hashlib.sha256()
+    with open(file_path, "rb") as stream:
+        # read() returns b"" at end of file, which ends the loop.
+        while block := stream.read(chunk_size):
+            digest.update(block)
+    return digest.hexdigest()
+
+
+def _git_blob_oid(file_path: str, chunk_size: int = 1 << 20) -> str:
+    """
+    Compute the Git blob object ID (SHA-1 hex) for a file, like `git hash-object`.
+
+    The digest covers the header "blob <size>\\0" followed by the file content,
+    which is read in chunks of chunk_size bytes.
+
+    Args:
+        file_path: Path of the file to hash.
+        chunk_size: Number of bytes read per iteration.
+
+    Returns:
+        The 40-character hexadecimal blob ID.
+
+    Raises:
+        OSError: If the file cannot be opened or read.
+    """
+    # SHA-1 serves purely as a content identifier here, never as a security
+    # primitive; saying so keeps it available on builds that restrict SHA-1.
+    h = hashlib.sha1(usedforsecurity=False)
+    with open(file_path, "rb") as f:
+        # Take the size from the handle that is being hashed, so the header is
+        # derived from the same file object as the content that follows it.
+        h.update(f"blob {os.fstat(f.fileno()).st_size}\0".encode())
+        while chunk := f.read(chunk_size):
+            h.update(chunk)
+    return h.hexdigest()
+
+
+# REUSE-IgnoreStart
+# Finds the SPDX license tag and captures the expression that follows it in the
+# named group "id". The expression ends at an XML ("-->") or C-style ("*/")
+# comment terminator or at the end of the line, whichever comes first. A tag
+# with nothing after it yields an empty "id" instead of running on into the
+# following line.
+SPDX_LICENSE_IDENTIFIER_PATTERN = re.compile(
+    r"SPDX-License-Identifier:"  # literal tag
+    r"[ \t]*"  # optional blanks after colon (never a newline, so the match stays on the tag's line)
+    r"(?P<id>.*?)"  # license expression (non-greedy, stops before terminator)
+    r"(?:\s*"  # optional whitespace before terminator (not captured)
+    r"(-->|\*/|$))",  # terminator: XML "-->", C-style "*/", or end of line
+    re.MULTILINE,  # match end of each line, not just end of string
+)
+# REUSE-IgnoreEnd
+
+
+def _parse_spdx_license_identifier(absolute_path: str, max_bytes: int = 512) -> str | None:
+    """
+    Extracts the SPDX-License-Identifier from the beginning of a source file.
+
+    Exactly the first max_bytes bytes are read. They are decoded as UTF-8 with
+    undecodable sequences replaced, so stray non-UTF-8 bytes (or a multi-byte
+    character cut off at the read limit) do not hide a tag that is otherwise
+    readable.
+
+    Args:
+        absolute_path: Path to the source file.
+        max_bytes: Maximum number of bytes to scan for the license identifier.
+
+    Returns:
+        The license identifier string (e.g., 'GPL-2.0-only') if found, otherwise None.
+        A tag that is present but has no expression after it also yields None.
+    """
+    try:
+        # Binary mode makes max_bytes a true byte limit; in text mode read()
+        # counts characters instead.
+        with open(absolute_path, "rb") as f:
+            head = f.read(max_bytes)
+    except OSError:
+        return None
+
+    match = SPDX_LICENSE_IDENTIFIER_PATTERN.search(head.decode("utf-8", errors="replace"))
+    if match is None:
+        return None
+    # An empty capture means "tag without expression"; report it as not found.
+    return match.group("id") or None
+
+
+def _get_primary_purpose(absolute_path: PathStr) -> SoftwarePurpose | None:
+    """
+    Infer the SPDX primary purpose of a file from its path.
+
+    The rules are tried from top to bottom and the first one that matches wins.
+    A rule matches when the path ends with one of its suffixes or contains one
+    of its path fragments.
+
+    Args:
+        absolute_path: Path of the file to classify.
+
+    Returns:
+        The inferred purpose, or None (after logging a warning) when no rule matches.
+    """
+    # (purpose, path suffixes, path fragments)
+    rules: tuple[tuple[SoftwarePurpose, tuple[str, ...], tuple[str, ...]], ...] = (
+        # Source code
+        ("source", (".c", ".h", ".S", ".s", ".rs", ".pl", "gen_smb1_mapping", "gen_smb2_mapping"), ()),
+        # Libraries
+        ("library", (".a", ".so", ".so.raw", ".rlib"), ()),
+        # Archives
+        ("archive", (".xz", ".cpio", ".gz", ".tar", ".zip", "piggy_data"), ()),
+        # Applications
+        ("application", ("bzImage", "Image", ".efi"), ()),
+        # Executables / machine code
+        ("executable", (".bin", ".elf", "vmlinux", "vmlinux.unstripped", "vmlinuz", "bpfilter_umh"), ()),
+        # Kernel modules
+        ("module", (".ko",), ()),
+        # Data files
+        (
+            "data",
+            (
+                ".tbl",
+                ".relocs",
+                ".rmeta",
+                ".in",
+                ".dbg",
+                ".x509",
+                ".pbm",
+                ".ppm",
+                ".dtb",
+                ".uc",
+                ".inc",
+                ".dts",
+                ".dtsi",
+                ".dtbo",
+                ".xml",
+                ".ro",
+                "initramfs_inc_data",
+                "default_cpio_list",
+                "x509_certificate_list",
+                "utf8data.c_shipped",
+                "blacklist_hash_list",
+                "x509_revocation_list",
+                "cpucaps",
+                "sysreg",
+                "mach-types",
+            ),
+            ("drivers/gpu/drm/radeon/reg_srcs/",),
+        ),
+        # Configuration files
+        ("configuration", (".pem", ".key", ".conf", ".config", ".cfg", ".bconf"), ()),
+        # Documentation
+        ("documentation", (".md",), ()),
+        # Other / miscellaneous
+        ("other", (".o", ".tmp"), ()),
+    )
+
+    for purpose, suffixes, path_fragments in rules:
+        # str.endswith accepts a tuple and tests all suffixes in one call.
+        if absolute_path.endswith(suffixes) or any(fragment in absolute_path for fragment in path_fragments):
+            return purpose
+
+    sbom_logging.warning("Could not infer primary purpose for {absolute_path}", absolute_path=absolute_path)
+    return None
diff --git a/scripts/sbom/sbom/spdx_graph/shared_spdx_elements.py b/scripts/sbom/sbom/spdx_graph/shared_spdx_elements.py
new file mode 100644
index 0000000000000..115e8778a4671
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/shared_spdx_elements.py
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from sbom.spdx.core import CreationInfo, SoftwareAgent
+from sbom.spdx.spdxId import SpdxIdGenerator
+
+
+@dataclass(frozen=True)
+class SharedSpdxElements:
+    """The agent and creation info that every generated SPDX document refers to."""
+
+    agent: SoftwareAgent
+    creation_info: CreationInfo
+
+    @classmethod
+    def create(cls, spdx_id_generator: SpdxIdGenerator, created: datetime) -> "SharedSpdxElements":
+        """
+        Build the elements that are shared between the SPDX documents.
+
+        Args:
+            spdx_id_generator: Generator that supplies the agent's SPDX ID.
+            created: Creation time; converted to UTC and written as "YYYY-MM-DDTHH:MM:SSZ".
+
+        Returns:
+            SharedSpdxElements holding the "KernelSbom" agent and the creation info naming it.
+        """
+        agent = SoftwareAgent(
+            spdxId=spdx_id_generator.generate(),
+            name="KernelSbom",
+        )
+        created_utc = created.astimezone(timezone.utc)
+        creation_info = CreationInfo(
+            createdBy=[agent],
+            created=created_utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
+        )
+        return SharedSpdxElements(agent=agent, creation_info=creation_info)
diff --git a/scripts/sbom/sbom/spdx_graph/spdx_build_graph.py b/scripts/sbom/sbom/spdx_graph/spdx_build_graph.py
new file mode 100644
index 0000000000000..4d738bc3b3e24
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/spdx_build_graph.py
@@ -0,0 +1,318 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+from typing import Mapping
+from sbom.cmd_graph import CmdGraph
+from sbom.path_utils import PathStr
+from sbom.spdx import SpdxIdGenerator
+from sbom.spdx.build import Build
+from sbom.spdx.core import ExternalMap, NamespaceMap, Relationship, SpdxDocument
+from sbom.spdx.software import File, Sbom
+from sbom.spdx_graph.kernel_file import KernelFileCollection
+from sbom.spdx_graph.shared_spdx_elements import SharedSpdxElements
+from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection
+from sbom.spdx_graph.spdx_source_graph import source_file_license_elements
+
+
+@dataclass
+class SpdxBuildGraph(SpdxGraph):
+    """SPDX graph of the build itself: the dependencies that lead from source
+    files to the distributable output files"""
+
+    @classmethod
+    def create(
+        cls,
+        cmd_graph: CmdGraph,
+        kernel_files: KernelFileCollection,
+        shared_elements: SharedSpdxElements,
+        high_level_build_element: Build,
+        spdx_id_generators: SpdxIdGeneratorCollection,
+    ) -> "SpdxBuildGraph":
+        """
+        Create the build graph.
+
+        When the collection holds dedicated source files, they are referenced
+        from an external document; otherwise the variant that keeps the source
+        files inside the build graph is used.
+        """
+        build = (
+            _create_spdx_build_graph
+            if kernel_files.source
+            else _create_spdx_build_graph_with_mixed_sources
+        )
+        return build(
+            cmd_graph,
+            kernel_files,
+            shared_elements,
+            high_level_build_element,
+            spdx_id_generators,
+        )
+
+
+def _create_spdx_build_graph(
+    cmd_graph: CmdGraph,
+    kernel_files: KernelFileCollection,
+    shared_elements: SharedSpdxElements,
+    high_level_build_element: Build,
+    spdx_id_generators: SpdxIdGeneratorCollection,
+) -> SpdxBuildGraph:
+    """
+    Assemble the build graph for the case where source files have their own
+    document: source, external and output files are only imported here, while
+    build files and the relationships between all files are listed as elements.
+
+    Args:
+        cmd_graph: The dependency graph of a kernel build.
+        kernel_files: Collection of categorized kernel files involved in the build.
+        shared_elements: SPDX elements shared across multiple documents.
+        high_level_build_element: The high-level Build element referenced by the build graph.
+        spdx_id_generators: Collection of generators for SPDX element IDs.
+
+    Returns:
+        SpdxBuildGraph: The SPDX build graph connecting source files and distributable output files.
+    """
+    # IDs are numbered sequentially, so the order of the generate() calls and
+    # of the first spdx_file_element accesses below determines the resulting IDs.
+    build_ids = spdx_id_generators.build
+
+    # SpdxDocument
+    document = SpdxDocument(
+        spdxId=build_ids.generate(),
+        profileConformance=["core", "software", "build"],
+        namespaceMap=[
+            NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
+            for generator in [
+                spdx_id_generators.build,
+                spdx_id_generators.source,
+                spdx_id_generators.output,
+                spdx_id_generators.base,
+            ]
+            if generator.prefix is not None
+        ],
+    )
+
+    # Sbom
+    sbom = Sbom(
+        spdxId=build_ids.generate(),
+        software_sbomType=["build"],
+    )
+
+    # Object tree directory and its (initially empty) "contains" relationship
+    obj_tree = File(
+        spdxId=build_ids.generate(),
+        name="$(obj_tree)",
+        software_fileKind="directory",
+    )
+    obj_tree_contains = Relationship(
+        spdxId=build_ids.generate(),
+        relationshipType="contains",
+        from_=obj_tree,
+        to=[],
+    )
+
+    # File elements and the relationships between them
+    build_elements = [file.spdx_file_element for file in kernel_files.build.values()]
+    relationships = _file_relationships(
+        cmd_graph=cmd_graph,
+        file_elements={path: file.spdx_file_element for path, file in kernel_files.to_dict().items()},
+        high_level_build_element=high_level_build_element,
+        spdx_id_generator=build_ids,
+    )
+    output_elements = [file.spdx_file_element for file in kernel_files.output.values()]
+
+    # Wire everything together
+    document.rootElement = [sbom]
+
+    imported_ids = [
+        *(
+            file.spdx_file_element.spdxId
+            for file in (*kernel_files.source.values(), *kernel_files.external.values())
+        ),
+        high_level_build_element.spdxId,
+        *(element.spdxId for element in output_elements),
+    ]
+    document.import_ = [ExternalMap(externalSpdxId=spdx_id) for spdx_id in imported_ids]
+
+    sbom.rootElement = [obj_tree]
+    sbom.element = [
+        obj_tree,
+        obj_tree_contains,
+        *build_elements,
+        *relationships,
+    ]
+
+    obj_tree_contains.to = [*build_elements, *output_elements]
+
+    return SpdxBuildGraph(
+        document,
+        shared_elements.agent,
+        shared_elements.creation_info,
+        sbom,
+    )
+
+
+def _create_spdx_build_graph_with_mixed_sources(
+    cmd_graph: CmdGraph,
+    kernel_files: KernelFileCollection,
+    shared_elements: SharedSpdxElements,
+    high_level_build_element: Build,
+    spdx_id_generators: SpdxIdGeneratorCollection,
+) -> SpdxBuildGraph:
+    """
+    Assemble the build graph for the case without a separate source document:
+    only the output files and the high-level Build element are imported, while
+    build files, external files and the license elements derived from the build
+    files are listed in the build graph itself.
+
+    Args:
+        cmd_graph: The dependency graph of a kernel build.
+        kernel_files: Collection of categorized kernel files involved in the build.
+        shared_elements: SPDX elements shared across multiple documents.
+        high_level_build_element: The high-level Build element referenced by the build graph.
+        spdx_id_generators: Collection of generators for SPDX element IDs.
+
+    Returns:
+        SpdxBuildGraph: The SPDX build graph connecting source files and distributable output files.
+    """
+    # IDs are numbered sequentially, so the order of the generate() calls and
+    # of the first spdx_file_element accesses below determines the resulting IDs.
+    build_ids = spdx_id_generators.build
+
+    # SpdxDocument
+    document = SpdxDocument(
+        spdxId=build_ids.generate(),
+        profileConformance=["core", "software", "build"],
+        namespaceMap=[
+            NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
+            for generator in [
+                spdx_id_generators.build,
+                spdx_id_generators.output,
+                spdx_id_generators.base,
+            ]
+            if generator.prefix is not None
+        ],
+    )
+
+    # Sbom
+    sbom = Sbom(
+        spdxId=build_ids.generate(),
+        software_sbomType=["build"],
+    )
+
+    # File elements and the relationships between them
+    build_elements = [file.spdx_file_element for file in kernel_files.build.values()]
+    external_elements = [file.spdx_file_element for file in kernel_files.external.values()]
+    relationships = _file_relationships(
+        cmd_graph=cmd_graph,
+        file_elements={path: file.spdx_file_element for path, file in kernel_files.to_dict().items()},
+        high_level_build_element=high_level_build_element,
+        spdx_id_generator=build_ids,
+    )
+
+    # License elements for the files kept in the build group
+    license_identifiers, license_relationships = source_file_license_elements(
+        list(kernel_files.build.values()), build_ids
+    )
+
+    # Wire everything together
+    output_elements = [file.spdx_file_element for file in kernel_files.output.values()]
+
+    document.rootElement = [sbom]
+    document.import_ = [
+        ExternalMap(externalSpdxId=high_level_build_element.spdxId),
+        *(ExternalMap(externalSpdxId=element.spdxId) for element in output_elements),
+    ]
+
+    sbom.rootElement = list(output_elements)
+    sbom.element = [
+        *build_elements,
+        *external_elements,
+        *license_identifiers,
+        *license_relationships,
+        *relationships,
+    ]
+
+    return SpdxBuildGraph(
+        document,
+        shared_elements.agent,
+        shared_elements.creation_info,
+        sbom,
+    )
+
+
+def _file_relationships(
+    cmd_graph: CmdGraph,
+    file_elements: Mapping[PathStr, File],
+    high_level_build_element: Build,
+    spdx_id_generator: SpdxIdGenerator,
+) -> list[Build | Relationship]:
+    """
+    Translate the dependencies of the cmd graph into SPDX Build and
+    Relationship elements.
+
+    For every node with a .cmd file a Build element is created, linked to its
+    inputs ("hasInput", only if there are any) and to the node's own file
+    ("hasOutput"), and registered as a descendant of the high-level build.
+    incbin and hardcoded dependencies become "dependsOn" relationships.
+
+    Args:
+        cmd_graph: The dependency graph of a kernel build.
+        file_elements: Mapping of filesystem paths (PathStr) to their
+            corresponding SPDX File elements.
+        high_level_build_element: The SPDX Build element representing the overall build process/root.
+        spdx_id_generator: Generator for unique SPDX IDs.
+
+    Returns:
+        list[Build | Relationship]: The created elements; the first entry is the
+        "ancestorOf" relationship of the high-level build.
+    """
+    ancestor_of = Relationship(
+        spdxId=spdx_id_generator.generate(),
+        relationshipType="ancestorOf",
+        from_=high_level_build_element,
+        completeness="complete",
+        to=[],
+    )
+    elements: list[Build | Relationship] = [ancestor_of]
+
+    for node in cmd_graph:
+        # .cmd file dependencies: the node is the output, its dependencies are the inputs
+        if node.cmd_file is not None:
+            node_build = Build(
+                spdxId=spdx_id_generator.generate(),
+                build_buildType=high_level_build_element.build_buildType,
+                build_buildId=high_level_build_element.build_buildId,
+                comment=node.cmd_file.savedcmd,
+            )
+            elements.append(node_build)
+
+            if node.cmd_file_dependencies:
+                elements.append(
+                    Relationship(
+                        spdxId=spdx_id_generator.generate(),
+                        relationshipType="hasInput",
+                        from_=node_build,
+                        to=[file_elements[dependency.absolute_path] for dependency in node.cmd_file_dependencies],
+                    )
+                )
+
+            elements.append(
+                Relationship(
+                    spdxId=spdx_id_generator.generate(),
+                    relationshipType="hasOutput",
+                    from_=node_build,
+                    to=[file_elements[node.absolute_path]],
+                )
+            )
+
+            ancestor_of.to.append(node_build)
+
+        # incbin dependencies; the comment lists the originating statements, one per line
+        if node.incbin_dependencies:
+            elements.append(
+                Relationship(
+                    spdxId=spdx_id_generator.generate(),
+                    relationshipType="dependsOn",
+                    comment="\n".join(incbin.full_statement for incbin in node.incbin_dependencies),
+                    from_=file_elements[node.absolute_path],
+                    to=[file_elements[incbin.node.absolute_path] for incbin in node.incbin_dependencies],
+                )
+            )
+
+        # hardcoded dependencies
+        if node.hardcoded_dependencies:
+            elements.append(
+                Relationship(
+                    spdxId=spdx_id_generator.generate(),
+                    relationshipType="dependsOn",
+                    from_=file_elements[node.absolute_path],
+                    to=[file_elements[dependency.absolute_path] for dependency in node.hardcoded_dependencies],
+                )
+            )
+
+    return elements
diff --git a/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py b/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py
new file mode 100644
index 0000000000000..682194d4362a2
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/spdx_graph_model.py
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+from sbom.spdx.core import CreationInfo, SoftwareAgent, SpdxDocument, SpdxObject
+from sbom.spdx.software import Sbom
+from sbom.spdx.spdxId import SpdxIdGenerator
+
+
+@dataclass
+class SpdxGraph:
+    """All top-level objects that make up one SPDX document."""
+
+    spdx_document: SpdxDocument
+    agent: SoftwareAgent
+    creation_info: CreationInfo
+    sbom: Sbom
+
+    def to_list(self) -> list[SpdxObject]:
+        """Return the document, agent, creation info and SBOM, followed by the SBOM's elements."""
+        objects: list[SpdxObject] = [
+            self.spdx_document,
+            self.agent,
+            self.creation_info,
+            self.sbom,
+        ]
+        objects.extend(self.sbom.element)
+        return objects
+
+
+@dataclass
+class SpdxIdGeneratorCollection:
+ """Holds SPDX ID generators for different document types to ensure globally unique SPDX IDs."""
+
+ # base: supplies the ID of the shared agent (see SharedSpdxElements.create).
+ base: SpdxIdGenerator
+ # source: IDs of the source document and of files located in the source tree.
+ source: SpdxIdGenerator
+ # build: IDs of the build document, its Build/Relationship elements and
+ # non-output files inside the object tree.
+ build: SpdxIdGenerator
+ # output: IDs of the output document, its packages and output files.
+ output: SpdxIdGenerator
diff --git a/scripts/sbom/sbom/spdx_graph/spdx_output_graph.py b/scripts/sbom/sbom/spdx_graph/spdx_output_graph.py
new file mode 100644
index 0000000000000..ff9b2c31fb04f
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/spdx_output_graph.py
@@ -0,0 +1,187 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+import os
+from typing import Protocol
+from sbom.environment import Environment
+from sbom.path_utils import PathStr
+from sbom.spdx.build import Build
+from sbom.spdx.core import DictionaryEntry, NamespaceMap, Relationship, SpdxDocument
+from sbom.spdx.simplelicensing import LicenseExpression
+from sbom.spdx.software import File, Package, Sbom
+from sbom.spdx.spdxId import SpdxIdGenerator
+from sbom.spdx_graph.kernel_file import KernelFile
+from sbom.spdx_graph.shared_spdx_elements import SharedSpdxElements
+from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection
+
+
+# Structural (duck-typed) view of the configuration read by SpdxOutputGraph.create.
+class SpdxOutputGraphConfig(Protocol):
+ # obj_tree / src_tree: used to locate and classify "<obj_tree>/.config".
+ obj_tree: PathStr
+ src_tree: PathStr
+ # build_type / build_id: forwarded to the high-level Build element; when
+ # build_id is None the Build element's own SPDX ID is used instead.
+ build_type: str
+ build_id: str | None
+ # package_license: license expression declared for every package.
+ package_license: str
+ # package_version / package_copyright_text: copied onto every package.
+ package_version: str | None
+ package_copyright_text: str | None
+
+
+@dataclass
+class SpdxOutputGraph(SpdxGraph):
+    """SPDX graph describing the distributable output files and their packages"""
+
+    high_level_build_element: Build
+
+    @classmethod
+    def create(
+        cls,
+        root_files: list[KernelFile],
+        shared_elements: SharedSpdxElements,
+        spdx_id_generators: SpdxIdGeneratorCollection,
+        config: SpdxOutputGraphConfig,
+    ) -> "SpdxOutputGraph":
+        """
+        Build the output graph: one package per root file, each with the
+        configured license, plus the high-level Build element that has the
+        root files as outputs and "<obj_tree>/.config" as its config source.
+
+        Args:
+            root_files: List of distributable output files which act as roots
+                of the dependency graph.
+            shared_elements: Shared SPDX elements used across multiple documents.
+            spdx_id_generators: Collection of SPDX ID generators.
+            config: Configuration options.
+
+        Returns:
+            SpdxOutputGraph: The SPDX output graph.
+        """
+        # IDs are numbered sequentially, so the order of the generate() calls
+        # and of the first spdx_file_element accesses below determines the IDs.
+        output_ids = spdx_id_generators.output
+
+        # SpdxDocument
+        document = SpdxDocument(
+            spdxId=output_ids.generate(),
+            profileConformance=["core", "software", "build", "simpleLicensing"],
+            namespaceMap=[
+                NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
+                for generator in [spdx_id_generators.output, spdx_id_generators.base]
+                if generator.prefix is not None
+            ],
+        )
+
+        # Sbom
+        sbom = Sbom(
+            spdxId=output_ids.generate(),
+            software_sbomType=["build"],
+        )
+
+        # High-level Build elements
+        config_file = KernelFile.create(
+            absolute_path=os.path.join(config.obj_tree, ".config"),
+            obj_tree=config.obj_tree,
+            src_tree=config.src_tree,
+            spdx_id_generators=spdx_id_generators,
+            is_output=True,
+        )
+        config_element = config_file.spdx_file_element
+        build_element, build_has_output = _high_level_build_elements(
+            config.build_type,
+            config.build_id,
+            config_element,
+            output_ids,
+        )
+
+        # Root file elements
+        root_elements: list[File] = [root_file.spdx_file_element for root_file in root_files]
+
+        # One package per root file
+        packages = [
+            Package(
+                spdxId=output_ids.generate(),
+                name=_get_package_name(root_element.name),
+                software_packageVersion=config.package_version,
+                software_copyrightText=config.package_copyright_text,
+                comment=f"Architecture={arch}" if (arch := Environment.ARCH() or Environment.SRCARCH()) else None,
+                software_primaryPurpose=root_element.software_primaryPurpose,
+            )
+            for root_element in root_elements
+        ]
+        distribution_artifact_relationships = [
+            Relationship(
+                spdxId=output_ids.generate(),
+                relationshipType="hasDistributionArtifact",
+                from_=package,
+                to=[root_element],
+            )
+            for package, root_element in zip(packages, root_elements)
+        ]
+
+        # A single license expression shared by all packages
+        license_expression = LicenseExpression(
+            spdxId=output_ids.generate(),
+            simplelicensing_licenseExpression=config.package_license,
+        )
+        declared_license_relationships = [
+            Relationship(
+                spdxId=output_ids.generate(),
+                relationshipType="hasDeclaredLicense",
+                from_=package,
+                to=[license_expression],
+            )
+            for package in packages
+        ]
+
+        # Wire everything together
+        document.rootElement = [sbom]
+
+        sbom.rootElement = list(packages)
+        sbom.element = [
+            config_element,
+            build_element,
+            build_has_output,
+            *root_elements,
+            *packages,
+            *distribution_artifact_relationships,
+            license_expression,
+            *declared_license_relationships,
+        ]
+
+        build_has_output.to = list(root_elements)
+
+        return SpdxOutputGraph(
+            document,
+            shared_elements.agent,
+            shared_elements.creation_info,
+            sbom,
+            build_element,
+        )
+
+
+def _get_package_name(filename: str) -> str:
+    """
+    Derive the SPDX package name for an output file.
+    The kernel images "bzImage" and "Image" are named "Linux Kernel (<basename>)";
+    every other file is named after its basename.
+    """
+    base = os.path.basename(filename)
+    if base in ("bzImage", "Image"):
+        return f"Linux Kernel ({base})"
+    return base
+
+
+def _high_level_build_elements(
+    build_type: str,
+    build_id: str | None,
+    config_source_element: File,
+    spdx_id_generator: SpdxIdGenerator,
+) -> tuple[Build, Relationship]:
+    """
+    Create the Build element for the overall kernel build together with its
+    "hasOutput" relationship, whose target list is left empty for the caller.
+
+    The Build element records every kernel build variable with a non-empty
+    value and points at the config source element by ID and digest. When
+    build_id is None, the element's own SPDX ID doubles as the build ID.
+    """
+    build_spdx_id = spdx_id_generator.generate()
+    effective_build_id = build_spdx_id if build_id is None else build_id
+
+    environment = [
+        DictionaryEntry(key=variable, value=setting)
+        for variable, setting in Environment.KERNEL_BUILD_VARIABLES().items()
+        if setting
+    ]
+    build = Build(
+        spdxId=build_spdx_id,
+        build_buildType=build_type,
+        build_buildId=effective_build_id,
+        build_environment=environment,
+        build_configSourceUri=[config_source_element.spdxId],
+        build_configSourceDigest=config_source_element.verifiedUsing,
+    )
+
+    has_output = Relationship(
+        spdxId=spdx_id_generator.generate(),
+        relationshipType="hasOutput",
+        from_=build,
+        to=[],
+    )
+    return build, has_output
diff --git a/scripts/sbom/sbom/spdx_graph/spdx_source_graph.py b/scripts/sbom/sbom/spdx_graph/spdx_source_graph.py
new file mode 100644
index 0000000000000..90880212dedd9
--- /dev/null
+++ b/scripts/sbom/sbom/spdx_graph/spdx_source_graph.py
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+from dataclasses import dataclass
+from sbom.spdx import SpdxIdGenerator
+from sbom.spdx.core import Element, NamespaceMap, Relationship, SpdxDocument
+from sbom.spdx.simplelicensing import LicenseExpression
+from sbom.spdx.software import File, Sbom
+from sbom.spdx_graph.kernel_file import KernelFile
+from sbom.spdx_graph.shared_spdx_elements import SharedSpdxElements
+from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection
+
+
@dataclass
class SpdxSourceGraph(SpdxGraph):
    """SPDX graph that describes the source files"""

    @classmethod
    def create(
        cls,
        source_files: list[KernelFile],
        external_files: list[KernelFile],
        shared_elements: SharedSpdxElements,
        spdx_id_generators: SpdxIdGeneratorCollection,
    ) -> "SpdxSourceGraph":
        """
        Builds the source graph: an SpdxDocument whose root is a source Sbom,
        whose root in turn is a "$(src_tree)" directory element that contains
        all source files.

        Args:
            source_files: List of files within the kernel source tree.
            external_files: Files outside both source and object trees.
            shared_elements: Shared SPDX elements used across multiple documents.
            spdx_id_generators: Collection of SPDX ID generators.

        Returns:
            SpdxSourceGraph: The SPDX source graph.
        """
        # Document; only generators that define a prefix get a namespace mapping.
        document = SpdxDocument(
            spdxId=spdx_id_generators.source.generate(),
            profileConformance=["core", "software", "simpleLicensing"],
            namespaceMap=[
                NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
                for generator in [spdx_id_generators.source, spdx_id_generators.base]
                if generator.prefix is not None
            ],
        )

        # Sbom of type "source"
        sbom = Sbom(
            spdxId=spdx_id_generators.source.generate(),
            software_sbomType=["source"],
        )

        # Directory element for the source tree and its contains relationship
        tree_root = File(
            spdxId=spdx_id_generators.source.generate(),
            name="$(src_tree)",
            software_fileKind="directory",
        )
        tree_contains = Relationship(
            spdxId=spdx_id_generators.source.generate(),
            relationshipType="contains",
            from_=tree_root,
            to=[],
        )

        # File elements of the given kernel files
        source_file_elements: list[Element] = []
        for source_file in source_files:
            source_file_elements.append(source_file.spdx_file_element)
        external_file_elements: list[Element] = []
        for external_file in external_files:
            external_file_elements.append(external_file.spdx_file_element)

        # License expressions and hasDeclaredLicense relationships of the source files
        license_identifiers, license_relationships = source_file_license_elements(
            source_files, spdx_id_generators.source
        )

        # Wire everything together
        document.rootElement = [sbom]
        sbom.rootElement = [tree_root]
        sbom_members: list[Element] = [tree_root, tree_contains]
        sbom_members.extend(source_file_elements)
        sbom_members.extend(external_file_elements)
        sbom_members.extend(license_identifiers)
        sbom_members.extend(license_relationships)
        sbom.element = sbom_members
        # Only the source files (not the external ones) are contained in the source tree.
        tree_contains.to = source_file_elements

        return SpdxSourceGraph(
            document,
            shared_elements.agent,
            shared_elements.creation_info,
            sbom,
        )
+
+
def source_file_license_elements(
    source_files: list[KernelFile], spdx_id_generator: SpdxIdGenerator
) -> tuple[list[LicenseExpression], list[Relationship]]:
    """
    Builds one SPDX license expression per distinct license identifier and one
    hasDeclaredLicense relationship per source file that has an identifier.

    Files whose license identifier is None are ignored.

    Args:
        source_files: List of files within the kernel source tree.
        spdx_id_generator: Generator for unique SPDX IDs.

    Returns:
        Tuple of (license expressions, hasDeclaredLicense relationships).
    """
    # First pass: create each distinct license expression exactly once,
    # in order of first appearance.
    expression_by_identifier: dict[str, LicenseExpression] = {}
    for source_file in source_files:
        identifier = source_file.license_identifier
        if identifier is not None and identifier not in expression_by_identifier:
            expression_by_identifier[identifier] = LicenseExpression(
                spdxId=spdx_id_generator.generate(),
                simplelicensing_licenseExpression=identifier,
            )

    # Second pass: link every licensed file to its (shared) expression.
    declared_license_relationships = []
    for source_file in source_files:
        if source_file.license_identifier is None:
            continue
        declared_license_relationships.append(
            Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="hasDeclaredLicense",
                from_=source_file.spdx_file_element,
                to=[expression_by_identifier[source_file.license_identifier]],
            )
        )

    return (list(expression_by_identifier.values()), declared_license_relationships)
diff --git a/scripts/sbom/tests/__init__.py b/scripts/sbom/tests/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
--- /dev/null
+++ b/scripts/sbom/tests/__init__.py
diff --git a/scripts/sbom/tests/cmd_graph/__init__.py b/scripts/sbom/tests/cmd_graph/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
--- /dev/null
+++ b/scripts/sbom/tests/cmd_graph/__init__.py
diff --git a/scripts/sbom/tests/cmd_graph/test_savedcmd_parser.py b/scripts/sbom/tests/cmd_graph/test_savedcmd_parser.py
new file mode 100644
index 0000000000000..a061a748e1bf9
--- /dev/null
+++ b/scripts/sbom/tests/cmd_graph/test_savedcmd_parser.py
@@ -0,0 +1,443 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import os
+import unittest
+from unittest.mock import patch
+
+from sbom.cmd_graph.savedcmd_parser import parse_inputs_from_commands
+from sbom.cmd_graph.savedcmd_parser.command_parser_registry import CommandParserRegistry
+import sbom.sbom_logging as sbom_logging
+
+
+class TestSavedCmdParser(unittest.TestCase):
def _assert_parsing(self, cmd: str, expected: str, registry: CommandParserRegistry | None = None) -> None:
    """
    Parses ``cmd`` and asserts on the extracted inputs and on the error log.

    Args:
        cmd: Command string handed to parse_inputs_from_commands.
        expected: Expected inputs joined by single spaces; "" stands for no inputs.
        registry: Optional parser registry forwarded to parse_inputs_from_commands.
    """
    sbom_logging.init()
    expected_inputs = expected.split(" ") if expected != "" else []
    actual_inputs = parse_inputs_from_commands(cmd, fail_on_unknown_build_command=False, registry=registry)
    self.assertEqual(actual_inputs, expected_inputs)
    # The message counts of the error logger must still be empty after parsing.
    logged_errors = sbom_logging._error_logger._message_counts  # type: ignore
    self.assertEqual(logged_errors, {})
+
+ # Compound command tests
def test_dd_cat(self):
    """A subshell with dd and cat yields the dd if= file and the cat argument."""
    command = "(dd if=arch/x86/boot/setup.bin bs=4k conv=sync status=none; cat arch/x86/boot/vmlinux.bin) >arch/x86/boot/bzImage"
    expected_inputs = "arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin"
    self._assert_parsing(command, expected_inputs)
+
def test_manual_file_creation(self):
    """A group consisting only of an assignment and echo commands yields no inputs."""
    command = """{ symbase=__dtbo_overlay_bad_unresolved; echo '$(pound)include <asm-generic/vmlinux.lds.h>'; echo '.section .rodata,"a"'; echo '.balign STRUCT_ALIGNMENT'; echo ".global $${symbase}_begin"; echo "$${symbase}_begin:"; echo '.incbin "drivers/of/unittest-data/overlay_bad_unresolved.dtbo" '; echo ".global $${symbase}_end"; echo "$${symbase}_end:"; echo '.balign STRUCT_ALIGNMENT'; } > drivers/of/unittest-data/overlay_bad_unresolved.dtbo.S"""
    expected_inputs = ""
    self._assert_parsing(command, expected_inputs)
+
def test_cat_xz_wrap(self):
    """cat piped into xz_wrap.sh followed by printf yields only the cat argument."""
    command = "{ cat arch/x86/boot/compressed/vmlinux.bin | sh ../scripts/xz_wrap.sh; printf \\130\\064\\024\\000; } > arch/x86/boot/compressed/vmlinux.bin.xz"
    expected_inputs = "arch/x86/boot/compressed/vmlinux.bin"
    self._assert_parsing(command, expected_inputs)
+
def test_printf_sed(self):
    """Of the printf/sed group only the sed file that is not /dev/null is expected as input."""
    command = r"""{ printf 'static char tomoyo_builtin_profile[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_exception_policy[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- ../security/tomoyo/policy/exception_policy.conf.default; printf '\t"";\n'; printf 'static char tomoyo_builtin_domain_policy[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_manager[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_stat[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; } > security/tomoyo/builtin-policy.h"""
    expected_inputs = "../security/tomoyo/policy/exception_policy.conf.default"
    self._assert_parsing(command, expected_inputs)
+
def test_bin2c_echo(self):
    """Of the echo/bin2c subshell only the redirected file that is not /dev/null is expected as input."""
    command = """(echo "static char tomoyo_builtin_profile[] __initdata ="; ./scripts/bin2c </dev/null; echo ";"; echo "static char tomoyo_builtin_exception_policy[] __initdata ="; ./scripts/bin2c <../security/tomoyo/policy/exception_policy.conf.default; echo ";"; echo "static char tomoyo_builtin_domain_policy[] __initdata ="; ./scripts/bin2c </dev/null; echo ";"; echo "static char tomoyo_builtin_manager[] __initdata ="; ./scripts/bin2c </dev/null; echo ";"; echo "static char tomoyo_builtin_stat[] __initdata ="; ./scripts/bin2c </dev/null; echo ";") >security/tomoyo/builtin-policy.h"""
    expected_inputs = "../security/tomoyo/policy/exception_policy.conf.default"
    self._assert_parsing(command, expected_inputs)
+
def test_cat_colon(self):
    """A group of cat commands terminated by ':' yields every cat argument in order."""
    command = "{ cat init/modules.order; cat usr/modules.order; cat arch/x86/modules.order; cat arch/x86/boot/startup/modules.order; cat kernel/modules.order; cat certs/modules.order; cat mm/modules.order; cat fs/modules.order; cat ipc/modules.order; cat security/modules.order; cat crypto/modules.order; cat block/modules.order; cat io_uring/modules.order; cat lib/modules.order; cat arch/x86/lib/modules.order; cat drivers/modules.order; cat sound/modules.order; cat samples/modules.order; cat net/modules.order; cat virt/modules.order; cat arch/x86/pci/modules.order; cat arch/x86/power/modules.order; cat arch/x86/video/modules.order; :; } > modules.order"
    expected_inputs = "init/modules.order usr/modules.order arch/x86/modules.order arch/x86/boot/startup/modules.order kernel/modules.order certs/modules.order mm/modules.order fs/modules.order ipc/modules.order security/modules.order crypto/modules.order block/modules.order io_uring/modules.order lib/modules.order arch/x86/lib/modules.order drivers/modules.order sound/modules.order samples/modules.order net/modules.order virt/modules.order arch/x86/pci/modules.order arch/x86/power/modules.order arch/x86/video/modules.order"
    self._assert_parsing(command, expected_inputs)
+
def test_cat_zstd(self):
    """cat piped into zstd followed by printf yields both cat arguments."""
    command = "{ cat arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs | zstd -22 --ultra; printf \\340\\362\\066\\003; } > arch/x86/boot/compressed/vmlinux.bin.zst"
    expected_inputs = "arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs"
    self._assert_parsing(command, expected_inputs)
+
+ # cat command tests
def test_cat_redirect(self):
    """cat with output redirection yields the cat argument."""
    command = "cat ../fs/unicode/utf8data.c_shipped > fs/unicode/utf8data.c"
    expected_inputs = "../fs/unicode/utf8data.c_shipped"
    self._assert_parsing(command, expected_inputs)
+
def test_cat_piped(self):
    """cat piped into gzip yields both cat arguments."""
    command = "cat arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs | gzip -n -f -9 > arch/x86/boot/compressed/vmlinux.bin.gz"
    expected_inputs = "arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs"
    self._assert_parsing(command, expected_inputs)
+
+ # sed command tests
def test_sed(self):
    """sed -n with a script and one file yields that file."""
    command = "sed -n 's/.*define *BLIST_\\([A-Z0-9_]*\\) *.*/BLIST_FLAG_NAME(\\1),/p' ../include/scsi/scsi_devinfo.h > drivers/scsi/scsi_devinfo_tbl.c"
    expected_inputs = "../include/scsi/scsi_devinfo.h"
    self._assert_parsing(command, expected_inputs)
+
+ # awk command tests
def test_awk(self):
    """awk -f <script> <file> yields the data file but not the awk script."""
    command = "awk -f ../arch/arm64/tools/gen-cpucaps.awk ../arch/arm64/tools/cpucaps > arch/arm64/include/generated/asm/cpucap-defs.h"
    expected_inputs = "../arch/arm64/tools/cpucaps"
    self._assert_parsing(command, expected_inputs)
+
def test_awk_with_input_redirection(self):
    """awk reading via '<' yields the redirected file."""
    command = "awk -v N=1 -f ../lib/raid6/unroll.awk < ../lib/raid6/int.uc > lib/raid6/int1.c"
    expected_inputs = "../lib/raid6/int.uc"
    self._assert_parsing(command, expected_inputs)
+
+ # openssl command tests
def test_openssl(self):
    """The openssl req invocation is expected to yield no inputs."""
    command = "openssl req -new -nodes -utf8 -sha256 -days 36500 -batch -x509 -config certs/x509.genkey -outform PEM -out certs/signing_key.pem -keyout certs/signing_key.pem 2>&1"
    expected_inputs = ""
    self._assert_parsing(command, expected_inputs)
+
+ # gcc/clang command tests
def test_gcc(self):
    """A gcc -c compile command yields only the compiled C source file."""
    command = (
        "gcc -Wp,-MMD,arch/x86/pci/.i386.o.d -nostdinc -I../arch/x86/include -I./arch/x86/include/generated -I../include -I./include -I../arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I../include/uapi -I./include/generated/uapi -include ../include/linux/compiler-version.h -include ../include/linux/kconfig.h -include ../include/linux/compiler_types.h -D__KERNEL__ -fmacro-prefix-map=../= -Werror -std=gnu11 -fshort-wchar -funsigned-char -fno-common -fno-PIE -fno-strict-aliasing -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -fcf-protection=branch -fno-jump-tables -m64 -falign-jumps=1 -falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -march=x86-64 -mtune=generic -mno-red-zone -mcmodel=kernel -mstack-protector-guard-reg=gs -mstack-protector-guard-symbol=__ref_stack_chk_guard -Wno-sign-compare -fno-asynchronous-unwind-tables -mindirect-branch=thunk-extern -mindirect-branch-register -mindirect-branch-cs-prefix -mfunction-return=thunk-extern -fno-jump-tables -fpatchable-function-entry=16,16 -fno-delete-null-pointer-checks -O2 -fno-allow-store-data-races -fstack-protector-strong -fomit-frame-pointer -fno-stack-clash-protection -falign-functions=16 -fno-strict-overflow -fno-stack-check -fconserve-stack -fno-builtin-wcslen -Wall -Wextra -Wundef -Werror=implicit-function-declaration -Werror=implicit-int -Werror=return-type -Werror=strict-prototypes -Wno-format-security -Wno-trigraphs -Wno-frame-address -Wno-address-of-packed-member -Wmissing-declarations -Wmissing-prototypes -Wframe-larger-than=2048 -Wno-main -Wvla-larger-than=1 -Wno-pointer-sign -Wcast-function-type -Wno-array-bounds -Wno-stringop-overflow -Wno-alloc-size-larger-than -Wimplicit-fallthrough=5 -Werror=date-time -Werror=incompatible-pointer-types -Werror=designated-init -Wenum-conversion -Wunused -Wno-unused-but-set-variable -Wno-unused-const-variable -Wno-packed-not-aligned -Wno-format-overflow -Wno-format-truncation -Wno-stringop-truncation -Wno-override-init -Wno-missing-field-initializers -Wno-type-limits -Wno-shift-negative-value -Wno-maybe-uninitialized -Wno-sign-compare -Wno-unused-parameter -I../arch/x86/pci -Iarch/x86/pci -DKBUILD_MODFILE="
        "arch/x86/pci/i386"
        " -DKBUILD_BASENAME="
        "i386"
        " -DKBUILD_MODNAME="
        "i386"
        " -D__KBUILD_MODNAME=kmod_i386 -c -o arch/x86/pci/i386.o ../arch/x86/pci/i386.c "
    )
    expected_inputs = "../arch/x86/pci/i386.c"
    self._assert_parsing(command, expected_inputs)
+
def test_gcc_linking(self):
    """A gcc link command yields all object files but not the -o output."""
    command = "gcc -o arch/x86/tools/relocs arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
    expected_inputs = "arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
    self._assert_parsing(command, expected_inputs)
+
def test_gcc_without_compile_flag(self):
    """gcc without -c, with separated '-I <dir>' options, yields only the C source file."""
    command = "gcc -Wp,-MMD,arch/x86/boot/compressed/.mkpiggy.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu11 -I ../scripts/include -I../tools/include -I arch/x86/boot/compressed -o arch/x86/boot/compressed/mkpiggy ../arch/x86/boot/compressed/mkpiggy.c"
    expected_inputs = "../arch/x86/boot/compressed/mkpiggy.c"
    self._assert_parsing(command, expected_inputs)
+
def test_gcc_with_env_override(self):
    """With CC="ccache gcc" in the environment, both plain and ccache-prefixed gcc commands parse."""
    with patch.dict(os.environ, {"CC": "ccache gcc"}):
        # The registry is created while the patched environment is active.
        env_registry = CommandParserRegistry.create()
        command = "gcc -o arch/x86/tools/relocs arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
        expected_inputs = "arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
        self._assert_parsing(command, expected_inputs, env_registry)
        self._assert_parsing(f"ccache {command}", expected_inputs, env_registry)
+
def test_gcc_dts_preprocessing(self):
    """gcc -E preprocessing of a device tree source yields the .dts file."""
    command = "gcc -E -Wp,-MMD,drivers/of/.empty_root.dtb.d.pre.tmp -nostdinc -I ../scripts/dtc/include-prefixes -undef -D__DTS__ -x assembler-with-cpp -o drivers/of/.empty_root.dtb.dts.tmp ../drivers/of/empty_root.dts"
    expected_inputs = "../drivers/of/empty_root.dts"
    self._assert_parsing(command, expected_inputs)
+
def test_clang(self):
    """A clang -c command for an assembly file yields only the .S source file."""
    command = """clang -Wp,-MMD,arch/x86/entry/.entry_64_compat.o.d -nostdinc -I../arch/x86/include -I./arch/x86/include/generated -I../include -I./include -I../arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I../include/uapi -I./include/generated/uapi -include ../include/linux/compiler-version.h -include ../include/linux/kconfig.h -D__KERNEL__ --target=x86_64-linux-gnu -fintegrated-as -Werror=unknown-warning-option -Werror=ignored-optimization-argument -Werror=option-ignored -Werror=unused-command-line-argument -fmacro-prefix-map=../= -Werror -D__ASSEMBLY__ -fno-PIE -m64 -I../arch/x86/entry -Iarch/x86/entry -DKBUILD_MODFILE='"arch/x86/entry/entry_64_compat"' -DKBUILD_MODNAME='"entry_64_compat"' -D__KBUILD_MODNAME=kmod_entry_64_compat -c -o arch/x86/entry/entry_64_compat.o ../arch/x86/entry/entry_64_compat.S"""
    expected_inputs = "../arch/x86/entry/entry_64_compat.S"
    self._assert_parsing(command, expected_inputs)
+
+ # ld command tests
def test_ld(self):
    """ld followed by a readelf check yields the linked object files only."""
    command = r'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi'
    expected_inputs = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o"
    self._assert_parsing(command, expected_inputs)
+
def test_ld_with_env_override(self):
    """With LD="some-tool ld" in the environment, both plain and prefixed ld commands parse."""
    with patch.dict(os.environ, {"LD": "some-tool ld"}):
        # The registry is created while the patched environment is active.
        env_registry = CommandParserRegistry.create()
        command = r'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi'
        expected_inputs = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o"
        self._assert_parsing(command, expected_inputs, env_registry)
        self._assert_parsing(f"some-tool {command}", expected_inputs, env_registry)
+
def test_ld_whole_archive(self):
    """ld with --whole-archive and empty --start-group/--end-group yields the archive."""
    command = "ld -m elf_x86_64 -z noexecstack -r -o vmlinux.o --whole-archive vmlinux.a --no-whole-archive --start-group --end-group"
    expected_inputs = "vmlinux.a"
    self._assert_parsing(command, expected_inputs)
+
def test_ld_with_at_symbol(self):
    """An '@file' argument of ld.lld is expected as input including the '@'; the objtool call adds nothing."""
    command = "ld.lld -m elf_x86_64 -z noexecstack -r -o fs/efivarfs/efivarfs.o @fs/efivarfs/efivarfs.mod ; ./tools/objtool/objtool --hacks=jump_label --hacks=noinstr --hacks=skylake --ibt --orc --retpoline --rethunk --static-call --uaccess --prefix=16 --link --module fs/efivarfs/efivarfs.o"
    expected_inputs = "@fs/efivarfs/efivarfs.mod"
    self._assert_parsing(command, expected_inputs)
+
def test_ld_if_objdump(self):
    """ld chained with checkundef.sh and an objdump check yields the linked object files only."""
    command = """ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vsgx.o && sh ./arch/x86/entry/vdso/checkundef.sh 'nm' 'arch/x86/entry/vdso/vdso64.so.dbg'; if objdump -R arch/x86/entry/vdso/vdso64.so.dbg | grep -E -h "R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi"""
    expected_inputs = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vsgx.o"
    self._assert_parsing(command, expected_inputs)
+
+ # printf | xargs ar command tests
def test_ar_printf(self):
    """printf "./%s " piped into xargs ar yields every printf argument with the "./" prefix applied."""
    command = 'rm -f built-in.a; printf "./%s " init/built-in.a usr/built-in.a arch/x86/built-in.a arch/x86/boot/startup/built-in.a kernel/built-in.a certs/built-in.a mm/built-in.a fs/built-in.a ipc/built-in.a security/built-in.a crypto/built-in.a block/built-in.a io_uring/built-in.a lib/built-in.a arch/x86/lib/built-in.a drivers/built-in.a sound/built-in.a net/built-in.a virt/built-in.a arch/x86/pci/built-in.a arch/x86/power/built-in.a arch/x86/video/built-in.a | xargs ar cDPrST built-in.a'
    expected_inputs = "./init/built-in.a ./usr/built-in.a ./arch/x86/built-in.a ./arch/x86/boot/startup/built-in.a ./kernel/built-in.a ./certs/built-in.a ./mm/built-in.a ./fs/built-in.a ./ipc/built-in.a ./security/built-in.a ./crypto/built-in.a ./block/built-in.a ./io_uring/built-in.a ./lib/built-in.a ./arch/x86/lib/built-in.a ./drivers/built-in.a ./sound/built-in.a ./net/built-in.a ./virt/built-in.a ./arch/x86/pci/built-in.a ./arch/x86/power/built-in.a ./arch/x86/video/built-in.a"
    self._assert_parsing(command, expected_inputs)
+
def test_ar_printf_nested(self):
    """printf with a directory prefix format piped into xargs ar yields the prefixed object paths."""
    command = 'rm -f arch/x86/pci/built-in.a; printf "arch/x86/pci/%s " i386.o init.o mmconfig_64.o direct.o mmconfig-shared.o fixup.o acpi.o legacy.o irq.o common.o early.o bus_numa.o amd_bus.o | xargs ar cDPrST arch/x86/pci/built-in.a'
    expected_inputs = "arch/x86/pci/i386.o arch/x86/pci/init.o arch/x86/pci/mmconfig_64.o arch/x86/pci/direct.o arch/x86/pci/mmconfig-shared.o arch/x86/pci/fixup.o arch/x86/pci/acpi.o arch/x86/pci/legacy.o arch/x86/pci/irq.o arch/x86/pci/common.o arch/x86/pci/early.o arch/x86/pci/bus_numa.o arch/x86/pci/amd_bus.o"
    self._assert_parsing(command, expected_inputs)
+
+ # ar command tests
def test_ar_reordering(self):
    """ar create followed by an 'ar mPiT' reordering yields only the archives added on creation."""
    command = "rm -f vmlinux.a; ar cDPrST vmlinux.a built-in.a lib/lib.a arch/x86/lib/lib.a; ar mPiT $$(ar t vmlinux.a | sed -n 1p) vmlinux.a $$(ar t vmlinux.a | grep -F -f ../scripts/head-object-list.txt)"
    expected_inputs = "built-in.a lib/lib.a arch/x86/lib/lib.a"
    self._assert_parsing(command, expected_inputs)
+
def test_ar_default(self):
    """A plain 'ar cDPrsT <archive> <objects>' yields every listed object file."""
    command = "rm -f lib/lib.a; ar cDPrsT lib/lib.a lib/argv_split.o lib/bug.o lib/buildid.o lib/clz_tab.o lib/cmdline.o lib/cpumask.o lib/ctype.o lib/dec_and_lock.o lib/decompress.o lib/decompress_bunzip2.o lib/decompress_inflate.o lib/decompress_unlz4.o lib/decompress_unlzma.o lib/decompress_unlzo.o lib/decompress_unxz.o lib/decompress_unzstd.o lib/dump_stack.o lib/earlycpio.o lib/extable.o lib/flex_proportions.o lib/idr.o lib/iomem_copy.o lib/irq_regs.o lib/is_single_threaded.o lib/klist.o lib/kobject.o lib/kobject_uevent.o lib/logic_pio.o lib/maple_tree.o lib/memcat_p.o lib/nmi_backtrace.o lib/objpool.o lib/plist.o lib/radix-tree.o lib/ratelimit.o lib/rbtree.o lib/seq_buf.o lib/siphash.o lib/string.o lib/sys_info.o lib/timerqueue.o lib/union_find.o lib/vsprintf.o lib/win_minmax.o lib/xarray.o"
    expected_inputs = "lib/argv_split.o lib/bug.o lib/buildid.o lib/clz_tab.o lib/cmdline.o lib/cpumask.o lib/ctype.o lib/dec_and_lock.o lib/decompress.o lib/decompress_bunzip2.o lib/decompress_inflate.o lib/decompress_unlz4.o lib/decompress_unlzma.o lib/decompress_unlzo.o lib/decompress_unxz.o lib/decompress_unzstd.o lib/dump_stack.o lib/earlycpio.o lib/extable.o lib/flex_proportions.o lib/idr.o lib/iomem_copy.o lib/irq_regs.o lib/is_single_threaded.o lib/klist.o lib/kobject.o lib/kobject_uevent.o lib/logic_pio.o lib/maple_tree.o lib/memcat_p.o lib/nmi_backtrace.o lib/objpool.o lib/plist.o lib/radix-tree.o lib/ratelimit.o lib/rbtree.o lib/seq_buf.o lib/siphash.o lib/string.o lib/sys_info.o lib/timerqueue.o lib/union_find.o lib/vsprintf.o lib/win_minmax.o lib/xarray.o"
    self._assert_parsing(command, expected_inputs)
+
def test_ar_llvm(self):
    """A lone 'llvm-ar mPiT' reordering command is expected to yield no inputs."""
    command = "llvm-ar mPiT $$(llvm-ar t vmlinux.a | sed -n 1p) vmlinux.a $$(llvm-ar t vmlinux.a | grep -F -f ../scripts/head-object-list.txt)"
    expected_inputs = ""
    self._assert_parsing(command, expected_inputs)
+
+ # nm command tests
def test_nm(self):
    """llvm-nm piped into awk yields the object file given to llvm-nm."""
    command = """llvm-nm -p --defined-only rust/core.o | awk '$$2~/(T|R|D|B)/ && $$3!~/__(pfx|cfi|odr_asan)/ { printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3 }' > rust/exports_core_generated.h"""
    expected_inputs = "rust/core.o"
    self._assert_parsing(command, expected_inputs)
+
def test_nm_vmlinux(self):
    """nm piped into sed yields the file given to nm."""
    command = r"nm vmlinux | sed -n -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$/#define VO_\2 _AC(0x\1,UL)/p' > arch/x86/boot/voffset.h"
    expected_inputs = "vmlinux"
    self._assert_parsing(command, expected_inputs)
+
+ # objcopy command tests
def test_objcopy(self):
    """objcopy with two positional files yields the first one (the second is the output)."""
    command = "objcopy --remove-section='.rel*' --remove-section=!'.rel*.dyn' vmlinux.unstripped vmlinux"
    expected_inputs = "vmlinux.unstripped"
    self._assert_parsing(command, expected_inputs)
+
def test_objcopy_llvm(self):
    """llvm-objcopy is expected to yield the same input as the objcopy variant of the command."""
    command = "llvm-objcopy --remove-section='.rel*' --remove-section=!'.rel*.dyn' vmlinux.unstripped vmlinux"
    expected_inputs = "vmlinux.unstripped"
    self._assert_parsing(command, expected_inputs)
+
+ # strip command tests
def test_strip(self):
    """strip with -o <output> yields the remaining positional file."""
    command = "strip --strip-debug -o drivers/firmware/efi/libstub/mem.stub.o drivers/firmware/efi/libstub/mem.o"
    expected_inputs = "drivers/firmware/efi/libstub/mem.o"
    self._assert_parsing(command, expected_inputs)
+
+ # cp command tests
def test_cp_truncate(self):
    """cp followed by truncate yields only the cp source file."""
    command = "cp arch/arm64/boot/Image arch/arm64/boot/vmlinux.bin; truncate -s $$(hexdump -s16 -n4 -e '\"%u\"' arch/arm64/boot/Image) arch/arm64/boot/vmlinux.bin"
    expected_inputs = "arch/arm64/boot/Image"
    self._assert_parsing(command, expected_inputs)
+
+ # rustc command tests
def test_rustc(self):
    """rustc followed by llvm-objcopy yields the crate root source and the object given to llvm-objcopy."""
    command = """OBJTREE=/workspace/linux/kernel_build rustc -Zbinary_dep_depinfo=y -Astable_features -Dnon_ascii_idents -Dunsafe_op_in_unsafe_fn -Wmissing_docs -Wrust_2018_idioms -Wclippy::all -Wclippy::as_ptr_cast_mut -Wclippy::as_underscore -Wclippy::cast_lossless -Wclippy::ignored_unit_patterns -Wclippy::mut_mut -Wclippy::needless_bitwise_bool -Aclippy::needless_lifetimes -Wclippy::no_mangle_with_rust_abi -Wclippy::ptr_as_ptr -Wclippy::ptr_cast_constness -Wclippy::ref_as_ptr -Wclippy::undocumented_unsafe_blocks -Wclippy::unnecessary_safety_comment -Wclippy::unnecessary_safety_doc -Wrustdoc::missing_crate_level_docs -Wrustdoc::unescaped_backticks -Cpanic=abort -Cembed-bitcode=n -Clto=n -Cforce-unwind-tables=n -Ccodegen-units=1 -Csymbol-mangling-version=v0 -Crelocation-model=static -Zfunction-sections=n -Wclippy::float_arithmetic --target=./scripts/target.json -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 -Zcf-protection=branch -Zno-jump-tables -Ctarget-cpu=x86-64 -Ztune-cpu=generic -Cno-redzone=y -Ccode-model=kernel -Zfunction-return=thunk-extern -Zpatchable-function-entry=16,16 -Copt-level=2 -Cdebug-assertions=n -Coverflow-checks=y -Dwarnings @./include/generated/rustc_cfg --edition=2021 --cfg no_fp_fmt_parse --emit=dep-info=rust/.core.o.d --emit=obj=rust/core.o --emit=metadata=rust/libcore.rmeta --crate-type rlib -L./rust --crate-name core /usr/lib/rust-1.84/lib/rustlib/src/rust/library/core/src/lib.rs --sysroot=/dev/null ;llvm-objcopy --redefine-sym __addsf3=__rust__addsf3 --redefine-sym __eqsf2=__rust__eqsf2 --redefine-sym __extendsfdf2=__rust__extendsfdf2 --redefine-sym __gesf2=__rust__gesf2 --redefine-sym __lesf2=__rust__lesf2 --redefine-sym __ltsf2=__rust__ltsf2 --redefine-sym __mulsf3=__rust__mulsf3 --redefine-sym __nesf2=__rust__nesf2 --redefine-sym __truncdfsf2=__rust__truncdfsf2 --redefine-sym __unordsf2=__rust__unordsf2 --redefine-sym __adddf3=__rust__adddf3 --redefine-sym __eqdf2=__rust__eqdf2 --redefine-sym __ledf2=__rust__ledf2 --redefine-sym __ltdf2=__rust__ltdf2 --redefine-sym __muldf3=__rust__muldf3 --redefine-sym __unorddf2=__rust__unorddf2 --redefine-sym __muloti4=__rust__muloti4 --redefine-sym __multi3=__rust__multi3 --redefine-sym __udivmodti4=__rust__udivmodti4 --redefine-sym __udivti3=__rust__udivti3 --redefine-sym __umodti3=__rust__umodti3 rust/core.o"""
    expected_inputs = "/usr/lib/rust-1.84/lib/rustlib/src/rust/library/core/src/lib.rs rust/core.o"
    self._assert_parsing(command, expected_inputs)
+
+ # rustdoc command tests
def test_rustdoc(self):
    """A rustdoc --test invocation yields the crate root source file."""
    command = """OBJTREE=/workspace/linux/kernel_build rustdoc --test --edition=2021 -Zbinary_dep_depinfo=y -Astable_features -Dnon_ascii_idents -Dunsafe_op_in_unsafe_fn -Wmissing_docs -Wrust_2018_idioms -Wunreachable_pub -Wclippy::all -Wclippy::as_ptr_cast_mut -Wclippy::as_underscore -Wclippy::cast_lossless -Wclippy::ignored_unit_patterns -Wclippy::mut_mut -Wclippy::needless_bitwise_bool -Aclippy::needless_lifetimes -Wclippy::no_mangle_with_rust_abi -Wclippy::ptr_as_ptr -Wclippy::ptr_cast_constness -Wclippy::ref_as_ptr -Wclippy::undocumented_unsafe_blocks -Wclippy::unnecessary_safety_comment -Wclippy::unnecessary_safety_doc -Wrustdoc::missing_crate_level_docs -Wrustdoc::unescaped_backticks -Cpanic=abort -Cembed-bitcode=n -Clto=n -Cforce-unwind-tables=n -Ccodegen-units=1 -Csymbol-mangling-version=v0 -Crelocation-model=static -Zfunction-sections=n -Wclippy::float_arithmetic --target=aarch64-unknown-none -Ctarget-feature="-neon" -Cforce-unwind-tables=n -Zbranch-protection=pac-ret -Copt-level=2 -Cdebug-assertions=y -Coverflow-checks=y -Dwarnings -Cforce-frame-pointers=y -Zsanitizer=kernel-address -Zsanitizer-recover=kernel-address -Cllvm-args=-asan-mapping-offset=0xdfff800000000000 -Cpasses=sancov-module -Cllvm-args=-sanitizer-coverage-level=3 -Cllvm-args=-sanitizer-coverage-trace-pc -Cllvm-args=-sanitizer-coverage-trace-compares @./include/generated/rustc_cfg -L./rust --extern ffi --extern pin_init --extern kernel --extern build_error --extern macros --extern bindings --extern uapi --no-run --crate-name kernel -Zunstable-options --sysroot=/dev/null --test-builder ./scripts/rustdoc_test_builder ../rust/kernel/lib.rs >/dev/null"""
    expected_inputs = "../rust/kernel/lib.rs"
    self._assert_parsing(command, expected_inputs)
+
def test_rustdoc_test_gen(self):
    """The bare rustdoc_test_gen script call is expected to yield no inputs."""
    command = "./scripts/rustdoc_test_gen"
    expected_inputs = ""
    self._assert_parsing(command, expected_inputs)
+
+ # flex command tests
def test_flex(self):
    """flex with an attached -o<output> yields the lexer source file."""
    command = "flex -oscripts/kconfig/lexer.lex.c -L ../scripts/kconfig/lexer.l"
    expected_inputs = "../scripts/kconfig/lexer.l"
    self._assert_parsing(command, expected_inputs)
+
+ # bison command tests
def test_bison(self):
    """bison with -o and --defines outputs yields the grammar source file."""
    command = "bison -o scripts/kconfig/parser.tab.c --defines=scripts/kconfig/parser.tab.h -t -l ../scripts/kconfig/parser.y"
    expected_inputs = "../scripts/kconfig/parser.y"
    self._assert_parsing(command, expected_inputs)
+
+ # bindgen command tests
def test_bindgen(self):
    """bindgen followed by 'sed -Ei' yields the helper header and the '-include' header from the clang args."""
    command = (
        "bindgen ../rust/bindings/bindings_helper.h "
        "--blocklist-type __kernel_s?size_t --blocklist-type __kernel_ptrdiff_t "
        "--opaque-type xregs_state --opaque-type desc_struct --no-doc-comments "
        "--rust-target 1.68 --use-core --with-derive-default -o rust/bindings/bindings_generated.rs "
        "-- -Wp,-MMD,rust/bindings/.bindings_generated.rs.d -nostdinc -I../arch/x86/include "
        "-include ../include/linux/compiler-version.h -D__KERNEL__ -fintegrated-as -fno-builtin -DMODULE; "
        "sed -Ei 's/pub const RUST_CONST_HELPER_([a-zA-Z0-9_]*)/pub const \\1/g' rust/bindings/bindings_generated.rs"
    )
    expected_inputs = "../rust/bindings/bindings_helper.h ../include/linux/compiler-version.h"
    self._assert_parsing(command, expected_inputs)
+
+ # perl command tests
def test_perl(self):
    """perl running a script with redirected output yields the script file."""
    command = "perl ../lib/crypto/x86/poly1305-x86_64-cryptogams.pl > lib/crypto/x86/poly1305-x86_64-cryptogams.S"
    expected_inputs = "../lib/crypto/x86/poly1305-x86_64-cryptogams.pl"
    self._assert_parsing(command, expected_inputs)
+
+ # link-vmlinux.sh command tests
def test_link_vmlinux(self):
    """A link-vmlinux.sh invocation is expected to yield vmlinux.a, which is not named in the command."""
    command = '../scripts/link-vmlinux.sh "ld" "-m elf_x86_64 -z noexecstack" "-z max-page-size=0x200000 --build-id=sha1 --orphan-handling=error --emit-relocs --discard-none" "vmlinux.unstripped"; true'
    expected_inputs = "vmlinux.a"
    self._assert_parsing(command, expected_inputs)
+
def test_link_vmlinux_postlink(self):
    """link-vmlinux.sh followed by a Makefile.postlink make call is still expected to yield only vmlinux.a."""
    command = '../scripts/link-vmlinux.sh "ld" "-m elf_x86_64 -z noexecstack --no-warn-rwx-segments" "--emit-relocs --discard-none -z max-page-size=0x200000 --build-id=sha1 -X --orphan-handling=error"; make -f ../arch/x86/Makefile.postlink vmlinux'
    expected_inputs = "vmlinux.a"
    self._assert_parsing(command, expected_inputs)
+
+ # syscallhdr.sh command tests
+    def test_syscallhdr(self):
+        # Expect only the .tbl file; the option values and the final .h are not listed.
+        self._assert_parsing(
+            "sh ../scripts/syscallhdr.sh --abis common,64 --emit-nr ../arch/x86/entry/syscalls/syscall_64.tbl arch/x86/include/generated/uapi/asm/unistd_64.h",
+            "../arch/x86/entry/syscalls/syscall_64.tbl",
+        )
+
+ # syscalltbl.sh command tests
+    def test_syscalltbl(self):
+        # Expect only the .tbl file; the --abis value and the final .h are not listed.
+        self._assert_parsing(
+            "sh ../scripts/syscalltbl.sh --abis common,64 ../arch/x86/entry/syscalls/syscall_64.tbl arch/x86/include/generated/asm/syscalls_64.h",
+            "../arch/x86/entry/syscalls/syscall_64.tbl",
+        )
+
+ # mkcapflags.sh command tests
+    def test_mkcapflags(self):
+        # The two header paths are expected verbatim, ".." segments included;
+        # capflags.c, the repeated script path and FORCE are not listed.
+        feature_headers = (
+            "../arch/x86/kernel/cpu/../../include/asm/cpufeatures.h",
+            "../arch/x86/kernel/cpu/../../include/asm/vmxfeatures.h",
+        )
+        self._assert_parsing(
+            "sh ../arch/x86/kernel/cpu/mkcapflags.sh arch/x86/kernel/cpu/capflags.c ../arch/x86/kernel/cpu/../../include/asm/cpufeatures.h ../arch/x86/kernel/cpu/../../include/asm/vmxfeatures.h ../arch/x86/kernel/cpu/mkcapflags.sh FORCE",
+            " ".join(feature_headers),
+        )
+
+ # orc_hash.sh command tests
+    def test_orc_hash(self):
+        # Expect the file fed in through '<'; the mkdir step and the '>' target
+        # are not listed.
+        self._assert_parsing(
+            "mkdir -p arch/x86/include/generated/asm/; sh ../scripts/orc_hash.sh < ../arch/x86/include/asm/orc_types.h > arch/x86/include/generated/asm/orc_hash.h",
+            "../arch/x86/include/asm/orc_types.h",
+        )
+
+ # xen-hypercalls.sh command tests
+    def test_xen_hypercalls(self):
+        # The script path is single-quoted here. Expect the three xen interface
+        # headers; the first argument (the generated header) is not listed.
+        interface_headers = (
+            "../include/xen/interface/xen-mca.h",
+            "../include/xen/interface/xen.h",
+            "../include/xen/interface/xenpmu.h",
+        )
+        self._assert_parsing(
+            "sh '../scripts/xen-hypercalls.sh' arch/x86/include/generated/asm/xen-hypercalls.h ../include/xen/interface/xen-mca.h ../include/xen/interface/xen.h ../include/xen/interface/xenpmu.h",
+            " ".join(interface_headers),
+        )
+
+ # gen_initramfs.sh command tests
+    def test_gen_initramfs(self):
+        # Expect only the trailing cpio list; the -o and -l values are not listed.
+        self._assert_parsing(
+            "sh ../usr/gen_initramfs.sh -o usr/initramfs_data.cpio -l usr/.initramfs_data.cpio.d ../usr/default_cpio_list",
+            "../usr/default_cpio_list",
+        )
+
+ # mkuboot.sh command tests
+    def test_mkuboot(self):
+        # Expect the file passed via -d; the trailing uImage path is not listed.
+        self._assert_parsing(
+            "bash ../scripts/mkuboot.sh -A arm -O linux -C none -T kernel -a 0x8000 -e 0x8000 -n 'Linux-6.15.0' -d arch/arm/boot/zImage arch/arm/boot/uImage",
+            "arch/arm/boot/zImage",
+        )
+
+ # syscallnr.sh command tests
+    def test_syscallnr(self):
+        # Expect the .tbl argument; the following .h argument is not listed.
+        self._assert_parsing(
+            "sh ../arch/arm/tools/syscallnr.sh ../arch/arm/tools/syscall.tbl arch/arm/include/generated/asm/unistd-nr.h",
+            "../arch/arm/tools/syscall.tbl",
+        )
+
+ # gen-kernel-hwcaps.sh command tests
+    def test_gen_kernel_hwcaps(self):
+        # Shell is invoked as "/bin/sh -e" here. Expect the hwcap.h argument; the
+        # '>' redirect target is not listed.
+        self._assert_parsing(
+            "/bin/sh -e ../arch/arm64/tools/gen-kernel-hwcaps.sh ../arch/arm64/include/uapi/asm/hwcap.h > arch/arm64/include/generated/asm/kernel-hwcap.h",
+            "../arch/arm64/include/uapi/asm/hwcap.h",
+        )
+
+ # vdso2c command tests
+    def test_vdso2c(self):
+        # Expect the first two arguments; the third (the .c file) is not listed.
+        shared_objects = (
+            "arch/x86/entry/vdso/vdso64.so.dbg",
+            "arch/x86/entry/vdso/vdso64.so",
+        )
+        self._assert_parsing(
+            "arch/x86/entry/vdso/vdso2c arch/x86/entry/vdso/vdso64.so.dbg arch/x86/entry/vdso/vdso64.so arch/x86/entry/vdso/vdso-image-64.c",
+            " ".join(shared_objects),
+        )
+
+ # vdsomunge command tests
+    def test_vdsomunge(self):
+        # The tool path itself contains ".." segments. Expect the first argument;
+        # the second (.so.dbg) is not listed.
+        self._assert_parsing(
+            "arch/arm64/kernel/vdso32/../../../arm/vdso/vdsomunge arch/arm64/kernel/vdso32/vdso.so.raw arch/arm64/kernel/vdso32/vdso32.so.dbg",
+            "arch/arm64/kernel/vdso32/vdso.so.raw",
+        )
+
+ # mkpiggy command tests
+    def test_mkpiggy(self):
+        # Expect the .gz argument; the '>' redirect target is not listed.
+        self._assert_parsing(
+            "arch/x86/boot/compressed/mkpiggy arch/x86/boot/compressed/vmlinux.bin.gz > arch/x86/boot/compressed/piggy.S",
+            "arch/x86/boot/compressed/vmlinux.bin.gz",
+        )
+
+ # relocs command tests
+    def test_relocs(self):
+        # Two relocs invocations joined by ';' with no surrounding spaces. Both
+        # name vmlinux.unstripped, and it is expected exactly once.
+        self._assert_parsing(
+            "arch/x86/tools/relocs vmlinux.unstripped > arch/x86/boot/compressed/vmlinux.relocs;arch/x86/tools/relocs --abs-relocs vmlinux.unstripped",
+            "vmlinux.unstripped",
+        )
+
+    def test_relocs_with_realmode(self):
+        # Expect the .elf that follows --realmode; the '>' target is not listed.
+        self._assert_parsing(
+            "arch/x86/tools/relocs --realmode arch/x86/realmode/rm/realmode.elf > arch/x86/realmode/rm/realmode.relocs",
+            "arch/x86/realmode/rm/realmode.elf",
+        )
+
+ # mk_elfconfig command tests
+    def test_mk_elfconfig(self):
+        # Expect the file fed in through '<'; the '>' target is not listed.
+        self._assert_parsing(
+            "scripts/mod/mk_elfconfig < scripts/mod/empty.o > scripts/mod/elfconfig.h",
+            "scripts/mod/empty.o",
+        )
+
+ # tools/build command tests
+    def test_build(self):
+        # Expect the first three arguments; the last one (bzImage) is not listed.
+        build_operands = (
+            "arch/x86/boot/setup.bin",
+            "arch/x86/boot/vmlinux.bin",
+            "arch/x86/boot/zoffset.h",
+        )
+        self._assert_parsing(
+            "arch/x86/boot/tools/build arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin arch/x86/boot/zoffset.h arch/x86/boot/bzImage",
+            " ".join(build_operands),
+        )
+
+ # extract-cert command tests
+    def test_extract_cert(self):
+        # The first argument is an empty quoted string; an empty result is expected
+        # and the .x509 path is not listed.
+        self._assert_parsing('certs/extract-cert "" certs/signing_key.x509', "")
+
+ # dtc command tests
+    def test_dtc_cat(self):
+        # dtc followed by cat in one command line. Expected, in this order: the
+        # .dts.tmp given to dtc, then the two files given to cat. The second of
+        # those also appears as dtc's -d value.
+        expected_files = (
+            "drivers/of/.empty_root.dtb.dts.tmp",
+            "drivers/of/.empty_root.dtb.d.pre.tmp",
+            "drivers/of/.empty_root.dtb.d.dtc.tmp",
+        )
+        self._assert_parsing(
+            "./scripts/dtc/dtc -o drivers/of/empty_root.dtb -b 0 -i../drivers/of/ -i../scripts/dtc/include-prefixes -Wno-unique_unit_address -Wno-unit_address_vs_reg -Wno-avoid_unnecessary_addr_size -Wno-alias_paths -Wno-graph_child_address -Wno-simple_bus_reg -d drivers/of/.empty_root.dtb.d.dtc.tmp drivers/of/.empty_root.dtb.dts.tmp ; cat drivers/of/.empty_root.dtb.d.pre.tmp drivers/of/.empty_root.dtb.d.dtc.tmp > drivers/of/.empty_root.dtb.d",
+            " ".join(expected_files),
+        )
+
+ # pnmtologo command tests
+    def test_pnmtologo(self):
+        # Expect only the trailing .ppm; the -t, -n and -o values are not listed.
+        self._assert_parsing(
+            "drivers/video/logo/pnmtologo -t clut224 -n logo_linux_clut224 -o drivers/video/logo/logo_linux_clut224.c ../drivers/video/logo/logo_linux_clut224.ppm",
+            "../drivers/video/logo/logo_linux_clut224.ppm",
+        )
+
+ # relacheck command tests
+    def test_relacheck(self):
+        # Expect the first argument (.pi.o); the second (.o) is not listed.
+        self._assert_parsing(
+            "arch/arm64/kernel/pi/relacheck arch/arm64/kernel/pi/idreg-override.pi.o arch/arm64/kernel/pi/idreg-override.o",
+            "arch/arm64/kernel/pi/idreg-override.pi.o",
+        )
+
+ # gen-hyprel command tests
+    def test_gen_hyprel(self):
+        # Expect the .o argument; the '>' redirect target is not listed.
+        self._assert_parsing(
+            "arch/arm64/kvm/hyp/nvhe/gen-hyprel arch/arm64/kvm/hyp/nvhe/kvm_nvhe.tmp.o > arch/arm64/kvm/hyp/nvhe/hyp-reloc.S",
+            "arch/arm64/kvm/hyp/nvhe/kvm_nvhe.tmp.o",
+        )
+
+ # mkregtable command tests
+    def test_mkregtable(self):
+        # Expect the reg_srcs argument; the '>' redirect target is not listed.
+        self._assert_parsing(
+            "drivers/gpu/drm/radeon/mkregtable ../drivers/gpu/drm/radeon/reg_srcs/r100 > drivers/gpu/drm/radeon/r100_reg_safe.h",
+            "../drivers/gpu/drm/radeon/reg_srcs/r100",
+        )
+
+ # genheaders command tests
+    def test_genheaders(self):
+        # Neither of the two .h arguments is expected; the result is empty.
+        self._assert_parsing(
+            "security/selinux/genheaders security/selinux/flask.h security/selinux/av_permissions.h",
+            "",
+        )
+
+ # mkcpustr command tests
+    def test_mkcpustr(self):
+        # The command has only a '>' redirect and no arguments; the result is empty.
+        self._assert_parsing("arch/x86/boot/mkcpustr > arch/x86/boot/cpustr.h", "")
+
+ # polgen command tests
+    def test_polgen(self):
+        # The single .c argument is not expected; the result is empty.
+        self._assert_parsing("scripts/ipe/polgen/polgen security/ipe/boot_policy.c", "")
+
+ # gen_header.py command tests
+    def test_gen_header(self):
+        # mkdir chained with python3 through '&&'. Expect only the --xml value;
+        # the script path, the --rnn directory and the '>' target are not listed.
+        self._assert_parsing(
+            "mkdir -p drivers/gpu/drm/msm/generated && python3 ../drivers/gpu/drm/msm/registers/gen_header.py --no-validate --rnn ../drivers/gpu/drm/msm/registers --xml ../drivers/gpu/drm/msm/registers/adreno/a2xx.xml c-defines > drivers/gpu/drm/msm/generated/a2xx.xml.h",
+            "../drivers/gpu/drm/msm/registers/adreno/a2xx.xml",
+        )
+
+
+# Run this module's tests when the file is executed directly as a script.
+if __name__ == "__main__":
+ unittest.main()
diff --git a/scripts/sbom/tests/spdx_graph/__init__.py b/scripts/sbom/tests/spdx_graph/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
--- /dev/null
+++ b/scripts/sbom/tests/spdx_graph/__init__.py
diff --git a/scripts/sbom/tests/spdx_graph/test_kernel_file.py b/scripts/sbom/tests/spdx_graph/test_kernel_file.py
new file mode 100644
index 0000000000000..35a63a768ba2a
--- /dev/null
+++ b/scripts/sbom/tests/spdx_graph/test_kernel_file.py
@@ -0,0 +1,35 @@
+# SPDX-License-Identifier: GPL-2.0-only OR MIT
+# Copyright (C) 2025 TNG Technology Consulting GmbH
+
+import unittest
+from pathlib import Path
+import tempfile
+from sbom.spdx_graph.kernel_file import _parse_spdx_license_identifier # type: ignore
+
+
+class TestKernelFile(unittest.TestCase):
+    """Tests for reading the SPDX license identifier out of a file on disk."""
+
+    def setUp(self):
+        # Every test gets a fresh temporary directory to write fixture files into.
+        self.tmpdir = tempfile.TemporaryDirectory()
+        self.src_tree = Path(self.tmpdir.name)
+
+    def tearDown(self):
+        # Remove the temporary directory together with all fixture files.
+        self.tmpdir.cleanup()
+
+    def test_parse_spdx_license_identifier(self):
+        # Table-driven check: each entry is (file content, expected identifier),
+        # with None expected when the content carries no license tag.
+        # REUSE-IgnoreStart
+        test_cases: list[tuple[str, str | None]] = [
+            ("/* SPDX-License-Identifier: MIT*/", "MIT"),
+            ("// SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
+            ("# SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
+            ("#!/bin/bash\n# SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
+            ("/* SPDX-License-Identifier: GPL-2.0-or-later OR MIT */", "GPL-2.0-or-later OR MIT"),
+            ("/* SPDX-License-Identifier: Apache-2.0 */\n extra text", "Apache-2.0"),
+            ("<!-- SPDX-License-Identifier: GPL-2.0 -->", "GPL-2.0"),
+            ("int main() { return 0; }", None),
+        ]
+        # REUSE-IgnoreEnd
+
+        for i, (file_content, expected_identifier) in enumerate(test_cases):
+            # One subTest per entry: a failure is reported with the entry's index
+            # and content, and the remaining entries still run.
+            with self.subTest(case=i, file_content=file_content):
+                file_path = self.src_tree / f"file_{i}.c"
+                # Explicit encoding keeps the fixture independent of the locale.
+                file_path.write_text(file_content, encoding="utf-8")
+                self.assertEqual(_parse_spdx_license_identifier(str(file_path)), expected_identifier)