diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 17:41:20 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 17:41:20 +0100 |
| commit | c8741d22c4136a6b19b2055af33bf83cce617eb6 (patch) | |
| tree | 700a7a0f16a7e21b78e9255610722fedc94876e2 /scripts | |
| parent | b7fcd17dd291cd1917f81718e89d1c0fd97c3baf (diff) | |
| parent | 880bae5f1269b4d81bb2a254963e84377cd37bc1 (diff) | |
| download | linux-next-history-c8741d22c4136a6b19b2055af33bf83cce617eb6.tar.gz | |
Merge branch 'spdx-linus' of https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/spdx.git
Diffstat (limited to 'scripts')
38 files changed, 4077 insertions, 0 deletions
diff --git a/scripts/sbom/sbom.py b/scripts/sbom/sbom.py new file mode 100644 index 0000000000000..764175b9c8933 --- /dev/null +++ b/scripts/sbom/sbom.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +""" +Compute software bill of materials in SPDX format describing a kernel build. +""" + +import json +import logging +import os +import sys +import time +import uuid +import sbom.sbom_logging as sbom_logging +from sbom.config import get_config +from sbom.path_utils import is_relative_to +from sbom.spdx import JsonLdSpdxDocument, SpdxIdGenerator +from sbom.spdx.core import CreationInfo, SpdxDocument +from sbom.spdx_graph import SpdxIdGeneratorCollection, build_spdx_graphs +from sbom.cmd_graph import CmdGraph + + +def _exit_with_summary(write_output_on_error: bool = False) -> None: + warning_summary = sbom_logging.summarize_warnings() + error_summary = sbom_logging.summarize_errors() + if warning_summary: + logging.warning(warning_summary) + if error_summary: + logging.error(error_summary) + if not write_output_on_error: + logging.info( + "Use --write-output-on-error to generate output documents even when errors occur. " + "Note that in this case the generated documents may be incomplete." 
def main():
    """
    Build the cmd graph for the configured roots and write the requested output documents.

    Steps:
      1. Read the configuration and set up logging.
      2. Build the cmd graph starting from `config.root_paths`.
      3. If `config.generate_used_files` is set, collect the files found in the
         cmd graph and write them to the used-files document.
      4. Unless `config.generate_spdx` is False, build the SPDX graphs, derive a
         content based uuid for the spdxId namespace and save one JSON-LD
         document per graph.

    Output files are only written when no errors were logged or when
    `config.write_output_on_error` is set. The function always finishes by
    calling `_exit_with_summary`.
    """
    # Read config
    config = get_config()

    # Configure logging
    logging.basicConfig(
        level=logging.DEBUG if config.debug else logging.INFO,
        format="[%(levelname)s] %(message)s",
    )

    # Build cmd graph
    logging.debug("Start building cmd graph")
    # perf_counter() is monotonic, so the reported duration cannot be skewed
    # by wall-clock adjustments happening while the graph is built.
    start_time = time.perf_counter()
    cmd_graph = CmdGraph.create(config.root_paths, config)
    logging.debug(f"Built cmd graph in {time.perf_counter() - start_time} seconds")

    # Save used files document
    if config.generate_used_files:
        if config.src_tree == config.obj_tree:
            logging.info(
                f"Extracting all files from the cmd graph to {config.used_files_file_name} "
                "instead of only source files because source files cannot be "
                "reliably classified when the source and object trees are identical.",
            )
            used_files = [os.path.relpath(node.absolute_path, config.src_tree) for node in cmd_graph]
            logging.debug(f"Found {len(used_files)} files in cmd graph.")
        else:
            # Only keep files that live in the source tree but not in the object tree.
            used_files = [
                os.path.relpath(node.absolute_path, config.src_tree)
                for node in cmd_graph
                if is_relative_to(node.absolute_path, config.src_tree)
                and not is_relative_to(node.absolute_path, config.obj_tree)
            ]
            logging.debug(f"Found {len(used_files)} source files in cmd graph")
        if not sbom_logging.has_errors() or config.write_output_on_error:
            used_files_path = os.path.join(config.output_directory, config.used_files_file_name)
            with open(used_files_path, "w", encoding="utf-8") as f:
                f.write("\n".join(str(file_path) for file_path in used_files))
            logging.debug(f"Successfully saved {used_files_path}")

    if config.generate_spdx is False:
        _exit_with_summary(config.write_output_on_error)
        return

    # Build SPDX Documents
    logging.debug("Start generating SPDX graph based on cmd graph")
    start_time = time.perf_counter()

    # The real uuid will be generated based on the content of the SPDX graphs
    # to ensure that the same SPDX document is always assigned the same uuid.
    PLACEHOLDER_UUID = "00000000-0000-0000-0000-000000000000"
    spdx_id_base_namespace = f"{config.spdxId_prefix}{PLACEHOLDER_UUID}/"
    spdx_id_generators = SpdxIdGeneratorCollection(
        base=SpdxIdGenerator(prefix="p", namespace=spdx_id_base_namespace),
        source=SpdxIdGenerator(prefix="s", namespace=f"{spdx_id_base_namespace}source/"),
        build=SpdxIdGenerator(prefix="b", namespace=f"{spdx_id_base_namespace}build/"),
        output=SpdxIdGenerator(prefix="o", namespace=f"{spdx_id_base_namespace}output/"),
    )

    spdx_graphs = build_spdx_graphs(
        cmd_graph,
        spdx_id_generators,
        config,
    )
    # Name-based uuid over the serialized elements of all graphs.
    spdx_id_uuid = uuid.uuid5(
        uuid.NAMESPACE_URL,
        "".join(
            json.dumps(element.to_dict()) for spdx_graph in spdx_graphs.values() for element in spdx_graph.to_list()
        ),
    )
    logging.debug(f"Generated SPDX graph in {time.perf_counter() - start_time} seconds")

    if not sbom_logging.has_errors() or config.write_output_on_error:
        for kernel_sbom_kind, spdx_graph in spdx_graphs.items():
            spdx_graph_objects = spdx_graph.to_list()
            # Add warning and error summary to creation info comment
            creation_info = next(element for element in spdx_graph_objects if isinstance(element, CreationInfo))
            creation_info.comment = "\n".join([
                sbom_logging.summarize_warnings(),
                sbom_logging.summarize_errors(),
            ]).strip()
            # Replace Placeholder uuid with real uuid for spdxIds
            spdx_document = next(element for element in spdx_graph_objects if isinstance(element, SpdxDocument))
            for namespaceMap in spdx_document.namespaceMap:
                namespaceMap.namespace = namespaceMap.namespace.replace(PLACEHOLDER_UUID, str(spdx_id_uuid))
            # Serialize SPDX graph to JSON-LD
            spdx_doc = JsonLdSpdxDocument(graph=spdx_graph_objects)
            save_path = os.path.join(config.output_directory, config.spdx_file_names[kernel_sbom_kind])
            spdx_doc.save(save_path, config.prettify_json)
            logging.debug(f"Successfully saved {save_path}")

    _exit_with_summary(config.write_output_on_error)
"__main__": + main() diff --git a/scripts/sbom/sbom/__init__.py b/scripts/sbom/sbom/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d --- /dev/null +++ b/scripts/sbom/sbom/__init__.py diff --git a/scripts/sbom/sbom/cmd_graph/__init__.py b/scripts/sbom/sbom/cmd_graph/__init__.py new file mode 100644 index 0000000000000..9d661a5c3d93f --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from .cmd_graph import CmdGraph +from .cmd_graph_node import CmdGraphNode, CmdGraphNodeConfig + +__all__ = ["CmdGraph", "CmdGraphNode", "CmdGraphNodeConfig"] diff --git a/scripts/sbom/sbom/cmd_graph/cmd_file.py b/scripts/sbom/sbom/cmd_graph/cmd_file.py new file mode 100644 index 0000000000000..dcd63e284a38c --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/cmd_file.py @@ -0,0 +1,162 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import os +import re +from dataclasses import dataclass, field +from sbom.cmd_graph.deps_parser import parse_cmd_file_deps +from sbom.cmd_graph.savedcmd_parser import parse_inputs_from_commands +import sbom.sbom_logging as sbom_logging +from sbom.path_utils import PathStr + +SAVEDCMD_PATTERN = re.compile(r"^(saved)?cmd_.*?:=\s*(?P<full_command>.+)$") +SOURCE_PATTERN = re.compile(r"^source.*?:=\s*(?P<source_file>.+)$") + + +@dataclass +class CmdFile: + cmd_file_path: PathStr + savedcmd: str + source: PathStr | None = None + deps: list[str] = field(default_factory=list) + make_rules: list[str] = field(default_factory=list) + + @classmethod + def create(cls, cmd_file_path: PathStr) -> "CmdFile | None": + """ + Parses a .cmd file. + .cmd files are assumed to have one of the following structures: + 1. 
Full Cmd File + (saved)?cmd_<output> := <command> + source_<output> := <main_input> + deps_<output> := \ + <dependencies> + <output> := $(deps_<output>) + $(deps_<output>): + + 2. Command Only Cmd File + (saved)?cmd_<output> := <command> + + 3. Single Dependency Cmd File + (saved)?cmd_<output> := <command> + <output> : <dependency> + + Args: + cmd_file_path (Path): absolute Path to a .cmd file + + Returns: + cmd_file (CmdFile): Parsed cmd file. + """ + with open(cmd_file_path, "rt", encoding="utf-8") as f: + lines = [line.strip() for line in f.readlines() if line.strip() != "" and not line.startswith("#")] + + # savedcmd + match = SAVEDCMD_PATTERN.match(lines[0] if lines else "") + if match is None: + sbom_logging.error( + "Skip parsing '{cmd_file_path}' because no 'savedcmd_' command was found.", cmd_file_path=cmd_file_path + ) + return None + savedcmd = match.group("full_command") + + # Command Only Cmd File + if len(lines) == 1: + return CmdFile(cmd_file_path, savedcmd) + + # Single Dependency Cmd File + if len(lines) == 2: + parts = lines[1].split(":", 1) + if len(parts) != 2: + sbom_logging.error( + "Skip parsing '{cmd_file_path}'. Expected dependency line '<output>: <dependency>' but got {second_line}", cmd_file_path=cmd_file_path, second_line=lines[1] + ) + return None + dep = parts[1].strip() + return CmdFile(cmd_file_path, savedcmd, deps=[dep]) + + # Full Cmd File + # source + line1 = SOURCE_PATTERN.match(lines[1]) + if line1 is None: + sbom_logging.error( + "Skip parsing '{cmd_file_path}' because no 'source_' entry was found.", cmd_file_path=cmd_file_path + ) + return CmdFile(cmd_file_path, savedcmd) + source = line1.group("source_file") + + # deps + deps: list[str] = [] + i = 3 # lines[2] includes the variable assignment but no actual dependency, so we need to start at lines[3]. 
+ while i < len(lines): + if not lines[i].endswith("\\"): + break + deps.append(lines[i][:-1].strip()) + i += 1 + + # make_rules + make_rules = lines[i:] + + return CmdFile(cmd_file_path, savedcmd, source, deps, make_rules) + + def get_dependencies( + self: "CmdFile", target_path: PathStr, obj_tree: PathStr, fail_on_unknown_build_command: bool + ) -> list[PathStr]: + """ + Parses all dependencies required to build a target file from its cmd file. + + Args: + target_path: path to the target file relative to `obj_tree`. + obj_tree: absolute path to the object tree. + fail_on_unknown_build_command: Whether to fail if an unknown build command is encountered. + + Returns: + list[PathStr]: dependency file paths relative to `obj_tree`. + """ + input_files: list[PathStr] = [ + str(p) for p in parse_inputs_from_commands(self.savedcmd, fail_on_unknown_build_command) + ] + if self.deps: + input_files += [str(p) for p in parse_cmd_file_deps(self.deps)] + input_files = _expand_resolve_files(input_files, obj_tree) + + cmd_file_dependencies: list[PathStr] = [] + for input_file in input_files: + # input files are either absolute or relative to the object tree + if os.path.isabs(input_file): + input_file = os.path.relpath(input_file, obj_tree) + if input_file == target_path: + # Skip target file to prevent cycles. This is necessary because some multi stage commands first create an output and then pass it as input to the next command, e.g., objcopy. + continue + cmd_file_dependencies.append(input_file) + unique_cmd_file_dependencies = list(dict.fromkeys(cmd_file_dependencies)) + return unique_cmd_file_dependencies + + +def _expand_resolve_files(input_files: list[PathStr], obj_tree: PathStr) -> list[PathStr]: + """ + Expands resolve files which may reference additional files via '@' notation. + + Args: + input_files (list[PathStr]): List of file paths relative to the object tree, where paths starting with '@' refer to files + containing further file paths, each on a separate line. 
+ obj_tree: Absolute path to the root of the object tree. + + Returns: + list[PathStr]: Flattened list of all input file paths, with any nested '@' file references resolved recursively. + """ + expanded_input_files: list[PathStr] = [] + for input_file in input_files: + if not input_file.startswith("@"): + expanded_input_files.append(input_file) + continue + resolve_file_path = os.path.join(obj_tree, input_file.removeprefix("@")) + if not os.path.exists(resolve_file_path): + sbom_logging.error( + "Skip resolving '{resolve_file_path}' because the response file does not exist.", + resolve_file_path=resolve_file_path, + ) + continue + with open(resolve_file_path, "rt", encoding="utf-8") as f: + resolve_file_content = [line_stripped for line in f.readlines() if (line_stripped := line.strip())] + expanded_input_files += _expand_resolve_files(resolve_file_content, obj_tree) + return expanded_input_files diff --git a/scripts/sbom/sbom/cmd_graph/cmd_graph.py b/scripts/sbom/sbom/cmd_graph/cmd_graph.py new file mode 100644 index 0000000000000..2f57965237f44 --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/cmd_graph.py @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from collections import deque +from dataclasses import dataclass, field +from typing import Iterator + +from sbom.cmd_graph.cmd_graph_node import CmdGraphNode, CmdGraphNodeConfig +from sbom.path_utils import PathStr + + +@dataclass +class CmdGraph: + """Directed acyclic graph of build dependencies primarily inferred from .cmd files produced during kernel builds""" + + roots: list[CmdGraphNode] = field(default_factory=list) + + @classmethod + def create(cls, root_paths: list[PathStr], config: CmdGraphNodeConfig) -> "CmdGraph": + """ + Recursively builds a dependency graph starting from `root_paths`. + Dependencies are mainly discovered by parsing the `.cmd` files. 
+ + Args: + root_paths (list[PathStr]): List of paths to root outputs relative to obj_tree + config (CmdGraphNodeConfig): Configuration options + + Returns: + CmdGraph: A graph of all build dependencies for the given root files. + """ + node_cache: dict[PathStr, CmdGraphNode] = {} + root_nodes = [CmdGraphNode.create(root_path, config, node_cache) for root_path in root_paths] + return CmdGraph(root_nodes) + + def __iter__(self) -> Iterator[CmdGraphNode]: + """Traverse the graph in breadth-first order, yielding each unique node.""" + visited: set[PathStr] = set() + node_stack: deque[CmdGraphNode] = deque(self.roots) + while len(node_stack) > 0: + node = node_stack.popleft() + if node.absolute_path in visited: + continue + + visited.add(node.absolute_path) + node_stack.extend(node.children) + yield node diff --git a/scripts/sbom/sbom/cmd_graph/cmd_graph_node.py b/scripts/sbom/sbom/cmd_graph/cmd_graph_node.py new file mode 100644 index 0000000000000..61f3a8140cea3 --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/cmd_graph_node.py @@ -0,0 +1,142 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field +from itertools import chain +import logging +import os +from typing import Iterator, Protocol + +from sbom import sbom_logging +from sbom.cmd_graph.cmd_file import CmdFile +from sbom.cmd_graph.hardcoded_dependencies import get_hardcoded_dependencies +from sbom.cmd_graph.incbin_parser import parse_incbin_statements +from sbom.path_utils import PathStr, has_link, is_relative_to + + +@dataclass +class IncbinDependency: + node: "CmdGraphNode" + full_statement: str + + +class CmdGraphNodeConfig(Protocol): + obj_tree: PathStr + src_tree: PathStr + fail_on_unknown_build_command: bool + + +@dataclass +class CmdGraphNode: + """A node in the cmd graph representing a single file and its dependencies.""" + + absolute_path: PathStr + """Absolute path to the file this node represents.""" + + 
cmd_file: CmdFile | None = None + """Parsed .cmd file describing how the file at absolute_path was built, or None if not available.""" + + cmd_file_dependencies: list["CmdGraphNode"] = field(default_factory=list) + incbin_dependencies: list[IncbinDependency] = field(default_factory=list) + hardcoded_dependencies: list["CmdGraphNode"] = field(default_factory=list) + + @property + def children(self) -> Iterator["CmdGraphNode"]: + seen: set[PathStr] = set() + for node in chain( + self.cmd_file_dependencies, + (dep.node for dep in self.incbin_dependencies), + self.hardcoded_dependencies, + ): + if node.absolute_path not in seen: + seen.add(node.absolute_path) + yield node + + @classmethod + def create( + cls, + target_path: PathStr, + config: CmdGraphNodeConfig, + cache: dict[PathStr, "CmdGraphNode"] | None = None, + depth: int = 0, + ) -> "CmdGraphNode": + """ + Recursively builds a dependency graph starting from `target_path`. + Dependencies are mainly discovered by parsing the `.<target_path.name>.cmd` file. + + Args: + target_path: Path to the target file relative to obj_tree. + config: Config options + cache: Tracks processed nodes to prevent cycles. + depth: Internal parameter to track the current recursion depth. 
+ + Returns: + CmdGraphNode: cmd graph node representing the target file + """ + if cache is None: + cache = {} + + target_path_absolute = ( + os.path.realpath(p) + if has_link(p:=os.path.join(config.obj_tree, target_path)) + else os.path.normpath(p) + ) + + if target_path_absolute in cache: + return cache[target_path_absolute] + + if depth == 0: + logging.debug(f"Build node: {target_path}") + + cmd_file_path = _to_cmd_path(target_path_absolute) + cmd_file = CmdFile.create(cmd_file_path) if os.path.exists(cmd_file_path) else None + node = CmdGraphNode(target_path_absolute, cmd_file) + cache[target_path_absolute] = node + + if not os.path.exists(target_path_absolute): + error_or_warning = ( + sbom_logging.error + if is_relative_to(target_path_absolute, config.obj_tree) + or is_relative_to(target_path_absolute, config.src_tree) + else sbom_logging.warning + ) + error_or_warning( + "Skip parsing '{target_path_absolute}' because file does not exist", + target_path_absolute=target_path_absolute, + ) + return node + + # Search for dependencies to add to the graph as child nodes. Child paths are always relative to the output tree. 
+ def _build_child_node(child_path: PathStr) -> "CmdGraphNode": + return CmdGraphNode.create(child_path, config, cache, depth + 1) + + node.hardcoded_dependencies = [ + _build_child_node(hardcoded_dependency_path) + for hardcoded_dependency_path in get_hardcoded_dependencies( + target_path_absolute, config.obj_tree, config.src_tree + ) + ] + + if cmd_file is not None: + node.cmd_file_dependencies = [ + _build_child_node(cmd_file_dependency_path) + for cmd_file_dependency_path in cmd_file.get_dependencies( + target_path, config.obj_tree, config.fail_on_unknown_build_command + ) + ] + + if node.absolute_path.endswith(".S"): + node.incbin_dependencies = [ + IncbinDependency( + node=_build_child_node(incbin_statement.path), + full_statement=incbin_statement.full_statement, + ) + for incbin_statement in parse_incbin_statements(node.absolute_path) + ] + + return node + + +def _to_cmd_path(path: PathStr) -> PathStr: + name = os.path.basename(path) + return path.removesuffix(name) + f".{name}.cmd" diff --git a/scripts/sbom/sbom/cmd_graph/deps_parser.py b/scripts/sbom/sbom/cmd_graph/deps_parser.py new file mode 100644 index 0000000000000..6a2d92f0778ce --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/deps_parser.py @@ -0,0 +1,52 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import re +import sbom.sbom_logging as sbom_logging +from sbom.path_utils import PathStr + +# Match dependencies on config files +# Example match: "$(wildcard include/config/CONFIG_SOMETHING)" +CONFIG_PATTERN = re.compile(r"\$\(wildcard (include/config/[^)]+)\)") + +# Match dependencies on the objtool binary +# Example match: "$(wildcard ./tools/objtool/objtool)" +OBJTOOL_PATTERN = re.compile(r"\$\(wildcard \./tools/objtool/objtool\)") + +# Match any Makefile wildcard reference +# Example match: "$(wildcard path/to/file)" +WILDCARD_PATTERN = re.compile(r"\$\(wildcard (?P<path>[^)]+)\)") + +# Match ordinary paths: +# - ^(\/)?: Optionally starts 
def parse_cmd_file_deps(deps: list[str]) -> list[PathStr]:
    """
    Parse dependency strings of a .cmd file and return valid input file paths.

    Each dependency is stripped and then classified in this order:
      1. config and objtool wildcard references are dropped,
      2. any other `$(wildcard <path>)` reference contributes `<path>`,
      3. an ordinary path is taken as is,
      4. everything else is reported as an error and skipped.

    Args:
        deps: List of dependency strings as found in `.cmd` files.

    Returns:
        input_files: List of input file paths
    """
    input_files: list[PathStr] = []
    for dep in deps:
        dep = dep.strip()
        if CONFIG_PATTERN.match(dep) or OBJTOOL_PATTERN.match(dep):
            # config paths like include/config/<CONFIG_NAME> should not be included in the graph
            continue
        wildcard_match = WILDCARD_PATTERN.match(dep)
        if wildcard_match is not None:
            input_files.append(wildcard_match.group("path"))
        elif VALID_PATH_PATTERN.match(dep):
            input_files.append(dep)
        else:
            sbom_logging.error("Skip parsing dependency {dep} because of unrecognized format", dep=dep)
    return input_files
["arch/{arch}/kernel/asm-offsets.s"], +} +""" +Maps file paths to the list of dependencies required to build them +which are not tracked by the .cmd dependency mechanism. +Paths are relative to either the source tree or the object tree. +""" + +def get_hardcoded_dependencies(path: PathStr, obj_tree: PathStr, src_tree: PathStr) -> list[PathStr]: + """ + Some files in the kernel build process are not tracked by the .cmd dependency mechanism. + Parsing these dependencies programmatically is too complex for the scope of this project. + Therefore, this function provides manually defined dependencies to be added to the build graph. + + Args: + path: absolute path to a file within the src tree or object tree. + obj_tree: absolute Path to the base directory of the object tree. + src_tree: absolute Path to the `linux` source directory. + + Returns: + list[PathStr]: A list of dependency file paths (relative to the object tree) required to build the file at the given path. + """ + if is_relative_to(path, obj_tree): + path = os.path.relpath(path, obj_tree) + elif is_relative_to(path, src_tree): + path = os.path.relpath(path, src_tree) + + if path not in HARDCODED_DEPENDENCIES: + return [] + + template_variables: dict[str, Callable[[], str | None]] = { + "arch": lambda: _get_arch(path), + } + + dependencies: list[PathStr] = [] + for dependency_template in HARDCODED_DEPENDENCIES[path]: + dependency = _evaluate_template(dependency_template, template_variables) + if dependency is None: + continue + if os.path.exists(os.path.join(obj_tree, dependency)): + dependencies.append(dependency) + elif os.path.exists(dependency_absolute := os.path.join(src_tree, dependency)): + dependencies.append(os.path.relpath(dependency_absolute, obj_tree)) + else: + sbom_logging.error( + "Skip hardcoded dependency '{dependency}' for '{path}' because the dependency lies neither in the src tree nor the object tree.", + dependency=dependency, + path=path, + ) + + return dependencies + + +def 
_evaluate_template(template: str, variables: dict[str, Callable[[], str | None]]) -> str | None: + for key, value_function in variables.items(): + template_key = "{" + key + "}" + if template_key in template: + value = value_function() + if value is None: + return None + template = template.replace(template_key, value) + return template + + +def _get_arch(path: PathStr): + srcarch = Environment.SRCARCH() + if srcarch is None: + sbom_logging.error( + "Skipped architecture specific hardcoded dependency for '{path}' because the SRCARCH environment variable was not set.", + path=path, + ) + return None + return srcarch diff --git a/scripts/sbom/sbom/cmd_graph/incbin_parser.py b/scripts/sbom/sbom/cmd_graph/incbin_parser.py new file mode 100644 index 0000000000000..ca289c2b8888b --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/incbin_parser.py @@ -0,0 +1,42 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass +import re + +from sbom.path_utils import PathStr + +INCBIN_PATTERN = re.compile(r'\s*\.incbin\s+"(?P<path>[^"]+)"') +"""Regex pattern for matching `.incbin "<path>"` statements.""" + + +@dataclass +class IncbinStatement: + """A parsed `.incbin "<path>"` directive.""" + + path: PathStr + """path to the file referenced by the `.incbin` directive.""" + + full_statement: str + """Full `.incbin "<path>"` statement as it originally appeared in the file.""" + + +def parse_incbin_statements(absolute_path: PathStr) -> list[IncbinStatement]: + """ + Parses `.incbin` directives from an `.S` assembly file. + + Args: + absolute_path: Absolute path to the `.S` assembly file. + + Returns: + list[IncbinStatement]: Parsed `.incbin` statements. 
+ """ + with open(absolute_path, "rt", encoding="utf-8") as f: + content = f.read() + return [ + IncbinStatement( + path=match.group("path"), + full_statement=match.group(0).strip(), + ) + for match in INCBIN_PATTERN.finditer(content) + ] diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py new file mode 100644 index 0000000000000..d13876af4dfda --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/__init__.py @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from sbom.cmd_graph.savedcmd_parser.savedcmd_parser import parse_inputs_from_commands + +__all__ = ["parse_inputs_from_commands"] diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py new file mode 100644 index 0000000000000..a48040b2c13c8 --- /dev/null +++ b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_parser_registry.py @@ -0,0 +1,516 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import re +import shlex +from typing import Callable, Iterator + +import sbom.sbom_logging as sbom_logging +from sbom.environment import Environment +from sbom.cmd_graph.savedcmd_parser.command_splitter import IfBlock, split_commands +from sbom.cmd_graph.savedcmd_parser.tokenizer import ( + CmdParsingError, + Option, + Positional, + tokenize_single_command, + tokenize_single_command_positionals_only, +) +from sbom.path_utils import PathStr + +CommandParser = Callable[[str], list[PathStr]] +CommandParserRegistryEntry = tuple[re.Pattern[str], CommandParser] + + +def _parse_dd_command(command: str) -> list[PathStr]: + match = re.match(r"dd.*?if=(\S+)", command) + if match: + return [match.group(1)] + return [] + + +def _parse_cat_command(command: str) -> list[PathStr]: + positionals = 
def _parse_compound_command(command: str) -> list[PathStr]:
    """
    Collect the input files of a compound command such as `( cmd1; cmd2 ) > output`.

    The commands between the brackets are split up and each one is handed to
    the first parser whose pattern matches its beginning. Inner commands that
    are if-blocks, have no matching parser or fail to parse are reported as
    errors and skipped.

    Args:
        command: The complete compound command.

    Returns:
        Input files of all inner commands that could be parsed.

    Raises:
        CmdParsingError: If no bracketed group followed by `>` is found.
    """

    def _parse_cat_before_pipe(inner: str) -> list[PathStr]:
        # Only the part in front of the first pipe belongs to `cat`.
        return _parse_cat_command(inner.split("|")[0])

    def _parse_bin2c_stdin(inner: str) -> list[PathStr]:
        # The input is whatever is redirected into bin2c via `<`.
        redirected = inner.split("<")[1].split(">")[0].strip()
        return [] if redirected == "/dev/null" else [redirected]

    inner_parsers: list[CommandParserRegistryEntry] = [
        (re.compile(r"dd\b"), _parse_dd_command),
        (re.compile(r"cat.*?\|"), _parse_cat_before_pipe),
        (re.compile(r"cat\b[^|>]*$"), _parse_cat_command),
        (re.compile(r"echo\b"), _parse_noop),
        (re.compile(r"\S+="), _parse_noop),
        (re.compile(r"printf\b"), _parse_noop),
        (re.compile(r"sed\b"), _parse_sed_command),
        (re.compile(r"(.*/)scripts/bin2c\s*<"), _parse_bin2c_stdin),
        (re.compile(r"^:$"), _parse_noop),
    ]

    body = re.match(r"\s*[\(\{](.*)[\)\}]\s*>", command, re.DOTALL)
    if body is None:
        raise CmdParsingError("No inner commands found for compound command")

    collected: list[PathStr] = []
    for inner in split_commands(body.group(1)):
        if isinstance(inner, IfBlock):
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because IfBlock is not supported",
                inner_command=inner,
            )
            continue

        # Pick the first parser whose pattern matches.
        for pattern, candidate in inner_parsers:
            if pattern.match(inner):
                selected = candidate
                break
        else:
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because no matching parser was found",
                inner_command=inner,
            )
            continue

        try:
            collected += selected(inner)
        except (CmdParsingError, IndexError) as e:
            sbom_logging.error(
                "Skip parsing inner command {inner_command} of compound command because of command parsing error: {error_message}",
                inner_command=inner,
                error_message=str(e),
            )
    return collected
def _parse_ar_command(command: str) -> list[PathStr]:
    """
    Extract the files that an `ar` command adds to an archive.

    The positional arguments are expected to be
    ['ar', flags, output, input1, input2, ...].

    Returns:
        The input files, or an empty list if the flags do not contain 'r'.
    """
    arguments = tokenize_single_command_positionals_only(command)
    # Only the 'r' option adds new files to the archive. Without it there
    # are no relevant input files.
    adds_files = "r" in arguments[1]
    return arguments[3:] if adds_files else []
+ prefix_path = positionals[1].removesuffix("%s ") + return [f"{prefix_path}{filename}" for filename in positionals[2:]] + + +def _parse_gcc_or_clang_command(command: str) -> list[PathStr]: + parts = shlex.split(command) + # compile mode: expect last positional argument ending in a source file extension to be the input file + for part in reversed(parts): + if not part.startswith("-") and any(part.endswith(suffix) for suffix in [".c", ".S", ".dts"]): + return [part] + + # linking mode: expect all .o files to be the inputs + return [p for p in parts if p.endswith(".o")] + + +def _parse_rustc_command(command: str) -> list[PathStr]: + parts = shlex.split(command) + # expect last positional argument ending in `.rs` to be the input file + for part in reversed(parts): + if not part.startswith("-") and part.endswith(".rs"): + return [part] + raise CmdParsingError("Could not find .rs input source file") + + +def _parse_rustdoc_command(command: str) -> list[PathStr]: + parts = shlex.split(command) + # expect last positional argument ending in `.rs` to be the input file + for part in reversed(parts): + if not part.startswith("-") and part.endswith(".rs"): + return [part] + raise CmdParsingError("Could not find .rs input source file") + + +def _parse_syscallhdr_command(command: str) -> list[PathStr]: + command_parts = tokenize_single_command(command.strip(), flag_options=["--emit-nr"]) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be ["sh", path/to/syscallhdr.sh, input, output] + return [positionals[2]] + + +def _parse_syscalltbl_command(command: str) -> list[PathStr]: + command_parts = tokenize_single_command(command.strip()) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be ["sh", path/to/syscalltbl.sh, input, output] + return [positionals[2]] + + +def _parse_mkcapflags_command(command: str) -> list[PathStr]: + positionals = 
tokenize_single_command_positionals_only(command) + # expect positionals to be ["sh", path/to/mkcapflags.sh, output, input1, input2] + return [positionals[3], positionals[4]] + + +def _parse_orc_hash_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ["sh", path/to/orc_hash.sh, '<', input, '>', output] + return [positionals[3]] + + +def _parse_xen_hypercalls_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ["sh", path/to/xen-hypercalls.sh, output, input1, input2, ...] + return positionals[3:] + + +def _parse_gen_initramfs_command(command: str) -> list[PathStr]: + command_parts = tokenize_single_command(command) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be ["sh", path/to/gen_initramfs.sh, input1, input2, ...] + return positionals[2:] + + +def _parse_vdso2c_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ['vdso2c', raw_input, stripped_input, output] + return [positionals[1], positionals[2]] + + +def _parse_vdsomunge_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ['vdsomunge', input, output] + return [positionals[1]] + + +def _parse_ld_command(command: str) -> list[PathStr]: + command_parts = tokenize_single_command( + command=command.strip(), + flag_options=[ + "-shared", + "--no-undefined", + "--eh-frame-hdr", + "-Bsymbolic", + "-r", + "--no-ld-generated-unwind-info", + "--no-dynamic-linker", + "-pie", + "--no-dynamic-linker--whole-archive", + "--whole-archive", + "--no-whole-archive", + "--start-group", + "--end-group", + ], + ) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be ["ld", input1, input2, ...] 
+ return positionals[1:] + + +def _parse_sed_command(command: str) -> list[PathStr]: + command_parts = shlex.split(command) + # expect command parts to be ["sed", *, input] + input = command_parts[-1] + if input == "/dev/null": + return [] + return [input] + + +def _parse_awk(command: str) -> list[PathStr]: + command_parts = tokenize_single_command(command) + options = [p for p in command_parts if isinstance(p, Option)] + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + has_script_file = any(p.name == "-f" for p in options) + # With -f option: expect ["awk", input1, input2, ...] + # Without -f option: expect ["awk", inline_program, input1, input2, ...] + return positionals[1:] if has_script_file else positionals[2:] + + +def _parse_nm_piped_command(command: str) -> list[PathStr]: + nm_command, _ = command.split("|", 1) + command_parts = tokenize_single_command( + command=nm_command.strip(), + flag_options=["-p", "--defined-only"], + ) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be ["nm", input1, input2, ...] 
+ return [p for p in positionals[1:]] + + +def _parse_pnm_to_logo_command(command: str) -> list[PathStr]: + command_parts = shlex.split(command) + # expect command parts to be ["pnmtologo", <options>, input] + return [command_parts[-1]] + + +def _parse_relacheck(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ["relacheck", input, log_reference] + return [positionals[1]] + + +def _parse_gen_hyprel_command(command: str) -> list[PathStr]: + gen_hyprel_command, _ = command.split(">", 1) + command_parts = shlex.split(gen_hyprel_command) + # expect command_parts to be ["gen-hyprel", input] + return [command_parts[1]] + + +def _parse_perl_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command.strip()) + # expect positionals to be ["perl", input] + return [positionals[1]] + + +def _parse_strip_command(command: str) -> list[PathStr]: + command_parts = tokenize_single_command(command, flag_options=["--strip-debug"]) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be ["strip", input1, input2, ...] + return positionals[1:] + + +def _parse_mkpiggy_command(command: str) -> list[PathStr]: + mkpiggy_command, _ = command.split(">", 1) + positionals = tokenize_single_command_positionals_only(mkpiggy_command) + # expect positionals to be ["mkpiggy", input] + return [positionals[1]] + + +def _parse_relocs_command(command: str) -> list[PathStr]: + if ">" not in command: + # Only consider relocs commands that redirect output to a file. + # If there's no redirection, we assume it produces no output file and therefore has no input we care about. 
+ return [] + relocs_command, _ = command.split(">", 1) + command_parts = shlex.split(relocs_command) + # expect command_parts to be ["relocs", options, input] + return [command_parts[-1]] + + +def _parse_mk_elfconfig_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ["mk_elfconfig", "<", input, ">", output] + return [positionals[2]] + + +def _parse_flex_command(command: str) -> list[PathStr]: + parts = shlex.split(command) + # expect last positional argument ending in `.l` to be the input file + for part in reversed(parts): + if not part.startswith("-") and part.endswith(".l"): + return [part] + raise CmdParsingError("Could not find .l input source file in command") + + +def _parse_bison_command(command: str) -> list[PathStr]: + parts = shlex.split(command) + # expect last positional argument ending in `.y` to be the input file + for part in reversed(parts): + if not part.startswith("-") and part.endswith(".y"): + return [part] + raise CmdParsingError("Could not find input .y input source file in command") + + +def _parse_tools_build_command(command: str) -> list[PathStr]: + positionals = tokenize_single_command_positionals_only(command) + # expect positionals to be ["tools/build", "input1", "input2", "input3", "output"] + return positionals[1:-1] + + +def _parse_extract_cert_command(command: str) -> list[PathStr]: + command_parts = shlex.split(command) + # expect command parts to be [path/to/extract-cert, input, output] + input = command_parts[1] + if not input: + return [] + return [input] + + +def _parse_dtc_command(command: str) -> list[PathStr]: + wno_flags = [command_part for command_part in shlex.split(command) if command_part.startswith("-Wno-")] + command_parts = tokenize_single_command(command, flag_options=wno_flags) + positionals = [p.value for p in command_parts if isinstance(p, Positional)] + # expect positionals to be [path/to/dtc, input] + return [positionals[1]] + 
def _parse_bindgen_command(command: str) -> list[PathStr]:
    """Return every token of a bindgen command that ends in `.h`."""
    command_parts = shlex.split(command)
    return [part for part in command_parts if part.endswith(".h")]


def _parse_gen_header(command: str) -> list[PathStr]:
    """
    Return the file passed via `--xml` to gen_header.py.

    Raises:
        CmdParsingError: If `--xml` is missing or is not followed by a value.
    """
    command_parts = shlex.split(command)
    # expect command parts to be ["python3", path/to/gen_headers.py, ..., "--xml", input]
    if "--xml" not in command_parts:
        raise CmdParsingError(f"Expected --xml input file in gen_headers command but got {command}")
    value_index = command_parts.index("--xml") + 1
    if value_index >= len(command_parts):
        # report a trailing `--xml` explicitly instead of relying on an incidental IndexError
        raise CmdParsingError(f"Expected --xml input file in gen_headers command but got {command}")
    return [command_parts[value_index]]


def _parse_mkuboot_command(command: str) -> list[PathStr]:
    """Return the data file passed via `-d`; raise CmdParsingError if it is absent."""
    command_parts = tokenize_single_command(command)
    # mkuboot.sh passes all args to mkimage; -d specifies the data/input image file
    for part in command_parts:
        if isinstance(part, Option) and part.name == "-d" and part.value is not None:
            return [part.value]
    raise CmdParsingError("Could not find -d (data file) option in mkuboot.sh command")


def _parse_syscallnr_command(command: str) -> list[PathStr]:
    command_parts = tokenize_single_command(command.strip())
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    # expect positionals to be ["sh", path/to/syscallnr.sh, input, output]
    return [positionals[2]]


def _parse_gen_kernel_hwcaps_command(command: str) -> list[PathStr]:
    command_parts = tokenize_single_command(command.strip(), flag_options=["-e"])
    positionals = [p.value for p in command_parts if isinstance(p, Positional)]
    # expect positionals to be ["sh", path/to/gen-kernel-hwcaps.sh, input]
    return [positionals[2]]


class CommandParserRegistry:
    """
    Registry mapping command patterns to their input-file parsers.

    Iterating the registry yields `(pattern, parser)` entries in registration
    order, so an earlier entry takes precedence when callers pick the first match.
    """

    def __init__(self, entries: list[CommandParserRegistryEntry]) -> None:
        self._entries = entries

    def __iter__(self) -> Iterator[CommandParserRegistryEntry]:
        return iter(self._entries)

    @staticmethod
    def create() -> "CommandParserRegistry":
        """Build the registry, honoring the toolchain names provided by `Environment`."""

        def env_or_default_pattern(env_value: str | None, default_pattern: str) -> str:
            # Accept the literal tool name from the environment in addition to the default pattern.
            if env_value is None or not env_value.strip():
                return default_pattern
            return rf"(?:{re.escape(env_value.strip())}|{default_pattern})"

        def normalizing(tool_regex: str, canonical_name: str, parser):
            # Rewrite the first occurrence of the (possibly prefixed or overridden) tool name to its
            # canonical name before delegating. The regex is compiled once here, not on every call.
            compiled_tool_regex = re.compile(tool_regex)
            return lambda command: parser(compiled_tool_regex.sub(canonical_name, command, count=1))

        cc_pattern = env_or_default_pattern(Environment.CC(), r"([^\s]+-)?(gcc|clang)")
        ld_pattern = env_or_default_pattern(Environment.LD(), r"([^\s]+-)?ld")
        ar_pattern = env_or_default_pattern(Environment.AR(), r"([^\s]+-)?ar")
        nm_pattern = env_or_default_pattern(Environment.NM(), r"([^\s]+-)?nm")
        objcopy_pattern = env_or_default_pattern(Environment.OBJCOPY(), r"([^\s]+-)?objcopy")
        strip_pattern = env_or_default_pattern(Environment.STRIP(), r"([^\s]+-)?strip")

        entries: list[CommandParserRegistryEntry] = [
            # Compound commands
            (re.compile(r"\(.*?\)\s*>", re.DOTALL), _parse_compound_command),
            (re.compile(r"\{.*?\}\s*>", re.DOTALL), _parse_compound_command),
            # Standard Unix utilities and system tools
            (re.compile(r"^rm\b"), _parse_noop),
            (re.compile(r"^mkdir\b"), _parse_noop),
            (re.compile(r"^touch\b"), _parse_noop),
            (re.compile(r"^cp\b"), _parse_cp_command),
            (re.compile(r"^truncate\b"), _parse_noop),
            (re.compile(r"^cat\b.*?[\|>]"), lambda c: _parse_cat_command(c.split("|")[0].split(">")[0])),
            (re.compile(r"^echo[^|]*$"), _parse_noop),
            (re.compile(r"^sed.*?>"), lambda c: _parse_sed_command(c.split(">")[0])),
            (re.compile(r"^sed\b"), _parse_noop),
            (re.compile(r"^awk.*?<.*?>"), lambda c: [c.split("<")[1].split(">")[0]]),
            (re.compile(r"^awk.*?>"), lambda c: _parse_awk(c.split(">")[0])),
            (re.compile(r"^(/bin/)?true\b"), _parse_noop),
            (re.compile(r"^(/bin/)?false\b"), _parse_noop),
            (re.compile(r"^openssl\s+req.*?-new.*?-keyout"), _parse_noop),
            # Compilers and code generators
            # (C/LLVM toolchain, Rust, Flex/Bison, Bindgen, Perl, etc.)
            (
                re.compile(rf"^{cc_pattern}\b"),
                normalizing(rf"^{cc_pattern}\b", "gcc", _parse_gcc_or_clang_command),
            ),
            (
                re.compile(rf"^{ld_pattern}\b"),
                normalizing(rf"^{ld_pattern}\b", "ld", _parse_ld_command),
            ),
            (
                re.compile(rf"^printf\b.*\| xargs {ar_pattern}\b"),
                normalizing(rf"xargs {ar_pattern}\b", "xargs ar", _parse_ar_piped_xargs_command),
            ),
            (
                re.compile(rf"^{ar_pattern}\b"),
                normalizing(rf"^{ar_pattern}\b", "ar", _parse_ar_command),
            ),
            (
                re.compile(rf"^{nm_pattern}\b.*?\|"),
                normalizing(rf"^{nm_pattern}\b", "nm", _parse_nm_piped_command),
            ),
            (
                re.compile(rf"^{objcopy_pattern}\b"),
                normalizing(rf"^{objcopy_pattern}\b", "objcopy", _parse_objcopy_command),
            ),
            (
                re.compile(rf"^{strip_pattern}\b"),
                normalizing(rf"^{strip_pattern}\b", "strip", _parse_strip_command),
            ),
            (re.compile(r".*?rustc\b"), _parse_rustc_command),
            (re.compile(r".*?rustdoc\b"), _parse_rustdoc_command),
            (re.compile(r"^flex\b"), _parse_flex_command),
            (re.compile(r"^bison\b"), _parse_bison_command),
            (re.compile(r"^bindgen\b"), _parse_bindgen_command),
            (re.compile(r"^perl\b"), _parse_perl_command),
            # Kernel-specific build scripts and tools
            (re.compile(r"^(.*/)?link-vmlinux\.sh\b"), _parse_link_vmlinux_command),
            (re.compile(r"sh (.*/)?syscallhdr\.sh\b"), _parse_syscallhdr_command),
            (re.compile(r"sh (.*/)?syscalltbl\.sh\b"), _parse_syscalltbl_command),
            (re.compile(r"sh (.*/)?mkcapflags\.sh\b"), _parse_mkcapflags_command),
            (re.compile(r"sh (.*/)?orc_hash\.sh\b"), _parse_orc_hash_command),
            (re.compile(r"sh (.*/)?xen-hypercalls\.sh\b"), _parse_xen_hypercalls_command),
            (re.compile(r"sh (.*/)?gen_initramfs\.sh\b"), _parse_gen_initramfs_command),
            (re.compile(r"sh (.*/)?checkundef\.sh\b"), _parse_noop),
            (re.compile(r"(bash|sh) (.*/)?mkuboot\.sh\b"), _parse_mkuboot_command),
            (re.compile(r"sh (.*/)?syscallnr\.sh\b"), _parse_syscallnr_command),
            (
                re.compile(r"(/bin/)?sh (.*/)?gen-kernel-hwcaps\.sh\b"),
                lambda c: _parse_gen_kernel_hwcaps_command(c.split(">")[0]),
            ),
            (re.compile(r"(.*/)?vdso2c\b"), _parse_vdso2c_command),
            (re.compile(r"(.*/)?vdsomunge\b"), _parse_vdsomunge_command),
            (re.compile(r"^(.*/)?mkpiggy.*?>"), _parse_mkpiggy_command),
            (re.compile(r"^(.*/)?relocs\b"), _parse_relocs_command),
            (re.compile(r"^(.*/)?mk_elfconfig.*?<.*?>"), _parse_mk_elfconfig_command),
            (re.compile(r"^(.*/)?tools/build\b"), _parse_tools_build_command),
            (re.compile(r"^(.*/)?certs/extract-cert"), _parse_extract_cert_command),
            (re.compile(r"^(.*/)?scripts/dtc/dtc\b"), _parse_dtc_command),
            (re.compile(r"^(.*/)?pnmtologo\b"), _parse_pnm_to_logo_command),
            (re.compile(r"^(.*/)?kernel/pi/relacheck"), _parse_relacheck),
            (re.compile(r"^(.*/)?gen-hyprel\b"), _parse_gen_hyprel_command),
            (re.compile(r"^drivers/gpu/drm/radeon/mkregtable"), lambda c: [c.split(" ")[1]]),
            (re.compile(r"(.*/)?genheaders\b"), _parse_noop),
            (re.compile(r"^(.*/)?mkcpustr\s+>"), _parse_noop),
            # NOTE(review): unlike its neighbours this pattern requires a directory prefix - confirm intent
            (re.compile(r"^(.*/)polgen\b"), _parse_noop),
            (re.compile(r"make -f .*/arch/x86/Makefile\.postlink"), _parse_noop),
            (re.compile(r"^(.*/)?raid6/mktables\s+>"), _parse_noop),
            (re.compile(r"^(.*/)?objtool\b"), _parse_noop),
            (re.compile(r"^(.*/)?module/gen_test_kallsyms\.sh"), _parse_noop),
            (re.compile(r"^(.*/)?gen_header\.py"), _parse_gen_header),
            (re.compile(r"^(.*/)?scripts/rustdoc_test_gen"), _parse_noop),
        ]
        return CommandParserRegistry(entries)
b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/command_splitter.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import re +from dataclasses import dataclass + + +# If Block pattern to match a simple, single-level if-then-fi block. Nested If blocks are not supported. +IF_BLOCK_PATTERN = re.compile( + r""" + ^if(.*?);\s* # Match 'if <condition>;' (non-greedy) + then(.*?);\s* # Match 'then <body>;' (non-greedy) + fi\b # Match 'fi' + """, + re.VERBOSE, +) + + +@dataclass +class IfBlock: + condition: str + then_statement: str + + +def _unwrap_outer_parentheses(s: str) -> str: + s = s.strip() + if not (s.startswith("(") and s.endswith(")")): + return s + + count = 0 + for i, char in enumerate(s): + if char == "(": + count += 1 + elif char == ")": + count -= 1 + # If count is 0 before the end, outer parentheses don't match + if count == 0 and i != len(s) - 1: + return s + + # outer parentheses do match, unwrap once + return _unwrap_outer_parentheses(s[1:-1]) + + +def _find_first_top_level_command_separator( + commands: str, separators: list[str] = [";", "&&"] +) -> tuple[int | None, int | None]: + def is_escaped(index: int) -> bool: + preceding = commands[:index] + return (len(preceding) - len(preceding.rstrip("\\"))) % 2 == 1 + + in_single_quote = False + in_double_quote = False + in_curly_braces = 0 + in_braces = 0 + for i, char in enumerate(commands): + if char == "'" and not in_double_quote and not is_escaped(i): + # Toggle single quote state (unless inside double quotes or escaped) + in_single_quote = not in_single_quote + elif char == '"' and not in_single_quote and not is_escaped(i): + # Toggle double quote state (unless inside single quotes or escaped) + in_double_quote = not in_double_quote + + if in_single_quote or in_double_quote: + continue + + # Toggle braces state + if char == "{": + in_curly_braces += 1 + if char == "}": + in_curly_braces -= 1 + + if char == "(": + in_braces += 1 + 
if char == ")": + in_braces -= 1 + + if in_curly_braces > 0 or in_braces > 0: + continue + + # return found separator position and separator length + for separator in separators: + if commands[i : i + len(separator)] == separator: + return i, len(separator) + + return None, None + + +def split_commands(commands: str) -> list[str | IfBlock]: + """ + Splits a string of command-line commands into individual parts. + + This function handles: + - Top-level command separators (e.g., `;` and `&&`) to split multiple commands. + - Conditional if-blocks, returning them as `IfBlock` instances. + - Preserves the order of commands and trims whitespace. + + Args: + commands (str): The raw command string. + + Returns: + list[str | IfBlock]: A list of single commands or `IfBlock` objects. + """ + single_commands: list[str | IfBlock] = [] + remaining_commands = _unwrap_outer_parentheses(commands) + while len(remaining_commands) > 0: + remaining_commands = remaining_commands.strip() + + # if block + matched_if = IF_BLOCK_PATTERN.match(remaining_commands) + if matched_if: + condition, then_statement = matched_if.groups() + single_commands.append(IfBlock(condition.strip(), then_statement.strip())) + full_matched = matched_if.group(0) + remaining_commands = remaining_commands.removeprefix(full_matched).lstrip("; \n") + continue + + # command until next separator + separator_position, separator_length = _find_first_top_level_command_separator(remaining_commands) + if separator_position is not None and separator_length is not None: + single_commands.append(remaining_commands[:separator_position].strip()) + remaining_commands = remaining_commands[separator_position + separator_length :].strip() + continue + + # single last command + single_commands.append(remaining_commands) + break + + return single_commands diff --git a/scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py b/scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py new file mode 100644 index 
# scripts/sbom/sbom/cmd_graph/savedcmd_parser/savedcmd_parser.py
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Copyright (C) 2025 TNG Technology Consulting GmbH

import sbom.sbom_logging as sbom_logging
from sbom.cmd_graph.savedcmd_parser.command_splitter import IfBlock, split_commands
from sbom.cmd_graph.savedcmd_parser.command_parser_registry import CommandParserRegistry
from sbom.cmd_graph.savedcmd_parser.tokenizer import CmdParsingError
from sbom.path_utils import PathStr

DEFAULT_COMMAND_PARSER_REGISTRY = CommandParserRegistry.create()


def parse_inputs_from_commands(
    commands: str,
    fail_on_unknown_build_command: bool,
    registry: CommandParserRegistry | None = None,
) -> list[PathStr]:
    """
    Extract input files referenced in a set of command-line commands.

    Commands that cannot be matched or parsed are reported and skipped; they
    never abort the extraction of the remaining commands.

    Args:
        commands (str): Command line expression to parse.
        fail_on_unknown_build_command (bool): Whether to fail if an unknown build command is encountered. If False, errors are logged as warnings.
        registry (CommandParserRegistry | None): Registry of single command parsers.

    Returns:
        list[PathStr]: List of input file paths required by the commands.
    """

    def log_error_or_warning(message: str, /, **kwargs: str) -> None:
        if fail_on_unknown_build_command:
            sbom_logging.error(message, **kwargs)
        else:
            sbom_logging.warning(message, **kwargs)

    if registry is None:
        registry = DEFAULT_COMMAND_PARSER_REGISTRY

    input_files: list[PathStr] = []
    for single_command in split_commands(commands):
        if isinstance(single_command, IfBlock):
            # The 'then' statement is parsed only to detect inputs; any it finds are reported, not returned.
            inputs = parse_inputs_from_commands(single_command.then_statement, fail_on_unknown_build_command, registry)
            if inputs:
                log_error_or_warning(
                    "Skipped parsing command {then_statement} because input files in IfBlock 'then' statement are not supported",
                    then_statement=single_command.then_statement,
                )
            continue

        # The first registry entry whose pattern matches at the start of the command wins.
        matched_parser = next((parser for pattern, parser in registry if pattern.match(single_command)), None)
        if matched_parser is None:
            log_error_or_warning(
                "Skipped parsing command {single_command} because no matching parser was found",
                single_command=single_command,
            )
            continue
        try:
            inputs = matched_parser(single_command)
        except (CmdParsingError, IndexError, ValueError) as e:
            # ValueError covers parsers that unpack or shlex-split a command of unexpected shape
            # (e.g. a missing redirect or an unbalanced quote).
            log_error_or_warning(
                "Skipped parsing command {single_command} because of command parsing error: {error_message}",
                single_command=single_command,
                error_message=str(e),
            )
        else:
            input_files.extend(inputs)

    return [input_file.strip().rstrip("/") for input_file in input_files]
+ value: str + + +_SUBCOMMAND_PATTERN = re.compile(r"\$\$\(([^()]*)\)") +"""Pattern to match $$(...) blocks""" + + +def tokenize_single_command(command: str, flag_options: list[str] | None = None) -> list[Union[Option, Positional]]: + """ + Parse a shell command into a list of Options and Positionals. + - Positional: the command and any positional arguments. + - Options: handles flags and options with values provided as space-separated, or equals-sign + (e.g., '--opt val', '--opt=val', '--flag'). + + Args: + command: Command line string. + flag_options: Options that are flags without values (e.g., '--verbose'). + + Returns: + List of `Option` and `Positional` objects in command order. + """ + + # Wrap all $$(...) blocks in double quotes to prevent shlex from splitting them. + command_with_protected_subcommands = _SUBCOMMAND_PATTERN.sub(lambda m: f'"$$({m.group(1)})"', command) + tokens = shlex.split(command_with_protected_subcommands) + + parsed: list[Option | Positional] = [] + i = 0 + while i < len(tokens): + token = tokens[i] + + # Positional + if not token.startswith("-"): + parsed.append(Positional(token)) + i += 1 + continue + + # Option without value (--flag) + if (token.startswith("-") and i + 1 < len(tokens) and tokens[i + 1].startswith("-")) or ( + flag_options and token in flag_options + ): + parsed.append(Option(name=token)) + i += 1 + continue + + # Option with equals sign (--opt=val) + if "=" in token: + name, value = token.split("=", 1) + parsed.append(Option(name=name, value=value)) + i += 1 + continue + + # Option with space-separated value (--opt val) + if i + 1 < len(tokens) and not tokens[i + 1].startswith("-"): + parsed.append(Option(name=token, value=tokens[i + 1])) + i += 2 + continue + + raise CmdParsingError(f"Unrecognized token: {token} in command {command}") + + return parsed + + +def tokenize_single_command_positionals_only(command: str) -> list[str]: + command_parts = tokenize_single_command(command) + positionals = [p.value for p in 
command_parts if isinstance(p, Positional)] + if len(positionals) != len(command_parts): + raise CmdParsingError( + f"Invalid command format: expected positional arguments only but got options in command {command}." + ) + return positionals diff --git a/scripts/sbom/sbom/config.py b/scripts/sbom/sbom/config.py new file mode 100644 index 0000000000000..6811f782943eb --- /dev/null +++ b/scripts/sbom/sbom/config.py @@ -0,0 +1,320 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import argparse +from dataclasses import dataclass +from datetime import datetime, timezone +from enum import Enum +import os +from typing import Any +from sbom.path_utils import PathStr + + +class KernelSpdxDocumentKind(Enum): + SOURCE = "source" + BUILD = "build" + OUTPUT = "output" + + +@dataclass +class KernelSbomConfig: + src_tree: PathStr + """Absolute path to the Linux kernel source directory.""" + + obj_tree: PathStr + """Absolute path to the build output directory.""" + + root_paths: list[PathStr] + """List of paths to root outputs (relative to obj_tree) to base the SBOM on.""" + + generate_spdx: bool + """Whether to generate SPDX SBOM documents. If False, no SPDX files are created.""" + + spdx_file_names: dict[KernelSpdxDocumentKind, str] + """If `generate_spdx` is True, defines the file names for each SPDX SBOM kind + (source, build, output) to store on disk.""" + + generate_used_files: bool + """Whether to generate a flat list of all source files used in the build. 
+ If False, no used-files document is created.""" + + used_files_file_name: str + """If `generate_used_files` is True, specifies the file name for the used-files document.""" + + output_directory: PathStr + """Path to the directory where the generated output documents will be saved.""" + + debug: bool + """Whether to enable debug logging.""" + + fail_on_unknown_build_command: bool + """Whether to fail if an unknown build command is encountered in a .cmd file.""" + + write_output_on_error: bool + """Whether to write output documents even if errors occur.""" + + created: datetime + """Datetime to use for the SPDX created property of the CreationInfo element.""" + + spdxId_prefix: str + """Prefix to use for all SPDX element IDs.""" + + build_type: str + """SPDX buildType property to use for all Build elements.""" + + build_id: str | None + """SPDX buildId property to use for all Build elements.""" + + package_license: str + """License expression applied to all SPDX Packages.""" + + package_version: str | None + """Version string applied to all SPDX Packages.""" + + package_copyright_text: str | None + """Copyright text applied to all SPDX Packages.""" + + prettify_json: bool + """Whether to pretty-print generated SPDX JSON documents.""" + + +def _parse_cli_arguments(parser: argparse.ArgumentParser) -> dict[str, Any]: + """ + Parse command-line arguments using argparse. + + Returns: + Dictionary of parsed arguments. 
+ """ + parser.add_argument( + "--src-tree", + default="../linux", + help="Path to the kernel source tree (default: ../linux)", + ) + parser.add_argument( + "--obj-tree", + default="../linux/kernel_build", + help="Path to the build output directory (default: ../linux/kernel_build)", + ) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "--roots", + nargs="+", + help="Space-separated list of paths relative to obj-tree for which the SBOM will be created.\n" + "Cannot be used together with --roots-file.", + ) + group.add_argument( + "--roots-file", + help="Path to a file containing the root paths (one per line). Cannot be used together with --roots.", + ) + parser.add_argument( + "--generate-spdx", + action="store_true", + default=False, + help=( + "Whether to create sbom-source.spdx.json, sbom-build.spdx.json and " + "sbom-output.spdx.json documents (default: False)" + ), + ) + parser.add_argument( + "--generate-used-files", + action="store_true", + default=False, + help=( + "Whether to create the sbom.used-files.txt file, a flat list of all " + "source files used for the kernel build.\n" + "If src-tree and obj-tree are equal it is not possible to reliably " + "classify source files.\n" + "In this case sbom.used-files.txt will contain all files used for the " + "kernel build including all build artifacts. (default: False)" + ), + ) + parser.add_argument( + "--output-directory", + default=".", + help="Path to the directory where the generated output documents will be stored (default: .)", + ) + parser.add_argument( + "--debug", + action="store_true", + default=False, + help="Enable debug logs (default: False)", + ) + + # Error handling settings + parser.add_argument( + "--do-not-fail-on-unknown-build-command", + action="store_true", + default=False, + help=( + "Whether to fail if an unknown build command is encountered in a .cmd file.\n" + "If set to True, errors are logged as warnings instead. 
(default: False)" + ), + ) + parser.add_argument( + "--write-output-on-error", + action="store_true", + default=False, + help=( + "Write output documents even if errors occur. The resulting documents " + "may be incomplete.\n" + "A summary of warnings and errors can be found in the 'comment' property " + "of the CreationInfo element. (default: False)" + ), + ) + + # SPDX specific options + spdx_group = parser.add_argument_group("SPDX options", "Options for customizing SPDX document generation") + spdx_group.add_argument( + "--spdxId-prefix", + default="urn:spdx.dev:", + help="The prefix to use for all spdxId properties. (default: urn:spdx.dev:)", + ) + spdx_group.add_argument( + "--build-type", + default="urn:spdx.dev:Kbuild", + help="The SPDX buildType property to use for all Build elements. (default: urn:spdx.dev:Kbuild)", + ) + spdx_group.add_argument( + "--build-id", + default=None, + help="The SPDX buildId property to use for all Build elements.\n" + "If not provided the spdxId of the high level Build element is used as the buildId. (default: None)", + ) + spdx_group.add_argument( + "--package-license", + default="NOASSERTION", + help=( + "The SPDX licenseExpression property to use for the LicenseExpression " + "linked to all SPDX Package elements. (default: NOASSERTION)" + ), + ) + spdx_group.add_argument( + "--package-version", + default=None, + help="The SPDX packageVersion property to use for all SPDX Package elements. (default: None)", + ) + spdx_group.add_argument( + "--package-copyright-text", + default=None, + help=( + "The SPDX copyrightText property to use for all SPDX Package elements.\n" + "If not specified, and if a COPYING file exists in the source tree,\n" + "the package-copyright-text is set to the content of this file. 
" + "(default: None)" + ), + ) + spdx_group.add_argument( + "--prettify-json", + action="store_true", + default=False, + help="Whether to pretty print the generated spdx.json documents (default: False)", + ) + + args = vars(parser.parse_args()) + return args + + +def get_config() -> KernelSbomConfig: + """ + Parse command-line arguments and construct the configuration object. + + Returns: + KernelSbomConfig: Configuration object with all settings for SBOM generation. + """ + parser = argparse.ArgumentParser( + formatter_class=argparse.RawTextHelpFormatter, + description="Generate SPDX SBOM documents for kernel builds", + ) + args = _parse_cli_arguments(parser) + + # Extract and validate cli arguments + src_tree = os.path.realpath(args["src_tree"]) + obj_tree = os.path.realpath(args["obj_tree"]) + root_paths = [] + if args["roots_file"]: + with open(args["roots_file"], "rt", encoding="utf-8") as f: + root_paths = [root.strip() for root in f.readlines()] + if len(root_paths) == 0: + parser.error("--roots-file must contain at least one path") + else: + root_paths = args["roots"] + _validate_path_arguments(parser, src_tree, obj_tree, root_paths) + + generate_spdx = args["generate_spdx"] + generate_used_files = args["generate_used_files"] + output_directory = os.path.realpath(args["output_directory"]) + debug = args["debug"] + + fail_on_unknown_build_command = not args["do_not_fail_on_unknown_build_command"] + write_output_on_error = args["write_output_on_error"] + + created = datetime.fromtimestamp( + max([os.path.getmtime(os.path.join(obj_tree, root_path)) for root_path in root_paths]), + tz=timezone.utc, + ) + spdxId_prefix = args["spdxId_prefix"] + build_type = args["build_type"] + build_id = args["build_id"] + package_license = args["package_license"] + package_version = args["package_version"] if args["package_version"] is not None else None + package_copyright_text: str | None = None + if args["package_copyright_text"] is not None: + package_copyright_text = 
def _validate_path_arguments(
    parser: argparse.ArgumentParser,
    src_tree: PathStr,
    obj_tree: PathStr,
    root_paths: list[PathStr],
) -> None:
    """
    Validate that the source tree, the object tree and all root artifacts exist.

    Every violation is reported through ``parser.error``, which prints the
    usage message together with the error and terminates the program.

    Args:
        parser: The argument parser, used to emit well-formatted error messages.
        src_tree: Absolute path to the source tree.
        obj_tree: Absolute path to the object tree.
        root_paths: List of root paths relative to obj_tree.
    """
    for option, tree in (("--src-tree", src_tree), ("--obj-tree", obj_tree)):
        if not os.path.exists(tree):
            parser.error(f"{option} {tree} does not exist")
        # A regular file passes the existence check but can never serve as a
        # tree; reject it here instead of failing later with an obscure error.
        if not os.path.isdir(tree):
            parser.error(f"{option} {tree} is not a directory")

    for root_path in root_paths:
        root_path_absolute = os.path.join(obj_tree, root_path)
        if not os.path.isfile(root_path_absolute):
            parser.error(f"path to root artifact {root_path_absolute} is not a file")
"KERNELRELEASE", + "KERNELVERSION", + "KGZIP", + "KLZOP", + "LC_COLLATE", + "LC_NUMERIC", + "LD", + "LDFLAGS_MODULE", + "LEX", + "LINUXINCLUDE", + "LZ4", + "LZMA", + "MAKE", + "MAKEFILES", + "MAKEFILE_LIST", + "MAKEFLAGS", + "MAKELEVEL", + "MAKEOVERRIDES", + "MAKE_COMMAND", + "MAKE_HOST", + "MAKE_TERMERR", + "MAKE_TERMOUT", + "MAKE_VERSION", + "MFLAGS", + "MODLIB", + "NM", + "NOSTDINC_FLAGS", + "O", + "OBJCOPY", + "OBJCOPYFLAGS", + "OBJDUMP", + "PAHOLE", + "PATCHLEVEL", + "PERL", + "PYTHON3", + "Q", + "RCS_FIND_IGNORE", + "READELF", + "REALMODE_CFLAGS", + "RESOLVE_BTFIDS", + "RETHUNK_CFLAGS", + "RETHUNK_RUSTFLAGS", + "RETPOLINE_CFLAGS", + "RETPOLINE_RUSTFLAGS", + "RETPOLINE_VDSO_CFLAGS", + "RUSTC", + "RUSTC_BOOTSTRAP", + "RUSTC_OR_CLIPPY", + "RUSTC_OR_CLIPPY_QUIET", + "RUSTDOC", + "RUSTFLAGS_KERNEL", + "RUSTFLAGS_MODULE", + "RUSTFMT", + "SRCARCH", + "STRIP", + "SUBLEVEL", + "SUFFIXES", + "TAR", + "UTS_MACHINE", + "VERSION", + "VPATH", + "XZ", + "YACC", + "ZSTD", + "building_out_of_srctree", + "cross_compiling", + "objtree", + "quiet", + "rust_common_flags", + "srcroot", + "srctree", + "sub_make_done", + "subdir", +] + + +class Environment: + """ + Read-only accessor for kernel build environment variables. 
PathStr = str
"""Filesystem path represented as a plain string for better performance than pathlib.Path."""


def is_relative_to(path: PathStr, base: PathStr) -> bool:
    """
    Return True if ``path`` equals ``base`` or is located below it.

    The comparison is purely lexical and works on whole path components, so
    ``/a/bc`` is not considered to be inside ``/a/b``. Symlinks and ``..``
    segments are not resolved.

    Args:
        path: Path to test.
        base: Candidate ancestor directory.

    Returns:
        True if ``base`` is a component-wise prefix of ``path``; False
        otherwise, including when the two paths cannot be compared at all.
    """
    try:
        # commonpath() drops empty and "." components from its result, so the
        # base is normalised the same way before comparing. Otherwise a base
        # such as "/a/b/" would never match.
        return os.path.commonpath([path, base]) == os.path.commonpath([base])
    except ValueError:
        # Raised e.g. when absolute and relative paths are mixed; such paths
        # share no common ancestor, so the answer is simply "no".
        return False
MessageTemplate = str


class MessageLogger:
    """Logger that suppresses repeated messages and stores a summary of all logged messages.

    Messages are grouped by their template. For each template at most
    ``repeated_logs_limit`` rendered messages are logged and remembered; later
    occurrences are only counted and reported in the summary.
    """

    # Rendered messages that were actually logged, grouped by template.
    _messages: dict[MessageTemplate, list[str]]
    # Total number of log() calls per template, including suppressed ones.
    _message_counts: dict[MessageTemplate, int]
    # Maximum number of repeated messages of the same type to log before
    # suppressing further output.
    _repeated_logs_limit: int

    def __init__(self, level: Literal["error", "warning"], repeated_logs_limit: int = 3) -> None:
        """
        Args:
            level: Severity used when forwarding messages to ``logging``.
            repeated_logs_limit: Number of messages per template that are logged
                before further ones are suppressed.
        """
        self._level = level
        self._messages = {}
        self._message_counts = {}
        self._repeated_logs_limit = repeated_logs_limit

    @staticmethod
    def _render(template: MessageTemplate, values: dict[str, str]) -> str:
        """Replace every ``{name}`` in template that has an entry in values, in a single pass."""
        import re  # local import keeps this block independent of the module's import list

        def substitute(match: "re.Match[str]") -> str:
            name = match.group(1)
            # Unknown placeholders are left untouched.
            return str(values[name]) if name in values else match.group(0)

        # One pass over the template: text inserted for one placeholder is
        # never re-scanned, so a value that happens to contain "{other}" is
        # emitted literally instead of being substituted again.
        return re.sub(r"\{(\w+)\}", substitute, template)

    def log(self, template: MessageTemplate, /, **kwargs: str) -> None:
        """Log a message based on a template and optional variables.

        Example: `log("Missing {path}", path=str(p))`.

        Args:
            template: Message text with ``{name}`` placeholders; also the key
                under which repetitions are counted.
            **kwargs: Values for the placeholders.
        """
        message = self._render(template, kwargs)
        logged = self._messages.setdefault(template, [])
        count = self._message_counts.get(template, 0) + 1
        self._message_counts[template] = count
        if count > self._repeated_logs_limit:
            # Suppressed: only the counter reflects this occurrence.
            return
        if self._level == "error":
            logging.error(message)
        elif self._level == "warning":
            logging.warning(message)
        logged.append(message)

    def get_summary(self) -> str:
        """Return all logged messages plus a note per template on suppressed repetitions.

        Returns:
            A newline-separated summary, or an empty string if nothing was logged.
        """
        if not self._messages:
            return ""
        summary: list[str] = [f"Summarize {self._level}s:"]
        for template, messages in self._messages.items():
            summary.extend(messages)
            n_suppressed_messages = self._message_counts[template] - self._repeated_logs_limit
            if n_suppressed_messages > 0:
                instances = "instance" if n_suppressed_messages == 1 else "instances"
                summary.append(f"... (Found {n_suppressed_messages} more {instances} of this {self._level})")
        return "\n".join(summary)

    def has_messages(self) -> bool:
        """Return True if log() was called at least once."""
        return bool(self._message_counts)
+ _warning_logger = MessageLogger("warning") + _error_logger = MessageLogger("error") + + +init() diff --git a/scripts/sbom/sbom/spdx/__init__.py b/scripts/sbom/sbom/spdx/__init__.py new file mode 100644 index 0000000000000..4097b59f8f172 --- /dev/null +++ b/scripts/sbom/sbom/spdx/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from .spdxId import SpdxId, SpdxIdGenerator +from .serialization import JsonLdSpdxDocument + +__all__ = ["JsonLdSpdxDocument", "SpdxId", "SpdxIdGenerator"] diff --git a/scripts/sbom/sbom/spdx/build.py b/scripts/sbom/sbom/spdx/build.py new file mode 100644 index 0000000000000..a39ec9c09b16f --- /dev/null +++ b/scripts/sbom/sbom/spdx/build.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field +from sbom.spdx.core import DictionaryEntry, Element, Hash + + +@dataclass(kw_only=True) +class Build(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Build/Classes/Build/""" + + type: str = field(init=False, default="build_Build") + build_buildType: str + build_buildId: str + build_environment: list[DictionaryEntry] = field(default_factory=list) + build_configSourceUri: list[str] = field(default_factory=list) + build_configSourceDigest: list[Hash] = field(default_factory=list) diff --git a/scripts/sbom/sbom/spdx/core.py b/scripts/sbom/sbom/spdx/core.py new file mode 100644 index 0000000000000..7eb376a1cd883 --- /dev/null +++ b/scripts/sbom/sbom/spdx/core.py @@ -0,0 +1,170 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field + +from typing import Any, Literal +from sbom.spdx.spdxId import SpdxId + +SPDX_SPEC_VERSION = "3.0.1" + +ExternalIdentifierType = Literal["email", "gitoid", "urlScheme"] +HashAlgorithm = Literal["sha256", "sha512"] +ProfileIdentifierType 
= Literal["core", "software", "build", "lite", "simpleLicensing"] +RelationshipType = Literal[ + "contains", + "generates", + "hasDeclaredLicense", + "hasInput", + "hasOutput", + "ancestorOf", + "hasDistributionArtifact", + "dependsOn", +] +RelationshipCompleteness = Literal["complete", "incomplete", "noAssertion"] + + +@dataclass +class SpdxObject: + def to_dict(self) -> dict[str, Any]: + def _to_dict(v: Any): + return v.to_dict() if hasattr(v, "to_dict") else v + + d: dict[str, Any] = {} + for field_name in self.__dataclass_fields__: + value = getattr(self, field_name) + if value is None or value == [] or value == "": + continue + + if isinstance(value, Element): + d[field_name] = value.spdxId + elif isinstance(value, list) and len(value) > 0 and isinstance(value[0], Element): # type: ignore + value: list[Element] = value + d[field_name] = [v.spdxId for v in value] + else: + d[field_name] = [_to_dict(v) for v in value] if isinstance(value, list) else _to_dict(value) # type: ignore + return d + + +@dataclass(kw_only=True) +class IntegrityMethod(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/IntegrityMethod/""" + + +@dataclass(kw_only=True) +class Hash(IntegrityMethod): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Hash/""" + + type: str = field(init=False, default="Hash") + hashValue: str + algorithm: HashAlgorithm + + +@dataclass(kw_only=True) +class Element(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Element/""" + + type: str = field(init=False, default="Element") + spdxId: SpdxId + creationInfo: str = "_:creationinfo" + name: str | None = None + verifiedUsing: list[Hash] = field(default_factory=list) + comment: str | None = None + + +@dataclass(kw_only=True) +class ExternalMap(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ExternalMap/""" + + type: str = field(init=False, default="ExternalMap") + externalSpdxId: SpdxId + + +@dataclass(kw_only=True) 
+class NamespaceMap(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/NamespaceMap/""" + + type: str = field(init=False, default="NamespaceMap") + prefix: str + namespace: str + + +@dataclass(kw_only=True) +class ElementCollection(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/ElementCollection/""" + + type: str = field(init=False, default="ElementCollection") + element: list[Element] = field(default_factory=list) + rootElement: list[Element] = field(default_factory=list) + profileConformance: list[ProfileIdentifierType] = field(default_factory=list) + + +@dataclass(kw_only=True) +class SpdxDocument(ElementCollection): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SpdxDocument/""" + + type: str = field(init=False, default="SpdxDocument") + import_: list[ExternalMap] = field(default_factory=list) + namespaceMap: list[NamespaceMap] = field(default_factory=list) + + def to_dict(self) -> dict[str, Any]: + return {("import" if k == "import_" else k): v for k, v in super().to_dict().items()} + + +@dataclass(kw_only=True) +class Agent(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Agent/""" + + type: str = field(init=False, default="Agent") + + +@dataclass(kw_only=True) +class SoftwareAgent(Agent): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/SoftwareAgent/""" + + type: str = field(init=False, default="SoftwareAgent") + + +@dataclass(kw_only=True) +class CreationInfo(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/CreationInfo/""" + + type: str = field(init=False, default="CreationInfo") + id: SpdxId = "_:creationinfo" + specVersion: str = SPDX_SPEC_VERSION + createdBy: list[Agent] + created: str + comment: str | None = None + + def to_dict(self) -> dict[str, Any]: + return {("@id" if k == "id" else k): v for k, v in super().to_dict().items()} + + +@dataclass(kw_only=True) +class Relationship(Element): + 
"""https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Relationship/""" + + type: str = field(init=False, default="Relationship") + relationshipType: RelationshipType + from_: Element # underscore because 'from' is a reserved keyword + to: list[Element] + completeness: RelationshipCompleteness | None = None + + def to_dict(self) -> dict[str, Any]: + return {("from" if k == "from_" else k): v for k, v in super().to_dict().items()} + + +@dataclass(kw_only=True) +class Artifact(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/Artifact/""" + + type: str = field(init=False, default="Artifact") + + +@dataclass(kw_only=True) +class DictionaryEntry(SpdxObject): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Core/Classes/DictionaryEntry/""" + + type: str = field(init=False, default="DictionaryEntry") + key: str + value: str diff --git a/scripts/sbom/sbom/spdx/serialization.py b/scripts/sbom/sbom/spdx/serialization.py new file mode 100644 index 0000000000000..b4df7d368d467 --- /dev/null +++ b/scripts/sbom/sbom/spdx/serialization.py @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +import json +from typing import Any +from sbom.path_utils import PathStr +from sbom.spdx.core import SPDX_SPEC_VERSION, SpdxDocument, SpdxObject + + +class JsonLdSpdxDocument: + """Represents an SPDX document in JSON-LD format for serialization.""" + + graph: list[SpdxObject] + + def __init__(self, graph: list[SpdxObject]) -> None: + """ + Initialize a JSON-LD SPDX document from a graph of SPDX objects. + The graph must contain a single SpdxDocument element. + + Args: + graph: List of SPDX objects representing the complete SPDX document. 
+ """ + self.graph = graph + + @property + def context(self) -> list[str | dict[str, str]]: + spdx_document = next(element for element in self.graph if isinstance(element, SpdxDocument)) + return [ + f"https://spdx.org/rdf/{SPDX_SPEC_VERSION}/spdx-context.jsonld", + {ns.prefix: ns.namespace for ns in spdx_document.namespaceMap}, + ] + + def to_dict(self) -> dict[str, Any]: + """ + Convert the SPDX document to a dictionary representation suitable for JSON serialization. + + Returns: + Dictionary with @context and @graph keys following JSON-LD format. + """ + def _item_to_dict(item: SpdxObject) -> dict: + d = item.to_dict() + if isinstance(item, SpdxDocument): + d.pop("namespaceMap", None) + return d + return { + "@context": self.context, + "@graph": [_item_to_dict(item) for item in self.graph], + } + + def save(self, path: PathStr, prettify: bool) -> None: + """ + Save the SPDX document to a JSON file. + + Args: + path: File path where the document will be saved. + prettify: Whether to pretty-print the JSON with indentation. 
+ """ + with open(path, "w", encoding="utf-8") as f: + if prettify: + json.dump(self.to_dict(), f, indent=2) + else: + json.dump(self.to_dict(), f, separators=(",", ":")) diff --git a/scripts/sbom/sbom/spdx/simplelicensing.py b/scripts/sbom/sbom/spdx/simplelicensing.py new file mode 100644 index 0000000000000..750ddd24ad895 --- /dev/null +++ b/scripts/sbom/sbom/spdx/simplelicensing.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field +from sbom.spdx.core import Element + + +@dataclass(kw_only=True) +class AnyLicenseInfo(Element): + """https://spdx.github.io/spdx-spec/v3.0.1/model/SimpleLicensing/Classes/AnyLicenseInfo/""" + + type: str = field(init=False, default="simplelicensing_AnyLicenseInfo") + + +@dataclass(kw_only=True) +class LicenseExpression(AnyLicenseInfo): + """https://spdx.github.io/spdx-spec/v3.0.1/model/SimpleLicensing/Classes/LicenseExpression/""" + + type: str = field(init=False, default="simplelicensing_LicenseExpression") + simplelicensing_licenseExpression: str diff --git a/scripts/sbom/sbom/spdx/software.py b/scripts/sbom/sbom/spdx/software.py new file mode 100644 index 0000000000000..2f46de7c31679 --- /dev/null +++ b/scripts/sbom/sbom/spdx/software.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass, field +from typing import Literal +from sbom.spdx.core import Artifact, ElementCollection, IntegrityMethod + + +SbomType = Literal["source", "build"] +FileKindType = Literal["file", "directory"] +SoftwarePurpose = Literal[ + "source", + "archive", + "library", + "file", + "data", + "configuration", + "executable", + "module", + "application", + "documentation", + "other", +] +ContentIdentifierType = Literal["gitoid", "swhid"] + + +@dataclass(kw_only=True) +class Sbom(ElementCollection): + 
"""https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Sbom/""" + + type: str = field(init=False, default="software_Sbom") + software_sbomType: list[SbomType] = field(default_factory=list) + + +@dataclass(kw_only=True) +class ContentIdentifier(IntegrityMethod): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/ContentIdentifier/""" + + type: str = field(init=False, default="software_ContentIdentifier") + software_contentIdentifierType: ContentIdentifierType + software_contentIdentifierValue: str + + +@dataclass(kw_only=True) +class SoftwareArtifact(Artifact): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/SoftwareArtifact/""" + + type: str = field(init=False, default="software_Artifact") + software_primaryPurpose: SoftwarePurpose | None = None + software_copyrightText: str | None = None + software_contentIdentifier: list[ContentIdentifier] = field(default_factory=list) + + +@dataclass(kw_only=True) +class Package(SoftwareArtifact): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/Package/""" + + type: str = field(init=False, default="software_Package") + name: str # type: ignore + software_packageVersion: str | None = None + + +@dataclass(kw_only=True) +class File(SoftwareArtifact): + """https://spdx.github.io/spdx-spec/v3.0.1/model/Software/Classes/File/""" + + type: str = field(init=False, default="software_File") + name: str # type: ignore + software_fileKind: FileKindType | None = None diff --git a/scripts/sbom/sbom/spdx/spdxId.py b/scripts/sbom/sbom/spdx/spdxId.py new file mode 100644 index 0000000000000..589e85c5f7064 --- /dev/null +++ b/scripts/sbom/sbom/spdx/spdxId.py @@ -0,0 +1,36 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from itertools import count +from typing import Iterator + +SpdxId = str + + +class SpdxIdGenerator: + _namespace: str + _prefix: str | None = None + _counter: Iterator[int] + + def __init__(self, 
namespace: str, prefix: str | None = None) -> None: + """ + Initialize the SPDX ID generator with a namespace. + + Args: + namespace: The full namespace to use for generated IDs. + prefix: Optional. If provided, generated IDs will use this prefix instead of the full namespace. + """ + self._namespace = namespace + self._prefix = prefix + self._counter = count(0) + + def generate(self) -> SpdxId: + return f"{f'{self._prefix}:' if self._prefix else self._namespace}{next(self._counter)}" + + @property + def prefix(self) -> str | None: + return self._prefix + + @property + def namespace(self) -> str: + return self._namespace diff --git a/scripts/sbom/sbom/spdx_graph/__init__.py b/scripts/sbom/sbom/spdx_graph/__init__.py new file mode 100644 index 0000000000000..3557b1d51bf93 --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/__init__.py @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from .build_spdx_graphs import build_spdx_graphs +from .spdx_graph_model import SpdxIdGeneratorCollection + +__all__ = ["build_spdx_graphs", "SpdxIdGeneratorCollection"] diff --git a/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py new file mode 100644 index 0000000000000..ee24e9eaf603c --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/build_spdx_graphs.py @@ -0,0 +1,83 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from datetime import datetime +from typing import Protocol + +import logging +from sbom.config import KernelSpdxDocumentKind +from sbom.cmd_graph import CmdGraph +from sbom.path_utils import PathStr +from sbom.spdx_graph.kernel_file import KernelFileCollection +from sbom.spdx_graph.spdx_graph_model import SpdxGraph, SpdxIdGeneratorCollection +from sbom.spdx_graph.shared_spdx_elements import SharedSpdxElements +from sbom.spdx_graph.spdx_source_graph import SpdxSourceGraph +from 
sbom.spdx_graph.spdx_build_graph import SpdxBuildGraph +from sbom.spdx_graph.spdx_output_graph import SpdxOutputGraph + + +class SpdxGraphConfig(Protocol): + obj_tree: PathStr + src_tree: PathStr + created: datetime + build_type: str + build_id: str | None + package_license: str + package_version: str | None + package_copyright_text: str | None + + +def build_spdx_graphs( + cmd_graph: CmdGraph, + spdx_id_generators: SpdxIdGeneratorCollection, + config: SpdxGraphConfig, +) -> dict[KernelSpdxDocumentKind, SpdxGraph]: + """ + Builds SPDX graphs (output, source, and build) based on a cmd dependency graph. + If the source and object trees are identical, no dedicated source graph can be created. + In that case the source files are added to the build graph instead. + + Args: + cmd_graph: The dependency graph of a kernel build. + spdx_id_generators: Collection of SPDX ID generators. + config: Configuration options. + + Returns: + Dictionary of SPDX graphs + """ + shared_elements = SharedSpdxElements.create(spdx_id_generators.base, config.created) + kernel_files = KernelFileCollection.create(cmd_graph, config.obj_tree, config.src_tree, spdx_id_generators) + output_graph = SpdxOutputGraph.create( + root_files=list(kernel_files.output.values()), + shared_elements=shared_elements, + spdx_id_generators=spdx_id_generators, + config=config, + ) + spdx_graphs: dict[KernelSpdxDocumentKind, SpdxGraph] = { + KernelSpdxDocumentKind.OUTPUT: output_graph, + } + + if len(kernel_files.source) > 0: + spdx_graphs[KernelSpdxDocumentKind.SOURCE] = SpdxSourceGraph.create( + source_files=list(kernel_files.source.values()), + external_files=list(kernel_files.external.values()), + shared_elements=shared_elements, + spdx_id_generators=spdx_id_generators, + ) + else: + logging.info( + "Skipped creating a dedicated source SBOM because source files cannot be " + "reliably classified when the source and object trees are identical. " + "Added source files to the build SBOM instead." 
+ ) + + build_graph = SpdxBuildGraph.create( + cmd_graph, + kernel_files, + shared_elements, + output_graph.high_level_build_element, + spdx_id_generators, + ) + spdx_graphs[KernelSpdxDocumentKind.BUILD] = build_graph + + return spdx_graphs diff --git a/scripts/sbom/sbom/spdx_graph/kernel_file.py b/scripts/sbom/sbom/spdx_graph/kernel_file.py new file mode 100644 index 0000000000000..505f25f66ebba --- /dev/null +++ b/scripts/sbom/sbom/spdx_graph/kernel_file.py @@ -0,0 +1,315 @@ +# SPDX-License-Identifier: GPL-2.0-only OR MIT +# Copyright (C) 2025 TNG Technology Consulting GmbH + +from dataclasses import dataclass +from enum import Enum +import hashlib +import os +import re +from sbom.cmd_graph import CmdGraph +from sbom.path_utils import PathStr, is_relative_to +from sbom.spdx import SpdxId, SpdxIdGenerator +from sbom.spdx.core import Hash +from sbom.spdx.software import ContentIdentifier, File, SoftwarePurpose +import sbom.sbom_logging as sbom_logging +from sbom.spdx_graph.spdx_graph_model import SpdxIdGeneratorCollection + + +class KernelFileLocation(Enum): + """Represents the location of a file relative to the source/object trees.""" + + SOURCE_TREE = "source_tree" + """File is located in the source tree.""" + OBJ_TREE = "obj_tree" + """File is located in the object tree.""" + EXTERNAL = "external" + """File is located outside both source and object trees.""" + BOTH = "both" + """File is located in a folder that is both source and object tree.""" + + +@dataclass +class KernelFile: + """kernel-specific metadata used to generate an SPDX File element.""" + + absolute_path: PathStr + """Absolute path of the file.""" + file_location: KernelFileLocation + """Location of the file relative to the source/object trees.""" + name: str + """Name of the file element. Should be relative to the source tree if + file_location equals SOURCE_TREE and relative to the object tree if + file_location equals OBJ_TREE. 
If file_location equals EXTERNAL, the + absolute path is used.""" + license_identifier: str | None + """SPDX license ID if file_location equals SOURCE_TREE or BOTH; otherwise None.""" + spdx_id_generator: SpdxIdGenerator + """Generator for the SPDX ID of the file element.""" + + _spdx_file_element: File | None = None + + @classmethod + def create( + cls, + absolute_path: PathStr, + obj_tree: PathStr, + src_tree: PathStr, + spdx_id_generators: SpdxIdGeneratorCollection, + is_output: bool, + ) -> "KernelFile": + is_in_obj_tree = is_relative_to(absolute_path, obj_tree) + is_in_src_tree = is_relative_to(absolute_path, src_tree) + + # file element name should be relative to output or src tree if possible + if not is_in_src_tree and not is_in_obj_tree: + file_element_name = str(absolute_path) + file_location = KernelFileLocation.EXTERNAL + spdx_id_generator = spdx_id_generators.source if src_tree != obj_tree else spdx_id_generators.build + elif is_in_src_tree and src_tree == obj_tree: + file_element_name = os.path.relpath(absolute_path, obj_tree) + file_location = KernelFileLocation.BOTH + spdx_id_generator = spdx_id_generators.output if is_output else spdx_id_generators.build + elif is_in_obj_tree: + file_element_name = os.path.relpath(absolute_path, obj_tree) + file_location = KernelFileLocation.OBJ_TREE + spdx_id_generator = spdx_id_generators.output if is_output else spdx_id_generators.build + else: + file_element_name = os.path.relpath(absolute_path, src_tree) + file_location = KernelFileLocation.SOURCE_TREE + spdx_id_generator = spdx_id_generators.source + + # parse spdx license identifier + license_identifier = ( + _parse_spdx_license_identifier(absolute_path) + if file_location == KernelFileLocation.SOURCE_TREE or file_location == KernelFileLocation.BOTH + else None + ) + + return KernelFile( + absolute_path, + file_location, + file_element_name, + license_identifier, + spdx_id_generator, + ) + + @property + def spdx_file_element(self) -> File: + if 
self._spdx_file_element is None: + self._spdx_file_element = _build_file_element( + self.absolute_path, + self.name, + self.spdx_id_generator.generate(), + self.file_location, + ) + return self._spdx_file_element + + +@dataclass +class KernelFileCollection: + """Collection of kernel files.""" + + source: dict[PathStr, KernelFile] + build: dict[PathStr, KernelFile] + output: dict[PathStr, KernelFile] + external: dict[PathStr, KernelFile] + + @classmethod + def create( + cls, + cmd_graph: CmdGraph, + obj_tree: PathStr, + src_tree: PathStr, + spdx_id_generators: SpdxIdGeneratorCollection, + ) -> "KernelFileCollection": + source: dict[PathStr, KernelFile] = {} + build: dict[PathStr, KernelFile] = {} + output: dict[PathStr, KernelFile] = {} + external: dict[PathStr, KernelFile] = {} + root_node_paths = {node.absolute_path for node in cmd_graph.roots} + for node in cmd_graph: + is_root = node.absolute_path in root_node_paths + kernel_file = KernelFile.create( + node.absolute_path, + obj_tree, + src_tree, + spdx_id_generators, + is_root, + ) + if is_root: + output[kernel_file.absolute_path] = kernel_file + elif kernel_file.file_location == KernelFileLocation.SOURCE_TREE: + source[kernel_file.absolute_path] = kernel_file + elif kernel_file.file_location == KernelFileLocation.EXTERNAL: + external[kernel_file.absolute_path] = kernel_file + else: + build[kernel_file.absolute_path] = kernel_file + + return KernelFileCollection(source, build, output, external) + + def to_dict(self) -> dict[PathStr, KernelFile]: + return {**self.source, **self.build, **self.output, **self.external} + + +def _build_file_element(absolute_path: PathStr, name: str, spdx_id: SpdxId, file_location: KernelFileLocation) -> File: + verifiedUsing: list[Hash] = [] + content_identifier: list[ContentIdentifier] = [] + if os.path.isfile(absolute_path): + verifiedUsing = [Hash(algorithm="sha256", hashValue=_sha256(absolute_path))] + content_identifier = [ + ContentIdentifier( + 
software_contentIdentifierType="gitoid", + software_contentIdentifierValue=_git_blob_oid(absolute_path), + ) + ] + elif file_location == KernelFileLocation.EXTERNAL: + sbom_logging.warning( + "Cannot compute hash for {absolute_path} because file does not exist.", + absolute_path=absolute_path, + ) + else: + sbom_logging.error( + "Cannot compute hash for {absolute_path} because file does not exist.", + absolute_path=absolute_path, + ) + + # primary purpose + primary_purpose = _get_primary_purpose(absolute_path) + + return File( + spdxId=spdx_id, + name=name, + verifiedUsing=verifiedUsing, + software_primaryPurpose=primary_purpose, + software_contentIdentifier=content_identifier, + ) + + +def _sha256(file_path: PathStr, chunk_size: int = 1 << 20) -> str: + """Compute the SHA-256 hex digest of a file, reading it in chunks of chunk_size bytes.""" + h = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(chunk_size), b""): + h.update(chunk) + return h.hexdigest() + + +def _git_blob_oid(file_path: str, chunk_size: int = 1 << 20) -> str: + """Compute the Git blob object ID (SHA-1 hex) for a file, like `git hash-object`, reading it in chunks of chunk_size bytes.""" + h = hashlib.sha1() + h.update(f"blob {os.path.getsize(file_path)}\0".encode()) + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(chunk_size), b""): + h.update(chunk) + return h.hexdigest() + + +# REUSE-IgnoreStart +SPDX_LICENSE_IDENTIFIER_PATTERN = re.compile( + r"SPDX-License-Identifier:" # literal tag + r"\s*" # optional whitespace after colon + r"(?P<id>.*?)" # license expression (non-greedy, stops before terminator) + r"(?:\s*" # optional whitespace before terminator (not captured) + r"(-->|\*/|$))", # terminator: XML "-->", C-style "*/", or end of line + re.MULTILINE, # match end of each line, not just end of string +) +# REUSE-IgnoreEnd + + +def _parse_spdx_license_identifier(absolute_path: str, max_bytes: int = 512) -> str | None: + """ + Extracts 
the SPDX-License-Identifier from the beginning of a source file. + + Args: + absolute_path: Path to the source file. + max_bytes: Maximum number of bytes to scan for the license identifier. + + Returns: + The license identifier string (e.g., 'GPL-2.0-only') if found, otherwise None. + """ + try: + with open(absolute_path, "r", encoding="utf-8") as f: + match = SPDX_LICENSE_IDENTIFIER_PATTERN.search(f.read(max_bytes)) + if match: + return match.group("id") + except (UnicodeDecodeError, OSError): + return None + return None + + +def _get_primary_purpose(absolute_path: PathStr) -> SoftwarePurpose | None: + def ends_with(suffixes: list[str]) -> bool: + return any(absolute_path.endswith(suffix) for suffix in suffixes) + + def includes_path_segments(path_segments: list[str]) -> bool: + return any(segment in absolute_path for segment in path_segments) + + # Source code + if ends_with([".c", ".h", ".S", ".s", ".rs", ".pl", "gen_smb1_mapping", "gen_smb2_mapping"]): + return "source" + + # Libraries + if ends_with([".a", ".so", ".so.raw", ".rlib"]): + return "library" + + # Archives + if ends_with([".xz", ".cpio", ".gz", ".tar", ".zip", "piggy_data"]): + return "archive" + + # Applications + if ends_with(["bzImage", "Image", ".efi"]): + return "application" + + # Executables / machine code + if ends_with([".bin", ".elf", "vmlinux", "vmlinux.unstripped", "vmlinuz", "bpfilter_umh"]): + return "executable" + + # Kernel modules + if ends_with([".ko"]): + return "module" + + # Data files + if ends_with( + [ + ".tbl", + ".relocs", + ".rmeta", + ".in", + ".dbg", + ".x509", + ".pbm", + ".ppm", + ".dtb", + ".uc", + ".inc", + ".dts", + ".dtsi", + ".dtbo", + ".xml", + ".ro", + "initramfs_inc_data", + "default_cpio_list", + "x509_certificate_list", + "utf8data.c_shipped", + "blacklist_hash_list", + "x509_revocation_list", + "cpucaps", + "sysreg", + "mach-types", + ] + ) or includes_path_segments(["drivers/gpu/drm/radeon/reg_srcs/"]): + return "data" + + # Configuration files + if 
@dataclass(frozen=True)
class SharedSpdxElements:
    """Agent and creation info that are reused by several SPDX documents."""

    agent: SoftwareAgent
    creation_info: CreationInfo

    @classmethod
    def create(cls, spdx_id_generator: SpdxIdGenerator, created: datetime) -> "SharedSpdxElements":
        """
        Creates shared SPDX elements used across multiple documents.

        Args:
            spdx_id_generator: Generator for creating SPDX IDs.
            created: SPDX 'created' property used for the creation info. It is
                converted to UTC and rendered with second precision.

        Returns:
            SharedSpdxElements with agent and creation info.
        """
        tool_agent = SoftwareAgent(
            spdxId=spdx_id_generator.generate(),
            name="KernelSbom",
        )
        created_utc = created.astimezone(timezone.utc)
        info = CreationInfo(
            createdBy=[tool_agent],
            created=created_utc.strftime("%Y-%m-%dT%H:%M:%SZ"),
        )
        return SharedSpdxElements(agent=tool_agent, creation_info=info)
def _create_spdx_build_graph(
    cmd_graph: CmdGraph,
    kernel_files: KernelFileCollection,
    shared_elements: SharedSpdxElements,
    high_level_build_element: Build,
    spdx_id_generators: SpdxIdGeneratorCollection,
) -> SpdxBuildGraph:
    """
    Creates an SPDX build graph where source and output files are referenced
    from external documents.

    Args:
        cmd_graph: The dependency graph of a kernel build.
        kernel_files: Collection of categorized kernel files involved in the build.
        shared_elements: SPDX elements shared across multiple documents.
        high_level_build_element: The high-level Build element referenced by the build graph.
        spdx_id_generators: Collection of generators for SPDX element IDs.

    Returns:
        SpdxBuildGraph: The SPDX build graph connecting source files and distributable output files.
    """
    # NOTE(review): the statements below call generate() in a fixed sequence and
    # touch the lazily created spdx_file_element of each file; presumably the
    # resulting IDs depend on that sequence, so keep the order when editing.

    # SpdxDocument; the namespace map covers all four generators because source
    # and output elements are referenced from this document.
    build_spdx_document = SpdxDocument(
        spdxId=spdx_id_generators.build.generate(),
        profileConformance=["core", "software", "build"],
        namespaceMap=[
            NamespaceMap(prefix=generator.prefix, namespace=generator.namespace)
            for generator in [
                spdx_id_generators.build,
                spdx_id_generators.source,
                spdx_id_generators.output,
                spdx_id_generators.base,
            ]
            if generator.prefix is not None
        ],
    )

    # Sbom
    build_sbom = Sbom(
        spdxId=spdx_id_generators.build.generate(),
        software_sbomType=["build"],
    )

    # Object tree directory element and its "contains" relationship; the
    # relationship targets are filled in further down.
    obj_tree_element = File(
        spdxId=spdx_id_generators.build.generate(),
        name="$(obj_tree)",
        software_fileKind="directory",
    )
    obj_tree_contains_relationship = Relationship(
        spdxId=spdx_id_generators.build.generate(),
        relationshipType="contains",
        from_=obj_tree_element,
        to=[],
    )

    # File elements; relationships are built over all files (source, build,
    # output and external), not only over the build files.
    build_file_elements = [file.spdx_file_element for file in kernel_files.build.values()]
    file_relationships = _file_relationships(
        cmd_graph=cmd_graph,
        file_elements={key: file.spdx_file_element for key, file in kernel_files.to_dict().items()},
        high_level_build_element=high_level_build_element,
        spdx_id_generator=spdx_id_generators.build,
    )

    # Update relationships
    build_spdx_document.rootElement = [build_sbom]

    # Everything that is not part of build_sbom.element but is referenced from
    # it is declared as an external import: source and external files, the
    # high-level build element, and the output files.
    build_spdx_document.import_ = [
        *(
            ExternalMap(externalSpdxId=file.spdx_file_element.spdxId)
            for file in (*kernel_files.source.values(), *kernel_files.external.values())
        ),
        ExternalMap(externalSpdxId=high_level_build_element.spdxId),
        *(ExternalMap(externalSpdxId=file.spdx_file_element.spdxId) for file in kernel_files.output.values()),
    ]

    build_sbom.rootElement = [obj_tree_element]
    build_sbom.element = [
        obj_tree_element,
        obj_tree_contains_relationship,
        *build_file_elements,
        *file_relationships,
    ]

    # The object tree contains the build files and the output files.
    obj_tree_contains_relationship.to = [
        *build_file_elements,
        *(file.spdx_file_element for file in kernel_files.output.values()),
    ]

    # create Spdx graphs
    build_graph = SpdxBuildGraph(
        build_spdx_document,
        shared_elements.agent,
        shared_elements.creation_info,
        build_sbom,
    )
    return build_graph
def _file_relationships(
    cmd_graph: CmdGraph,
    file_elements: Mapping[PathStr, File],
    high_level_build_element: Build,
    spdx_id_generator: SpdxIdGenerator,
) -> list[Build | Relationship]:
    """
    Translate the dependencies recorded in the cmd graph into SPDX Build and
    Relationship elements.

    For every node that has a .cmd file a Build element is emitted together
    with its hasInput (only if there are inputs) and hasOutput relationships.
    incbin and hardcoded dependencies become dependsOn relationships. The
    first element of the result is an ancestorOf relationship from the
    high-level build element to all emitted Build elements.

    Args:
        cmd_graph: The dependency graph of a kernel build.
        file_elements: Mapping of filesystem paths (PathStr) to their
            corresponding SPDX File elements.
        high_level_build_element: The SPDX Build element representing the overall build process/root.
        spdx_id_generator: Generator for unique SPDX IDs.

    Returns:
        list[Build | Relationship]: List of SPDX Build and Relationship elements
    """
    ancestor_of = Relationship(
        spdxId=spdx_id_generator.generate(),
        relationshipType="ancestorOf",
        from_=high_level_build_element,
        completeness="complete",
        to=[],
    )
    collected: list[Build | Relationship] = [ancestor_of]

    for graph_node in cmd_graph:
        # .cmd file dependencies: the node is the output, its children the inputs
        if graph_node.cmd_file is not None:
            step = Build(
                spdxId=spdx_id_generator.generate(),
                build_buildType=high_level_build_element.build_buildType,
                build_buildId=high_level_build_element.build_buildId,
                comment=graph_node.cmd_file.savedcmd,
            )
            collected.append(step)

            if graph_node.cmd_file_dependencies:
                collected.append(
                    Relationship(
                        spdxId=spdx_id_generator.generate(),
                        relationshipType="hasInput",
                        from_=step,
                        to=[file_elements[dep.absolute_path] for dep in graph_node.cmd_file_dependencies],
                    )
                )

            collected.append(
                Relationship(
                    spdxId=spdx_id_generator.generate(),
                    relationshipType="hasOutput",
                    from_=step,
                    to=[file_elements[graph_node.absolute_path]],
                )
            )

            ancestor_of.to.append(step)

        # incbin dependencies; the original statements are kept as the comment
        if len(graph_node.incbin_dependencies) > 0:
            collected.append(
                Relationship(
                    spdxId=spdx_id_generator.generate(),
                    relationshipType="dependsOn",
                    comment="\n".join([incbin.full_statement for incbin in graph_node.incbin_dependencies]),
                    from_=file_elements[graph_node.absolute_path],
                    to=[file_elements[incbin.node.absolute_path] for incbin in graph_node.incbin_dependencies],
                )
            )

        # hardcoded dependencies
        if len(graph_node.hardcoded_dependencies) > 0:
            collected.append(
                Relationship(
                    spdxId=spdx_id_generator.generate(),
                    relationshipType="dependsOn",
                    from_=file_elements[graph_node.absolute_path],
                    to=[file_elements[dep.absolute_path] for dep in graph_node.hardcoded_dependencies],
                )
            )

    return collected
class SpdxOutputGraphConfig(Protocol):
    """Configuration values read by SpdxOutputGraph.create.

    Any object that provides these attributes satisfies the protocol.
    """

    # Object tree root; create() looks for the ".config" file directly below it.
    obj_tree: PathStr
    # Source tree root; passed to KernelFile.create together with obj_tree.
    src_tree: PathStr
    # Stored as build_buildType of the high-level Build element.
    build_type: str
    # Stored as build_buildId; when None, the Build element's own SPDX ID is used.
    build_id: str | None
    # License expression that every Package is declared to have.
    package_license: str
    # Passed through as software_packageVersion of every Package.
    package_version: str | None
    # Passed through as software_copyrightText of every Package.
    package_copyright_text: str | None
+ spdx_id_generators: Collection of SPDX ID generators. + config: Configuration options. + + Returns: + SpdxOutputGraph: The SPDX output graph. + """ + # SpdxDocument + spdx_document = SpdxDocument( + spdxId=spdx_id_generators.output.generate(), + profileConformance=["core", "software", "build", "simpleLicensing"], + namespaceMap=[ + NamespaceMap(prefix=generator.prefix, namespace=generator.namespace) + for generator in [spdx_id_generators.output, spdx_id_generators.base] + if generator.prefix is not None + ], + ) + + # Sbom + sbom = Sbom( + spdxId=spdx_id_generators.output.generate(), + software_sbomType=["build"], + ) + + # High-level Build elements + config_source_element = KernelFile.create( + absolute_path=os.path.join(config.obj_tree, ".config"), + obj_tree=config.obj_tree, + src_tree=config.src_tree, + spdx_id_generators=spdx_id_generators, + is_output=True, + ).spdx_file_element + high_level_build_element, high_level_build_element_hasOutput_relationship = _high_level_build_elements( + config.build_type, + config.build_id, + config_source_element, + spdx_id_generators.output, + ) + + # Root file elements + root_file_elements: list[File] = [file.spdx_file_element for file in root_files] + + # Package elements + package_elements = [ + Package( + spdxId=spdx_id_generators.output.generate(), + name=_get_package_name(file.name), + software_packageVersion=config.package_version, + software_copyrightText=config.package_copyright_text, + comment=f"Architecture={arch}" if (arch := Environment.ARCH() or Environment.SRCARCH()) else None, + software_primaryPurpose=file.software_primaryPurpose, + ) + for file in root_file_elements + ] + package_hasDistributionArtifact_file_relationships = [ + Relationship( + spdxId=spdx_id_generators.output.generate(), + relationshipType="hasDistributionArtifact", + from_=package, + to=[file], + ) + for package, file in zip(package_elements, root_file_elements) + ] + package_license_expression = LicenseExpression( + 
def _get_package_name(filename: str) -> str:
    """
    Derives the SPDX package name for an output file.

    Only the last path component is considered. The kernel images "bzImage"
    and "Image" are named "Linux Kernel (<basename>)"; every other file is
    named after its basename.
    """
    leaf = os.path.basename(filename)
    if leaf not in ("bzImage", "Image"):
        return leaf
    return f"Linux Kernel ({leaf})"
files""" + + @classmethod + def create( + cls, + source_files: list[KernelFile], + external_files: list[KernelFile], + shared_elements: SharedSpdxElements, + spdx_id_generators: SpdxIdGeneratorCollection, + ) -> "SpdxSourceGraph": + """ + Args: + source_files: List of files within the kernel source tree. + external_files: Files outside both source and object trees. + shared_elements: Shared SPDX elements used across multiple documents. + spdx_id_generators: Collection of SPDX ID generators. + + Returns: + SpdxSourceGraph: The SPDX source graph. + """ + # SpdxDocument + source_spdx_document = SpdxDocument( + spdxId=spdx_id_generators.source.generate(), + profileConformance=["core", "software", "simpleLicensing"], + namespaceMap=[ + NamespaceMap(prefix=generator.prefix, namespace=generator.namespace) + for generator in [spdx_id_generators.source, spdx_id_generators.base] + if generator.prefix is not None + ], + ) + + # Sbom + source_sbom = Sbom( + spdxId=spdx_id_generators.source.generate(), + software_sbomType=["source"], + ) + + # Src Tree Elements + src_tree_element = File( + spdxId=spdx_id_generators.source.generate(), + name="$(src_tree)", + software_fileKind="directory", + ) + src_tree_contains_relationship = Relationship( + spdxId=spdx_id_generators.source.generate(), + relationshipType="contains", + from_=src_tree_element, + to=[], + ) + + # Source file elements + source_file_elements: list[Element] = [file.spdx_file_element for file in source_files] + external_file_elements: list[Element] = [file.spdx_file_element for file in external_files] + + # Source file license elements + source_file_license_identifiers, source_file_license_relationships = source_file_license_elements( + source_files, spdx_id_generators.source + ) + + # Update relationships + source_spdx_document.rootElement = [source_sbom] + source_sbom.rootElement = [src_tree_element] + source_sbom.element = [ + src_tree_element, + src_tree_contains_relationship, + *source_file_elements, + 
def source_file_license_elements(
    source_files: list[KernelFile], spdx_id_generator: SpdxIdGenerator
) -> tuple[list[LicenseExpression], list[Relationship]]:
    """
    Builds one LicenseExpression per distinct license identifier and one
    hasDeclaredLicense relationship per file that carries an identifier.

    Files whose license_identifier is None are ignored. All license
    expressions are created before the first relationship.

    Args:
        source_files: List of files within the kernel source tree.
        spdx_id_generator: Generator for unique SPDX IDs.

    Returns:
        Tuple of (license expressions, hasDeclaredLicense relationships).
    """
    # First pass: one expression per identifier, in order of first appearance.
    expression_by_id: dict[str, LicenseExpression] = {}
    for kernel_file in source_files:
        identifier = kernel_file.license_identifier
        if identifier is not None and identifier not in expression_by_id:
            expression_by_id[identifier] = LicenseExpression(
                spdxId=spdx_id_generator.generate(),
                simplelicensing_licenseExpression=identifier,
            )

    # Second pass: link every licensed file to its (shared) expression.
    declared: list[Relationship] = []
    for kernel_file in source_files:
        if kernel_file.license_identifier is None:
            continue
        declared.append(
            Relationship(
                spdxId=spdx_id_generator.generate(),
                relationshipType="hasDeclaredLicense",
                from_=kernel_file.spdx_file_element,
                to=[expression_by_id[kernel_file.license_identifier]],
            )
        )

    return (list(expression_by_id.values()), declared)
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Copyright (C) 2025 TNG Technology Consulting GmbH

import os
import unittest
from unittest.mock import patch

from sbom.cmd_graph.savedcmd_parser import parse_inputs_from_commands
from sbom.cmd_graph.savedcmd_parser.command_parser_registry import CommandParserRegistry
import sbom.sbom_logging as sbom_logging


class TestSavedCmdParser(unittest.TestCase):
    """Checks which input files ``parse_inputs_from_commands`` extracts from saved build commands.

    Every test feeds one literal command line to the parser and compares the
    returned list with a space-separated list of expected input paths.
    """

    def _assert_parsing(self, cmd: str, expected: str, registry: CommandParserRegistry | None = None) -> None:
        """Parse ``cmd`` and assert that exactly the ``expected`` inputs are returned.

        Args:
            cmd: The saved command line handed to ``parse_inputs_from_commands``.
            expected: Expected input paths separated by whitespace, in order.
                An empty string means that no inputs are expected.
            registry: Optional parser registry forwarded to the parser; ``None``
                leaves the choice of registry to the parser.

        The test fails if the parsed inputs differ from ``expected`` or if
        ``sbom_logging`` reports errors after parsing.
        """
        # NOTE(review): init() presumably resets the collected messages so that
        # errors from earlier tests do not leak into this one -- confirm.
        sbom_logging.init()
        parsed = parse_inputs_from_commands(cmd, fail_on_unknown_build_command=False, registry=registry)
        # str.split() without a separator already yields [] for an empty string.
        self.assertEqual(parsed, expected.split())
        # Use the public logging API instead of reaching into private logger state.
        if sbom_logging.has_errors():
            self.fail(f"Errors were logged while parsing the command:\n{sbom_logging.summarize_errors()}")

    # Compound command tests
    def test_dd_cat(self):
        cmd = "(dd if=arch/x86/boot/setup.bin bs=4k conv=sync status=none; cat arch/x86/boot/vmlinux.bin) >arch/x86/boot/bzImage"
        expected = "arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin"
        self._assert_parsing(cmd, expected)

    def test_manual_file_creation(self):
        cmd = """{ symbase=__dtbo_overlay_bad_unresolved; echo '$(pound)include <asm-generic/vmlinux.lds.h>'; echo '.section .rodata,"a"'; echo '.balign STRUCT_ALIGNMENT'; echo ".global $${symbase}_begin"; echo "$${symbase}_begin:"; echo '.incbin "drivers/of/unittest-data/overlay_bad_unresolved.dtbo" '; echo ".global $${symbase}_end"; echo "$${symbase}_end:"; echo '.balign STRUCT_ALIGNMENT'; } > drivers/of/unittest-data/overlay_bad_unresolved.dtbo.S"""
        expected = ""
        self._assert_parsing(cmd, expected)

    def test_cat_xz_wrap(self):
        cmd = "{ cat arch/x86/boot/compressed/vmlinux.bin | sh ../scripts/xz_wrap.sh; printf \\130\\064\\024\\000; } > arch/x86/boot/compressed/vmlinux.bin.xz"
        expected = "arch/x86/boot/compressed/vmlinux.bin"
        self._assert_parsing(cmd, expected)

    def test_printf_sed(self):
        cmd = r"""{ printf 'static char tomoyo_builtin_profile[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_exception_policy[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- ../security/tomoyo/policy/exception_policy.conf.default; printf '\t"";\n'; printf 'static char tomoyo_builtin_domain_policy[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_manager[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; printf 'static char tomoyo_builtin_stat[] __initdata =\n'; sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/\t"\1\\n"/' -- /dev/null; printf '\t"";\n'; } > security/tomoyo/builtin-policy.h"""
        expected = "../security/tomoyo/policy/exception_policy.conf.default"
        self._assert_parsing(cmd, expected)

    def test_bin2c_echo(self):
        cmd = """(echo "static char tomoyo_builtin_profile[] __initdata ="; ./scripts/bin2c </dev/null; echo ";"; echo "static char tomoyo_builtin_exception_policy[] __initdata ="; ./scripts/bin2c <../security/tomoyo/policy/exception_policy.conf.default; echo ";"; echo "static char tomoyo_builtin_domain_policy[] __initdata ="; ./scripts/bin2c </dev/null; echo ";"; echo "static char tomoyo_builtin_manager[] __initdata ="; ./scripts/bin2c </dev/null; echo ";"; echo "static char tomoyo_builtin_stat[] __initdata ="; ./scripts/bin2c </dev/null; echo ";") >security/tomoyo/builtin-policy.h"""
        expected = "../security/tomoyo/policy/exception_policy.conf.default"
        self._assert_parsing(cmd, expected)

    def test_cat_colon(self):
        cmd = "{ cat init/modules.order; cat usr/modules.order; cat arch/x86/modules.order; cat arch/x86/boot/startup/modules.order; cat kernel/modules.order; cat certs/modules.order; cat mm/modules.order; cat fs/modules.order; cat ipc/modules.order; cat security/modules.order; cat crypto/modules.order; cat block/modules.order; cat io_uring/modules.order; cat lib/modules.order; cat arch/x86/lib/modules.order; cat drivers/modules.order; cat sound/modules.order; cat samples/modules.order; cat net/modules.order; cat virt/modules.order; cat arch/x86/pci/modules.order; cat arch/x86/power/modules.order; cat arch/x86/video/modules.order; :; } > modules.order"
        expected = "init/modules.order usr/modules.order arch/x86/modules.order arch/x86/boot/startup/modules.order kernel/modules.order certs/modules.order mm/modules.order fs/modules.order ipc/modules.order security/modules.order crypto/modules.order block/modules.order io_uring/modules.order lib/modules.order arch/x86/lib/modules.order drivers/modules.order sound/modules.order samples/modules.order net/modules.order virt/modules.order arch/x86/pci/modules.order arch/x86/power/modules.order arch/x86/video/modules.order"
        self._assert_parsing(cmd, expected)

    def test_cat_zstd(self):
        cmd = "{ cat arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs | zstd -22 --ultra; printf \\340\\362\\066\\003; } > arch/x86/boot/compressed/vmlinux.bin.zst"
        expected = "arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs"
        self._assert_parsing(cmd, expected)

    # cat command tests
    def test_cat_redirect(self):
        cmd = "cat ../fs/unicode/utf8data.c_shipped > fs/unicode/utf8data.c"
        expected = "../fs/unicode/utf8data.c_shipped"
        self._assert_parsing(cmd, expected)

    def test_cat_piped(self):
        cmd = "cat arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs | gzip -n -f -9 > arch/x86/boot/compressed/vmlinux.bin.gz"
        expected = "arch/x86/boot/compressed/vmlinux.bin arch/x86/boot/compressed/vmlinux.relocs"
        self._assert_parsing(cmd, expected)

    # sed command tests
    def test_sed(self):
        cmd = "sed -n 's/.*define *BLIST_\\([A-Z0-9_]*\\) *.*/BLIST_FLAG_NAME(\\1),/p' ../include/scsi/scsi_devinfo.h > drivers/scsi/scsi_devinfo_tbl.c"
        expected = "../include/scsi/scsi_devinfo.h"
        self._assert_parsing(cmd, expected)

    # awk command tests
    def test_awk(self):
        cmd = "awk -f ../arch/arm64/tools/gen-cpucaps.awk ../arch/arm64/tools/cpucaps > arch/arm64/include/generated/asm/cpucap-defs.h"
        expected = "../arch/arm64/tools/cpucaps"
        self._assert_parsing(cmd, expected)

    def test_awk_with_input_redirection(self):
        cmd = "awk -v N=1 -f ../lib/raid6/unroll.awk < ../lib/raid6/int.uc > lib/raid6/int1.c"
        expected = "../lib/raid6/int.uc"
        self._assert_parsing(cmd, expected)

    # openssl command tests
    def test_openssl(self):
        cmd = "openssl req -new -nodes -utf8 -sha256 -days 36500 -batch -x509 -config certs/x509.genkey -outform PEM -out certs/signing_key.pem -keyout certs/signing_key.pem 2>&1"
        expected = ""
        self._assert_parsing(cmd, expected)

    # gcc/clang command tests
    def test_gcc(self):
        # Adjacent string literals are concatenated into one command line.
        cmd = (
            "gcc -Wp,-MMD,arch/x86/pci/.i386.o.d -nostdinc -I../arch/x86/include -I./arch/x86/include/generated -I../include -I./include -I../arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I../include/uapi -I./include/generated/uapi -include ../include/linux/compiler-version.h -include ../include/linux/kconfig.h -include ../include/linux/compiler_types.h -D__KERNEL__ -fmacro-prefix-map=../= -Werror -std=gnu11 -fshort-wchar -funsigned-char -fno-common -fno-PIE -fno-strict-aliasing -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx -fcf-protection=branch -fno-jump-tables -m64 -falign-jumps=1 -falign-loops=1 -mno-80387 -mno-fp-ret-in-387 -mpreferred-stack-boundary=3 -mskip-rax-setup -march=x86-64 -mtune=generic -mno-red-zone -mcmodel=kernel -mstack-protector-guard-reg=gs -mstack-protector-guard-symbol=__ref_stack_chk_guard -Wno-sign-compare -fno-asynchronous-unwind-tables -mindirect-branch=thunk-extern -mindirect-branch-register -mindirect-branch-cs-prefix -mfunction-return=thunk-extern -fno-jump-tables -fpatchable-function-entry=16,16 -fno-delete-null-pointer-checks -O2 -fno-allow-store-data-races -fstack-protector-strong -fomit-frame-pointer -fno-stack-clash-protection -falign-functions=16 -fno-strict-overflow -fno-stack-check -fconserve-stack -fno-builtin-wcslen -Wall -Wextra -Wundef -Werror=implicit-function-declaration -Werror=implicit-int -Werror=return-type -Werror=strict-prototypes -Wno-format-security -Wno-trigraphs -Wno-frame-address -Wno-address-of-packed-member -Wmissing-declarations -Wmissing-prototypes -Wframe-larger-than=2048 -Wno-main -Wvla-larger-than=1 -Wno-pointer-sign -Wcast-function-type -Wno-array-bounds -Wno-stringop-overflow -Wno-alloc-size-larger-than -Wimplicit-fallthrough=5 -Werror=date-time -Werror=incompatible-pointer-types -Werror=designated-init -Wenum-conversion -Wunused -Wno-unused-but-set-variable -Wno-unused-const-variable -Wno-packed-not-aligned -Wno-format-overflow -Wno-format-truncation -Wno-stringop-truncation -Wno-override-init -Wno-missing-field-initializers -Wno-type-limits -Wno-shift-negative-value -Wno-maybe-uninitialized -Wno-sign-compare -Wno-unused-parameter -I../arch/x86/pci -Iarch/x86/pci -DKBUILD_MODFILE="
            "arch/x86/pci/i386"
            " -DKBUILD_BASENAME="
            "i386"
            " -DKBUILD_MODNAME="
            "i386"
            " -D__KBUILD_MODNAME=kmod_i386 -c -o arch/x86/pci/i386.o ../arch/x86/pci/i386.c "
        )
        expected = "../arch/x86/pci/i386.c"
        self._assert_parsing(cmd, expected)

    def test_gcc_linking(self):
        cmd = "gcc -o arch/x86/tools/relocs arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
        expected = "arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
        self._assert_parsing(cmd, expected)

    def test_gcc_without_compile_flag(self):
        cmd = "gcc -Wp,-MMD,arch/x86/boot/compressed/.mkpiggy.d -Wall -Wmissing-prototypes -Wstrict-prototypes -O2 -fomit-frame-pointer -std=gnu11 -I ../scripts/include -I../tools/include -I arch/x86/boot/compressed -o arch/x86/boot/compressed/mkpiggy ../arch/x86/boot/compressed/mkpiggy.c"
        expected = "../arch/x86/boot/compressed/mkpiggy.c"
        self._assert_parsing(cmd, expected)

    def test_gcc_with_env_override(self):
        # The registry is created while CC is patched.
        # NOTE(review): presumably CommandParserRegistry.create() reads CC from
        # the environment -- confirm against the registry implementation.
        with patch.dict(os.environ, {"CC": "ccache gcc"}):
            registry = CommandParserRegistry.create()
        cmd = "gcc -o arch/x86/tools/relocs arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
        expected = "arch/x86/tools/relocs_32.o arch/x86/tools/relocs_64.o arch/x86/tools/relocs_common.o"
        # Both the plain and the wrapper-prefixed command must be recognised.
        self._assert_parsing(cmd, expected, registry)
        self._assert_parsing(f"ccache {cmd}", expected, registry)

    def test_gcc_dts_preprocessing(self):
        cmd = "gcc -E -Wp,-MMD,drivers/of/.empty_root.dtb.d.pre.tmp -nostdinc -I ../scripts/dtc/include-prefixes -undef -D__DTS__ -x assembler-with-cpp -o drivers/of/.empty_root.dtb.dts.tmp ../drivers/of/empty_root.dts"
        expected = "../drivers/of/empty_root.dts"
        self._assert_parsing(cmd, expected)

    def test_clang(self):
        cmd = """clang -Wp,-MMD,arch/x86/entry/.entry_64_compat.o.d -nostdinc -I../arch/x86/include -I./arch/x86/include/generated -I../include -I./include -I../arch/x86/include/uapi -I./arch/x86/include/generated/uapi -I../include/uapi -I./include/generated/uapi -include ../include/linux/compiler-version.h -include ../include/linux/kconfig.h -D__KERNEL__ --target=x86_64-linux-gnu -fintegrated-as -Werror=unknown-warning-option -Werror=ignored-optimization-argument -Werror=option-ignored -Werror=unused-command-line-argument -fmacro-prefix-map=../= -Werror -D__ASSEMBLY__ -fno-PIE -m64 -I../arch/x86/entry -Iarch/x86/entry -DKBUILD_MODFILE='"arch/x86/entry/entry_64_compat"' -DKBUILD_MODNAME='"entry_64_compat"' -D__KBUILD_MODNAME=kmod_entry_64_compat -c -o arch/x86/entry/entry_64_compat.o ../arch/x86/entry/entry_64_compat.S"""
        expected = "../arch/x86/entry/entry_64_compat.S"
        self._assert_parsing(cmd, expected)

    # ld command tests
    def test_ld(self):
        cmd = r'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi'
        expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o"
        self._assert_parsing(cmd, expected)

    def test_ld_with_env_override(self):
        # The registry is created while LD is patched.
        # NOTE(review): presumably CommandParserRegistry.create() reads LD from
        # the environment -- confirm against the registry implementation.
        with patch.dict(os.environ, {"LD": "some-tool ld"}):
            registry = CommandParserRegistry.create()
        cmd = r'ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --no-undefined --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o; if readelf -rW arch/x86/entry/vdso/vdso64.so.dbg | grep -v _NONE | grep -q " R_\w*_"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi'
        expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vgetrandom.o arch/x86/entry/vdso/vgetrandom-chacha.o"
        # Both the plain and the wrapper-prefixed command must be recognised.
        self._assert_parsing(cmd, expected, registry)
        self._assert_parsing(f"some-tool {cmd}", expected, registry)

    def test_ld_whole_archive(self):
        cmd = "ld -m elf_x86_64 -z noexecstack -r -o vmlinux.o --whole-archive vmlinux.a --no-whole-archive --start-group --end-group"
        expected = "vmlinux.a"
        self._assert_parsing(cmd, expected)

    def test_ld_with_at_symbol(self):
        cmd = "ld.lld -m elf_x86_64 -z noexecstack -r -o fs/efivarfs/efivarfs.o @fs/efivarfs/efivarfs.mod ; ./tools/objtool/objtool --hacks=jump_label --hacks=noinstr --hacks=skylake --ibt --orc --retpoline --rethunk --static-call --uaccess --prefix=16 --link --module fs/efivarfs/efivarfs.o"
        expected = "@fs/efivarfs/efivarfs.mod"
        self._assert_parsing(cmd, expected)

    def test_ld_if_objdump(self):
        cmd = """ld -o arch/x86/entry/vdso/vdso64.so.dbg -shared --hash-style=both --build-id=sha1 --eh-frame-hdr -Bsymbolic -z noexecstack -m elf_x86_64 -soname linux-vdso.so.1 --no-undefined -z max-page-size=4096 -T arch/x86/entry/vdso/vdso.lds arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vsgx.o && sh ./arch/x86/entry/vdso/checkundef.sh 'nm' 'arch/x86/entry/vdso/vdso64.so.dbg'; if objdump -R arch/x86/entry/vdso/vdso64.so.dbg | grep -E -h "R_X86_64_JUMP_SLOT|R_X86_64_GLOB_DAT|R_X86_64_RELATIVE| R_386_GLOB_DAT|R_386_JMP_SLOT|R_386_RELATIVE"; then (echo >&2 "arch/x86/entry/vdso/vdso64.so.dbg: dynamic relocations are not supported"; rm -f arch/x86/entry/vdso/vdso64.so.dbg; /bin/false); fi"""
        expected = "arch/x86/entry/vdso/vdso-note.o arch/x86/entry/vdso/vclock_gettime.o arch/x86/entry/vdso/vgetcpu.o arch/x86/entry/vdso/vsgx.o"
        self._assert_parsing(cmd, expected)

    # printf | xargs ar command tests
    def test_ar_printf(self):
        cmd = 'rm -f built-in.a; printf "./%s " init/built-in.a usr/built-in.a arch/x86/built-in.a arch/x86/boot/startup/built-in.a kernel/built-in.a certs/built-in.a mm/built-in.a fs/built-in.a ipc/built-in.a security/built-in.a crypto/built-in.a block/built-in.a io_uring/built-in.a lib/built-in.a arch/x86/lib/built-in.a drivers/built-in.a sound/built-in.a net/built-in.a virt/built-in.a arch/x86/pci/built-in.a arch/x86/power/built-in.a arch/x86/video/built-in.a | xargs ar cDPrST built-in.a'
        expected = "./init/built-in.a ./usr/built-in.a ./arch/x86/built-in.a ./arch/x86/boot/startup/built-in.a ./kernel/built-in.a ./certs/built-in.a ./mm/built-in.a ./fs/built-in.a ./ipc/built-in.a ./security/built-in.a ./crypto/built-in.a ./block/built-in.a ./io_uring/built-in.a ./lib/built-in.a ./arch/x86/lib/built-in.a ./drivers/built-in.a ./sound/built-in.a ./net/built-in.a ./virt/built-in.a ./arch/x86/pci/built-in.a ./arch/x86/power/built-in.a ./arch/x86/video/built-in.a"
        self._assert_parsing(cmd, expected)

    def test_ar_printf_nested(self):
        cmd = 'rm -f arch/x86/pci/built-in.a; printf "arch/x86/pci/%s " i386.o init.o mmconfig_64.o direct.o mmconfig-shared.o fixup.o acpi.o legacy.o irq.o common.o early.o bus_numa.o amd_bus.o | xargs ar cDPrST arch/x86/pci/built-in.a'
        expected = "arch/x86/pci/i386.o arch/x86/pci/init.o arch/x86/pci/mmconfig_64.o arch/x86/pci/direct.o arch/x86/pci/mmconfig-shared.o arch/x86/pci/fixup.o arch/x86/pci/acpi.o arch/x86/pci/legacy.o arch/x86/pci/irq.o arch/x86/pci/common.o arch/x86/pci/early.o arch/x86/pci/bus_numa.o arch/x86/pci/amd_bus.o"
        self._assert_parsing(cmd, expected)

    # ar command tests
    def test_ar_reordering(self):
        cmd = "rm -f vmlinux.a; ar cDPrST vmlinux.a built-in.a lib/lib.a arch/x86/lib/lib.a; ar mPiT $$(ar t vmlinux.a | sed -n 1p) vmlinux.a $$(ar t vmlinux.a | grep -F -f ../scripts/head-object-list.txt)"
        expected = "built-in.a lib/lib.a arch/x86/lib/lib.a"
        self._assert_parsing(cmd, expected)

    def test_ar_default(self):
        cmd = "rm -f lib/lib.a; ar cDPrsT lib/lib.a lib/argv_split.o lib/bug.o lib/buildid.o lib/clz_tab.o lib/cmdline.o lib/cpumask.o lib/ctype.o lib/dec_and_lock.o lib/decompress.o lib/decompress_bunzip2.o lib/decompress_inflate.o lib/decompress_unlz4.o lib/decompress_unlzma.o lib/decompress_unlzo.o lib/decompress_unxz.o lib/decompress_unzstd.o lib/dump_stack.o lib/earlycpio.o lib/extable.o lib/flex_proportions.o lib/idr.o lib/iomem_copy.o lib/irq_regs.o lib/is_single_threaded.o lib/klist.o lib/kobject.o lib/kobject_uevent.o lib/logic_pio.o lib/maple_tree.o lib/memcat_p.o lib/nmi_backtrace.o lib/objpool.o lib/plist.o lib/radix-tree.o lib/ratelimit.o lib/rbtree.o lib/seq_buf.o lib/siphash.o lib/string.o lib/sys_info.o lib/timerqueue.o lib/union_find.o lib/vsprintf.o lib/win_minmax.o lib/xarray.o"
        expected = "lib/argv_split.o lib/bug.o lib/buildid.o lib/clz_tab.o lib/cmdline.o lib/cpumask.o lib/ctype.o lib/dec_and_lock.o lib/decompress.o lib/decompress_bunzip2.o lib/decompress_inflate.o lib/decompress_unlz4.o lib/decompress_unlzma.o lib/decompress_unlzo.o lib/decompress_unxz.o lib/decompress_unzstd.o lib/dump_stack.o lib/earlycpio.o lib/extable.o lib/flex_proportions.o lib/idr.o lib/iomem_copy.o lib/irq_regs.o lib/is_single_threaded.o lib/klist.o lib/kobject.o lib/kobject_uevent.o lib/logic_pio.o lib/maple_tree.o lib/memcat_p.o lib/nmi_backtrace.o lib/objpool.o lib/plist.o lib/radix-tree.o lib/ratelimit.o lib/rbtree.o lib/seq_buf.o lib/siphash.o lib/string.o lib/sys_info.o lib/timerqueue.o lib/union_find.o lib/vsprintf.o lib/win_minmax.o lib/xarray.o"
        self._assert_parsing(cmd, expected)

    def test_ar_llvm(self):
        cmd = "llvm-ar mPiT $$(llvm-ar t vmlinux.a | sed -n 1p) vmlinux.a $$(llvm-ar t vmlinux.a | grep -F -f ../scripts/head-object-list.txt)"
        expected = ""
        self._assert_parsing(cmd, expected)

    # nm command tests
    def test_nm(self):
        cmd = """llvm-nm -p --defined-only rust/core.o | awk '$$2~/(T|R|D|B)/ && $$3!~/__(pfx|cfi|odr_asan)/ { printf "EXPORT_SYMBOL_RUST_GPL(%s);\n",$$3 }' > rust/exports_core_generated.h"""
        expected = "rust/core.o"
        self._assert_parsing(cmd, expected)

    def test_nm_vmlinux(self):
        cmd = r"nm vmlinux | sed -n -e 's/^\([0-9a-fA-F]*\) [ABbCDGRSTtVW] \(_text\|__start_rodata\|__bss_start\|_end\)$/#define VO_\2 _AC(0x\1,UL)/p' > arch/x86/boot/voffset.h"
        expected = "vmlinux"
        self._assert_parsing(cmd, expected)

    # objcopy command tests
    def test_objcopy(self):
        cmd = "objcopy --remove-section='.rel*' --remove-section=!'.rel*.dyn' vmlinux.unstripped vmlinux"
        expected = "vmlinux.unstripped"
        self._assert_parsing(cmd, expected)

    def test_objcopy_llvm(self):
        cmd = "llvm-objcopy --remove-section='.rel*' --remove-section=!'.rel*.dyn' vmlinux.unstripped vmlinux"
        expected = "vmlinux.unstripped"
        self._assert_parsing(cmd, expected)

    # strip command tests
    def test_strip(self):
        cmd = "strip --strip-debug -o drivers/firmware/efi/libstub/mem.stub.o drivers/firmware/efi/libstub/mem.o"
        expected = "drivers/firmware/efi/libstub/mem.o"
        self._assert_parsing(cmd, expected)

    # cp command tests
    def test_cp_truncate(self):
        cmd = "cp arch/arm64/boot/Image arch/arm64/boot/vmlinux.bin; truncate -s $$(hexdump -s16 -n4 -e '\"%u\"' arch/arm64/boot/Image) arch/arm64/boot/vmlinux.bin"
        expected = "arch/arm64/boot/Image"
        self._assert_parsing(cmd, expected)

    # rustc command tests
    def test_rustc(self):
        cmd = """OBJTREE=/workspace/linux/kernel_build rustc -Zbinary_dep_depinfo=y -Astable_features -Dnon_ascii_idents -Dunsafe_op_in_unsafe_fn -Wmissing_docs -Wrust_2018_idioms -Wclippy::all -Wclippy::as_ptr_cast_mut -Wclippy::as_underscore -Wclippy::cast_lossless -Wclippy::ignored_unit_patterns -Wclippy::mut_mut -Wclippy::needless_bitwise_bool -Aclippy::needless_lifetimes -Wclippy::no_mangle_with_rust_abi -Wclippy::ptr_as_ptr -Wclippy::ptr_cast_constness -Wclippy::ref_as_ptr -Wclippy::undocumented_unsafe_blocks -Wclippy::unnecessary_safety_comment -Wclippy::unnecessary_safety_doc -Wrustdoc::missing_crate_level_docs -Wrustdoc::unescaped_backticks -Cpanic=abort -Cembed-bitcode=n -Clto=n -Cforce-unwind-tables=n -Ccodegen-units=1 -Csymbol-mangling-version=v0 -Crelocation-model=static -Zfunction-sections=n -Wclippy::float_arithmetic --target=./scripts/target.json -Ctarget-feature=-sse,-sse2,-sse3,-ssse3,-sse4.1,-sse4.2,-avx,-avx2 -Zcf-protection=branch -Zno-jump-tables -Ctarget-cpu=x86-64 -Ztune-cpu=generic -Cno-redzone=y -Ccode-model=kernel -Zfunction-return=thunk-extern -Zpatchable-function-entry=16,16 -Copt-level=2 -Cdebug-assertions=n -Coverflow-checks=y -Dwarnings @./include/generated/rustc_cfg --edition=2021 --cfg no_fp_fmt_parse --emit=dep-info=rust/.core.o.d --emit=obj=rust/core.o --emit=metadata=rust/libcore.rmeta --crate-type rlib -L./rust --crate-name core /usr/lib/rust-1.84/lib/rustlib/src/rust/library/core/src/lib.rs --sysroot=/dev/null ;llvm-objcopy --redefine-sym __addsf3=__rust__addsf3 --redefine-sym __eqsf2=__rust__eqsf2 --redefine-sym __extendsfdf2=__rust__extendsfdf2 --redefine-sym __gesf2=__rust__gesf2 --redefine-sym __lesf2=__rust__lesf2 --redefine-sym __ltsf2=__rust__ltsf2 --redefine-sym __mulsf3=__rust__mulsf3 --redefine-sym __nesf2=__rust__nesf2 --redefine-sym __truncdfsf2=__rust__truncdfsf2 --redefine-sym __unordsf2=__rust__unordsf2 --redefine-sym __adddf3=__rust__adddf3 --redefine-sym __eqdf2=__rust__eqdf2 --redefine-sym __ledf2=__rust__ledf2 --redefine-sym __ltdf2=__rust__ltdf2 --redefine-sym __muldf3=__rust__muldf3 --redefine-sym __unorddf2=__rust__unorddf2 --redefine-sym __muloti4=__rust__muloti4 --redefine-sym __multi3=__rust__multi3 --redefine-sym __udivmodti4=__rust__udivmodti4 --redefine-sym __udivti3=__rust__udivti3 --redefine-sym __umodti3=__rust__umodti3 rust/core.o"""
        expected = "/usr/lib/rust-1.84/lib/rustlib/src/rust/library/core/src/lib.rs rust/core.o"
        self._assert_parsing(cmd, expected)

    # rustdoc command tests
    def test_rustdoc(self):
        cmd = """OBJTREE=/workspace/linux/kernel_build rustdoc --test --edition=2021 -Zbinary_dep_depinfo=y -Astable_features -Dnon_ascii_idents -Dunsafe_op_in_unsafe_fn -Wmissing_docs -Wrust_2018_idioms -Wunreachable_pub -Wclippy::all -Wclippy::as_ptr_cast_mut -Wclippy::as_underscore -Wclippy::cast_lossless -Wclippy::ignored_unit_patterns -Wclippy::mut_mut -Wclippy::needless_bitwise_bool -Aclippy::needless_lifetimes -Wclippy::no_mangle_with_rust_abi -Wclippy::ptr_as_ptr -Wclippy::ptr_cast_constness -Wclippy::ref_as_ptr -Wclippy::undocumented_unsafe_blocks -Wclippy::unnecessary_safety_comment -Wclippy::unnecessary_safety_doc -Wrustdoc::missing_crate_level_docs -Wrustdoc::unescaped_backticks -Cpanic=abort -Cembed-bitcode=n -Clto=n -Cforce-unwind-tables=n -Ccodegen-units=1 -Csymbol-mangling-version=v0 -Crelocation-model=static -Zfunction-sections=n -Wclippy::float_arithmetic --target=aarch64-unknown-none -Ctarget-feature="-neon" -Cforce-unwind-tables=n -Zbranch-protection=pac-ret -Copt-level=2 -Cdebug-assertions=y -Coverflow-checks=y -Dwarnings -Cforce-frame-pointers=y -Zsanitizer=kernel-address -Zsanitizer-recover=kernel-address -Cllvm-args=-asan-mapping-offset=0xdfff800000000000 -Cpasses=sancov-module -Cllvm-args=-sanitizer-coverage-level=3 -Cllvm-args=-sanitizer-coverage-trace-pc -Cllvm-args=-sanitizer-coverage-trace-compares @./include/generated/rustc_cfg -L./rust --extern ffi --extern pin_init --extern kernel --extern build_error --extern macros --extern bindings --extern uapi --no-run --crate-name kernel -Zunstable-options --sysroot=/dev/null --test-builder ./scripts/rustdoc_test_builder ../rust/kernel/lib.rs >/dev/null"""
        expected = "../rust/kernel/lib.rs"
        self._assert_parsing(cmd, expected)

    def test_rustdoc_test_gen(self):
        cmd = "./scripts/rustdoc_test_gen"
        expected = ""
        self._assert_parsing(cmd, expected)

    # flex command tests
    def test_flex(self):
        cmd = "flex -oscripts/kconfig/lexer.lex.c -L ../scripts/kconfig/lexer.l"
        expected = "../scripts/kconfig/lexer.l"
        self._assert_parsing(cmd, expected)

    # bison command tests
    def test_bison(self):
        cmd = "bison -o scripts/kconfig/parser.tab.c --defines=scripts/kconfig/parser.tab.h -t -l ../scripts/kconfig/parser.y"
        expected = "../scripts/kconfig/parser.y"
        self._assert_parsing(cmd, expected)

    # bindgen command tests
    def test_bindgen(self):
        # Adjacent string literals are concatenated into one command line.
        cmd = (
            "bindgen ../rust/bindings/bindings_helper.h "
            "--blocklist-type __kernel_s?size_t --blocklist-type __kernel_ptrdiff_t "
            "--opaque-type xregs_state --opaque-type desc_struct --no-doc-comments "
            "--rust-target 1.68 --use-core --with-derive-default -o rust/bindings/bindings_generated.rs "
            "-- -Wp,-MMD,rust/bindings/.bindings_generated.rs.d -nostdinc -I../arch/x86/include "
            "-include ../include/linux/compiler-version.h -D__KERNEL__ -fintegrated-as -fno-builtin -DMODULE; "
            "sed -Ei 's/pub const RUST_CONST_HELPER_([a-zA-Z0-9_]*)/pub const \\1/g' rust/bindings/bindings_generated.rs"
        )
        expected = "../rust/bindings/bindings_helper.h ../include/linux/compiler-version.h"
        self._assert_parsing(cmd, expected)

    # perl command tests
    def test_perl(self):
        cmd = "perl ../lib/crypto/x86/poly1305-x86_64-cryptogams.pl > lib/crypto/x86/poly1305-x86_64-cryptogams.S"
        expected = "../lib/crypto/x86/poly1305-x86_64-cryptogams.pl"
        self._assert_parsing(cmd, expected)

    # link-vmlinux.sh command tests
    def test_link_vmlinux(self):
        cmd = '../scripts/link-vmlinux.sh "ld" "-m elf_x86_64 -z noexecstack" "-z max-page-size=0x200000 --build-id=sha1 --orphan-handling=error --emit-relocs --discard-none" "vmlinux.unstripped"; true'
        expected = "vmlinux.a"
        self._assert_parsing(cmd, expected)

    def test_link_vmlinux_postlink(self):
        cmd = '../scripts/link-vmlinux.sh "ld" "-m elf_x86_64 -z noexecstack --no-warn-rwx-segments" "--emit-relocs --discard-none -z max-page-size=0x200000 --build-id=sha1 -X --orphan-handling=error"; make -f ../arch/x86/Makefile.postlink vmlinux'
        expected = "vmlinux.a"
        self._assert_parsing(cmd, expected)

    # syscallhdr.sh command tests
    def test_syscallhdr(self):
        cmd = "sh ../scripts/syscallhdr.sh --abis common,64 --emit-nr ../arch/x86/entry/syscalls/syscall_64.tbl arch/x86/include/generated/uapi/asm/unistd_64.h"
        expected = "../arch/x86/entry/syscalls/syscall_64.tbl"
        self._assert_parsing(cmd, expected)

    # syscalltbl.sh command tests
    def test_syscalltbl(self):
        cmd = "sh ../scripts/syscalltbl.sh --abis common,64 ../arch/x86/entry/syscalls/syscall_64.tbl arch/x86/include/generated/asm/syscalls_64.h"
        expected = "../arch/x86/entry/syscalls/syscall_64.tbl"
        self._assert_parsing(cmd, expected)

    # mkcapflags.sh command tests
    def test_mkcapflags(self):
        cmd = "sh ../arch/x86/kernel/cpu/mkcapflags.sh arch/x86/kernel/cpu/capflags.c ../arch/x86/kernel/cpu/../../include/asm/cpufeatures.h ../arch/x86/kernel/cpu/../../include/asm/vmxfeatures.h ../arch/x86/kernel/cpu/mkcapflags.sh FORCE"
        expected = "../arch/x86/kernel/cpu/../../include/asm/cpufeatures.h ../arch/x86/kernel/cpu/../../include/asm/vmxfeatures.h"
        self._assert_parsing(cmd, expected)

    # orc_hash.sh command tests
    def test_orc_hash(self):
        cmd = "mkdir -p arch/x86/include/generated/asm/; sh ../scripts/orc_hash.sh < ../arch/x86/include/asm/orc_types.h > arch/x86/include/generated/asm/orc_hash.h"
        expected = "../arch/x86/include/asm/orc_types.h"
        self._assert_parsing(cmd, expected)

    # xen-hypercalls.sh command tests
    def test_xen_hypercalls(self):
        cmd = "sh '../scripts/xen-hypercalls.sh' arch/x86/include/generated/asm/xen-hypercalls.h ../include/xen/interface/xen-mca.h ../include/xen/interface/xen.h ../include/xen/interface/xenpmu.h"
        expected = "../include/xen/interface/xen-mca.h ../include/xen/interface/xen.h ../include/xen/interface/xenpmu.h"
        self._assert_parsing(cmd, expected)

    # gen_initramfs.sh command tests
    def test_gen_initramfs(self):
        cmd = "sh ../usr/gen_initramfs.sh -o usr/initramfs_data.cpio -l usr/.initramfs_data.cpio.d ../usr/default_cpio_list"
        expected = "../usr/default_cpio_list"
        self._assert_parsing(cmd, expected)

    # mkuboot.sh command tests
    def test_mkuboot(self):
        cmd = "bash ../scripts/mkuboot.sh -A arm -O linux -C none -T kernel -a 0x8000 -e 0x8000 -n 'Linux-6.15.0' -d arch/arm/boot/zImage arch/arm/boot/uImage"
        expected = "arch/arm/boot/zImage"
        self._assert_parsing(cmd, expected)

    # syscallnr.sh command tests
    def test_syscallnr(self):
        cmd = "sh ../arch/arm/tools/syscallnr.sh ../arch/arm/tools/syscall.tbl arch/arm/include/generated/asm/unistd-nr.h"
        expected = "../arch/arm/tools/syscall.tbl"
        self._assert_parsing(cmd, expected)

    # gen-kernel-hwcaps.sh command tests
    def test_gen_kernel_hwcaps(self):
        cmd = "/bin/sh -e ../arch/arm64/tools/gen-kernel-hwcaps.sh ../arch/arm64/include/uapi/asm/hwcap.h > arch/arm64/include/generated/asm/kernel-hwcap.h"
        expected = "../arch/arm64/include/uapi/asm/hwcap.h"
        self._assert_parsing(cmd, expected)

    # vdso2c command tests
    def test_vdso2c(self):
        cmd = "arch/x86/entry/vdso/vdso2c arch/x86/entry/vdso/vdso64.so.dbg arch/x86/entry/vdso/vdso64.so arch/x86/entry/vdso/vdso-image-64.c"
        expected = "arch/x86/entry/vdso/vdso64.so.dbg arch/x86/entry/vdso/vdso64.so"
        self._assert_parsing(cmd, expected)

    # vdsomunge command tests
    def test_vdsomunge(self):
        cmd = "arch/arm64/kernel/vdso32/../../../arm/vdso/vdsomunge arch/arm64/kernel/vdso32/vdso.so.raw arch/arm64/kernel/vdso32/vdso32.so.dbg"
        expected = "arch/arm64/kernel/vdso32/vdso.so.raw"
        self._assert_parsing(cmd, expected)

    # mkpiggy command tests
    def test_mkpiggy(self):
        cmd = "arch/x86/boot/compressed/mkpiggy arch/x86/boot/compressed/vmlinux.bin.gz > arch/x86/boot/compressed/piggy.S"
        expected = "arch/x86/boot/compressed/vmlinux.bin.gz"
        self._assert_parsing(cmd, expected)

    # relocs command tests
    def test_relocs(self):
        cmd = "arch/x86/tools/relocs vmlinux.unstripped > arch/x86/boot/compressed/vmlinux.relocs;arch/x86/tools/relocs --abs-relocs vmlinux.unstripped"
        expected = "vmlinux.unstripped"
        self._assert_parsing(cmd, expected)

    def test_relocs_with_realmode(self):
        cmd = (
            "arch/x86/tools/relocs --realmode arch/x86/realmode/rm/realmode.elf > arch/x86/realmode/rm/realmode.relocs"
        )
        expected = "arch/x86/realmode/rm/realmode.elf"
        self._assert_parsing(cmd, expected)

    # mk_elfconfig command tests
    def test_mk_elfconfig(self):
        cmd = "scripts/mod/mk_elfconfig < scripts/mod/empty.o > scripts/mod/elfconfig.h"
        expected = "scripts/mod/empty.o"
        self._assert_parsing(cmd, expected)

    # tools/build command tests
    def test_build(self):
        cmd = "arch/x86/boot/tools/build arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin arch/x86/boot/zoffset.h arch/x86/boot/bzImage"
        expected = "arch/x86/boot/setup.bin arch/x86/boot/vmlinux.bin arch/x86/boot/zoffset.h"
        self._assert_parsing(cmd, expected)

    # extract-cert command tests
    def test_extract_cert(self):
        cmd = 'certs/extract-cert "" certs/signing_key.x509'
        expected = ""
        self._assert_parsing(cmd, expected)

    # dtc command tests
    def test_dtc_cat(self):
        cmd = "./scripts/dtc/dtc -o drivers/of/empty_root.dtb -b 0 -i../drivers/of/ -i../scripts/dtc/include-prefixes -Wno-unique_unit_address -Wno-unit_address_vs_reg -Wno-avoid_unnecessary_addr_size -Wno-alias_paths -Wno-graph_child_address -Wno-simple_bus_reg -d drivers/of/.empty_root.dtb.d.dtc.tmp drivers/of/.empty_root.dtb.dts.tmp ; cat drivers/of/.empty_root.dtb.d.pre.tmp drivers/of/.empty_root.dtb.d.dtc.tmp > drivers/of/.empty_root.dtb.d"
        expected = "drivers/of/.empty_root.dtb.dts.tmp drivers/of/.empty_root.dtb.d.pre.tmp drivers/of/.empty_root.dtb.d.dtc.tmp"
        self._assert_parsing(cmd, expected)

    # pnmtologo command tests
    def test_pnmtologo(self):
        cmd = "drivers/video/logo/pnmtologo -t clut224 -n logo_linux_clut224 -o drivers/video/logo/logo_linux_clut224.c ../drivers/video/logo/logo_linux_clut224.ppm"
        expected = "../drivers/video/logo/logo_linux_clut224.ppm"
        self._assert_parsing(cmd, expected)

    # relacheck command tests
    def test_relacheck(self):
        cmd = "arch/arm64/kernel/pi/relacheck arch/arm64/kernel/pi/idreg-override.pi.o arch/arm64/kernel/pi/idreg-override.o"
        expected = "arch/arm64/kernel/pi/idreg-override.pi.o"
        self._assert_parsing(cmd, expected)

    # gen-hyprel command tests
    def test_gen_hyprel(self):
        cmd = "arch/arm64/kvm/hyp/nvhe/gen-hyprel arch/arm64/kvm/hyp/nvhe/kvm_nvhe.tmp.o > arch/arm64/kvm/hyp/nvhe/hyp-reloc.S"
        expected = "arch/arm64/kvm/hyp/nvhe/kvm_nvhe.tmp.o"
        self._assert_parsing(cmd, expected)

    # mkregtable command tests
    def test_mkregtable(self):
        cmd = "drivers/gpu/drm/radeon/mkregtable ../drivers/gpu/drm/radeon/reg_srcs/r100 > drivers/gpu/drm/radeon/r100_reg_safe.h"
        expected = "../drivers/gpu/drm/radeon/reg_srcs/r100"
        self._assert_parsing(cmd, expected)

    # genheaders command tests
    def test_genheaders(self):
        cmd = "security/selinux/genheaders security/selinux/flask.h security/selinux/av_permissions.h"
        expected = ""
        self._assert_parsing(cmd, expected)

    # mkcpustr command tests
    def test_mkcpustr(self):
        cmd = "arch/x86/boot/mkcpustr > arch/x86/boot/cpustr.h"
        expected = ""
        self._assert_parsing(cmd, expected)

    # polgen command tests
    def test_polgen(self):
        cmd = "scripts/ipe/polgen/polgen security/ipe/boot_policy.c"
        expected = ""
        self._assert_parsing(cmd, expected)

    # gen_header.py command tests
    def test_gen_header(self):
        cmd = "mkdir -p drivers/gpu/drm/msm/generated && python3 ../drivers/gpu/drm/msm/registers/gen_header.py --no-validate --rnn ../drivers/gpu/drm/msm/registers --xml ../drivers/gpu/drm/msm/registers/adreno/a2xx.xml c-defines > drivers/gpu/drm/msm/generated/a2xx.xml.h"
        expected = "../drivers/gpu/drm/msm/registers/adreno/a2xx.xml"
        self._assert_parsing(cmd, expected)


if __name__ == "__main__":
    unittest.main()
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Copyright (C) 2025 TNG Technology Consulting GmbH

import unittest
from pathlib import Path
import tempfile
from sbom.spdx_graph.kernel_file import _parse_spdx_license_identifier  # type: ignore


class TestKernelFile(unittest.TestCase):
    """Tests for ``_parse_spdx_license_identifier`` using files in a temporary directory."""

    def setUp(self):
        # Each test gets its own scratch directory that stands in for a source tree.
        self.tmpdir = tempfile.TemporaryDirectory()
        self.src_tree = Path(self.tmpdir.name)

    def tearDown(self):
        self.tmpdir.cleanup()

    def test_parse_spdx_license_identifier(self):
        """Write each sample to a file and compare the parsed identifier with the expected one."""
        # The sample contents contain license tags on purpose; the markers below
        # tell the REUSE tool not to treat them as this file's own license.
        # REUSE-IgnoreStart
        test_cases: list[tuple[str, str | None]] = [
            ("/* SPDX-License-Identifier: MIT*/", "MIT"),
            ("// SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
            ("# SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
            ("#!/bin/bash\n# SPDX-License-Identifier: GPL-2.0-only", "GPL-2.0-only"),
            ("/* SPDX-License-Identifier: GPL-2.0-or-later OR MIT */", "GPL-2.0-or-later OR MIT"),
            ("/* SPDX-License-Identifier: Apache-2.0 */\n extra text", "Apache-2.0"),
            ("<!-- SPDX-License-Identifier: GPL-2.0 -->", "GPL-2.0"),
            ("int main() { return 0; }", None),
        ]
        # REUSE-IgnoreEnd

        for i, (file_content, expected_identifier) in enumerate(test_cases):
            # subTest reports every failing sample (with its content) instead of
            # stopping at the first mismatch.
            with self.subTest(index=i, file_content=file_content):
                file_path = self.src_tree / f"file_{i}.c"
                # Explicit encoding keeps the fixture independent of the locale.
                file_path.write_text(file_content, encoding="utf-8")
                self.assertEqual(_parse_spdx_license_identifier(str(file_path)), expected_identifier)
