diff options
| author | Arnaldo Carvalho de Melo <acme@redhat.com> | 2026-05-02 13:26:51 -0300 |
|---|---|---|
| committer | Arnaldo Carvalho de Melo <acme@redhat.com> | 2026-05-29 11:44:33 -0300 |
| commit | 5e3d2175a3fe5cb6ea9c998a5e2a8f8ad524c7bf (patch) | |
| tree | 5eb8caa005d1cd7e2f1dbb051d71c55f9985a3c3 /tools | |
| parent | 15110680e867cf13fe11b13e624be1d945a1fc82 (diff) | |
| download | linux-next-history-5e3d2175a3fe5cb6ea9c998a5e2a8f8ad524c7bf.tar.gz | |
perf session: Add byte-swap and bounds check for PERF_RECORD_BPF_METADATA events
PERF_RECORD_BPF_METADATA has no entry in perf_event__swap_ops[], so its
nr_entries field is never byte-swapped when reading a cross-endian
perf.data file. Downstream processing in
perf_event__fprintf_bpf_metadata() loops over nr_entries, so a
foreign-endian value causes out-of-bounds reads.
Add a swap handler that byte-swaps nr_entries after validating that
header.size is large enough. The entries[] array contains only char
arrays (key/value strings), so no per-entry swap is needed. The handler
does force NUL-termination of prog_name and of each key/value string,
which is possible because the cross-endian path maps the file writable.
Validate header.size, nr_entries, and string NUL-termination in the
common event delivery path so that native-endian files with malicious
values are also caught: the offending event is skipped with a warning
instead of being delivered to the tool. Snapshot nr_entries via
READ_ONCE() before validation, because the event lives in a MAP_SHARED
mapping whose contents could theoretically change between the bounds
check and the loop.
Changes in v2:
- Snapshot event->header.size via READ_ONCE() into a local variable
to prevent a double-fetch underflow in the max_entries calculation
(Reported-by: sashiko-bot@kernel.org)
- Write back clamped nr_entries to the event on the swap path,
consistent with NAMESPACES and STAT_CONFIG handlers — without
writeback the native path sees the inflated nr and skips the
event entirely (Reported-by: sashiko-bot@kernel.org)
Fixes: ab38e84ba9a8 ("perf record: collect BPF metadata from existing BPF programs")
Reported-by: sashiko-bot@kernel.org # Running on a local machine
Reviewed-by: Ian Rogers <irogers@google.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Assisted-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
| -rw-r--r-- | tools/perf/util/session.c | 89 |
1 file changed, 88 insertions, 1 deletion
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 092fccbea8f80..95eb793026de6 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -961,6 +961,48 @@ static int perf_event__time_conv_swap(union perf_event *event,
 
 	return 0;
 }
+static int perf_event__bpf_metadata_swap(union perf_event *event,
+					 bool sample_id_all __maybe_unused)
+{
+	u64 i, nr, max_nr;
+
+	/* Fixed header must fit before accessing nr_entries or prog_name */
+	if (event->header.size < sizeof(event->bpf_metadata))
+		return -1;
+
+	event->bpf_metadata.nr_entries = bswap_64(event->bpf_metadata.nr_entries);
+
+	/*
+	 * Ensure NUL-termination on the cross-endian path where the
+	 * mapping is writable (MAP_PRIVATE + PROT_WRITE). Fixing
+	 * the string in place is preferred over rejecting because it
+	 * preserves the event for downstream processing — only the
+	 * last byte is lost.
+	 *
+	 * The native-endian path (MAP_SHARED + PROT_READ) cannot
+	 * write, so it validates and skips unterminated events in
+	 * perf_session__process_user_event() instead. The two
+	 * strategies produce different outcomes for the same
+	 * malformed input (fix vs skip), which is inherent in the
+	 * writable-vs-read-only mapping model.
+	 */
+	event->bpf_metadata.prog_name[BPF_PROG_NAME_LEN - 1] = '\0';
+
+	nr = event->bpf_metadata.nr_entries;
+	max_nr = (event->header.size - sizeof(event->bpf_metadata)) /
+		 sizeof(event->bpf_metadata.entries[0]);
+	if (nr > max_nr) {
+		/* Persist clamped value so the native path processes entries, not skips */
+		nr = max_nr;
+		event->bpf_metadata.nr_entries = nr;
+	}
+
+	for (i = 0; i < nr; i++) {
+		event->bpf_metadata.entries[i].key[BPF_METADATA_KEY_LEN - 1] = '\0';
+		event->bpf_metadata.entries[i].value[BPF_METADATA_VALUE_LEN - 1] = '\0';
+	}
+	return 0;
+}
 
 static int perf_event__schedstat_cpu_swap(union perf_event *event __maybe_unused,
 					  bool sample_id_all __maybe_unused)
@@ -1060,6 +1102,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_STAT_ROUND] = perf_event__stat_round_swap,
 	[PERF_RECORD_EVENT_UPDATE] = perf_event__event_update_swap,
 	[PERF_RECORD_TIME_CONV] = perf_event__time_conv_swap,
+	[PERF_RECORD_BPF_METADATA] = perf_event__bpf_metadata_swap,
 	[PERF_RECORD_SCHEDSTAT_CPU] = perf_event__schedstat_cpu_swap,
 	[PERF_RECORD_SCHEDSTAT_DOMAIN] = perf_event__schedstat_domain_swap,
 	[PERF_RECORD_HEADER_MAX] = NULL,
@@ -2203,9 +2246,53 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 	case PERF_RECORD_FINISHED_INIT:
 		err = tool->finished_init(tool, session, event);
 		break;
-	case PERF_RECORD_BPF_METADATA:
+	case PERF_RECORD_BPF_METADATA: {
+		u64 nr_entries, max_entries;
+		u32 hdr_size = READ_ONCE(event->header.size);
+
+		if (hdr_size < sizeof(event->bpf_metadata)) {
+			pr_warning("WARNING: PERF_RECORD_BPF_METADATA: header.size (%u) too small, skipping\n",
+				   hdr_size);
+			err = 0;
+			break;
+		}
+
+		/*
+		 * Native-endian files are mmap'd read-only — validate
+		 * NUL-termination instead of writing.
+		 */
+		if (strnlen(event->bpf_metadata.prog_name,
+			    BPF_PROG_NAME_LEN) == BPF_PROG_NAME_LEN) {
+			pr_warning("WARNING: PERF_RECORD_BPF_METADATA: prog_name not null-terminated, skipping\n");
+			err = 0;
+			break;
+		}
+
+		/* Snapshot — event is mmap'd and could change between reads */
+		nr_entries = READ_ONCE(event->bpf_metadata.nr_entries);
+		max_entries = (hdr_size - sizeof(event->bpf_metadata)) /
+			      sizeof(event->bpf_metadata.entries[0]);
+		if (nr_entries > max_entries) {
+			pr_warning("WARNING: PERF_RECORD_BPF_METADATA: nr_entries %" PRIu64 " exceeds max %" PRIu64 ", skipping\n",
+				   nr_entries, max_entries);
+			err = 0;
+			break;
+		}
+
+		for (u64 i = 0; i < nr_entries; i++) {
+			if (strnlen(event->bpf_metadata.entries[i].key,
+				    BPF_METADATA_KEY_LEN) == BPF_METADATA_KEY_LEN ||
+			    strnlen(event->bpf_metadata.entries[i].value,
+				    BPF_METADATA_VALUE_LEN) == BPF_METADATA_VALUE_LEN) {
+				pr_warning("WARNING: PERF_RECORD_BPF_METADATA: entry %" PRIu64 " key/value not null-terminated, skipping\n", i);
+				err = 0;
+				goto out;
+			}
+		}
+
 		err = tool->bpf_metadata(tool, session, event);
 		break;
+	}
 	case PERF_RECORD_SCHEDSTAT_CPU:
 		err = tool->schedstat_cpu(tool, session, event);
 		break;
