aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
authorArnaldo Carvalho de Melo <acme@redhat.com>2026-05-02 12:57:48 -0300
committerArnaldo Carvalho de Melo <acme@redhat.com>2026-05-29 11:44:29 -0300
commit0dcd3a131c8acd9e6aa62ec408af64cd9292d9ff (patch)
treebf87c94e74d24c46ce1aeb5b5d5e26f9f4cc45ac /tools
parentb128a6e3b619ac766452f38188fa0b3e4d67862d (diff)
downloadlinux-next-history-0dcd3a131c8acd9e6aa62ec408af64cd9292d9ff.tar.gz
perf session: Fix PERF_RECORD_READ swap and dump for variable-length events
The kernel dynamically sizes PERF_RECORD_READ based on attr.read_format: only the fields enabled by PERF_FORMAT_TOTAL_TIME_ENABLED, PERF_FORMAT_TOTAL_TIME_RUNNING, PERF_FORMAT_ID, and PERF_FORMAT_LOST are emitted, packed with no gaps. perf_event__read_swap() unconditionally byte-swapped time_enabled, time_running, and id at their fixed struct offsets, causing out-of-bounds access on smaller events and swapping the wrong bytes when not all format fields are present. It also swapped sample_id_all at a fixed offset past the full struct, which is wrong for shorter events. Replace the individual field swaps with a single mem_bswap_64() over the entire tail from value onward. Since every field after pid/tid is u64 regardless of which combination is present, this correctly handles any read_format combination and any trailing sample_id_all fields. Similarly, dump_read() accessed optional fields via fixed struct offsets, displaying values from wrong positions when not all format bits are set. Walk the packed u64 array sequentially instead, with bounds checks against event->header.size. Reviewed-by: Ian Rogers <irogers@google.com> Cc: Jiri Olsa <jolsa@kernel.org> Cc: Namhyung Kim <namhyung@kernel.org> Assisted-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/util/session.c61
1 files changed, 44 insertions, 17 deletions
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c4cd8ad6d810a..24f2ba599b807 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -354,17 +354,24 @@ static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
swap_sample_id_all(event, &event->fork + 1);
}
-static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
+static void perf_event__read_swap(union perf_event *event,
+ bool sample_id_all __maybe_unused)
{
+ size_t tail;
+
event->read.pid = bswap_32(event->read.pid);
event->read.tid = bswap_32(event->read.tid);
- event->read.value = bswap_64(event->read.value);
- event->read.time_enabled = bswap_64(event->read.time_enabled);
- event->read.time_running = bswap_64(event->read.time_running);
- event->read.id = bswap_64(event->read.id);
-
- if (sample_id_all)
- swap_sample_id_all(event, &event->read + 1);
+ /*
+ * Everything after pid/tid is u64: the read values (variable
+ * set determined by attr.read_format, which we don't have
+ * here) optionally followed by sample_id_all fields.
+ * Since all are u64, swap the entire remaining tail at once.
+ */
+ tail = event->header.size - offsetof(struct perf_record_read, value);
+ /* mem_bswap_64 rounds up to 8-byte chunks — unaligned tail overruns the buffer */
+ if (tail % sizeof(u64))
+ return;
+ mem_bswap_64(&event->read.value, tail);
}
static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
@@ -1200,8 +1207,9 @@ static void dump_deferred_callchain(union perf_event *event, struct perf_sample
static void dump_read(struct evsel *evsel, union perf_event *event)
{
- struct perf_record_read *read_event = &event->read;
u64 read_format;
+ __u64 *array;
+ void *end;
if (!dump_trace)
return;
@@ -1213,18 +1221,37 @@ static void dump_read(struct evsel *evsel, union perf_event *event)
return;
read_format = evsel->core.attr.read_format;
+ /*
+ * The kernel packs only the enabled read_format fields
+ * after value, with no gaps. Walk the packed array
+ * instead of using fixed struct offsets.
+ */
+ array = &event->read.value + 1;
+ end = (void *)event + event->header.size;
- if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
- printf("... time enabled : %" PRI_lu64 "\n", read_event->time_enabled);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... time enabled : %" PRI_lu64 "\n", *array++);
+ }
- if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
- printf("... time running : %" PRI_lu64 "\n", read_event->time_running);
+ if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... time running : %" PRI_lu64 "\n", *array++);
+ }
- if (read_format & PERF_FORMAT_ID)
- printf("... id : %" PRI_lu64 "\n", read_event->id);
+ if (read_format & PERF_FORMAT_ID) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... id : %" PRI_lu64 "\n", *array++);
+ }
- if (read_format & PERF_FORMAT_LOST)
- printf("... lost : %" PRI_lu64 "\n", read_event->lost);
+ if (read_format & PERF_FORMAT_LOST) {
+ if ((void *)(array + 1) > end)
+ return;
+ printf("... lost : %" PRI_lu64 "\n", *array++);
+ }
}
static struct machine *machines__find_for_cpumode(struct machines *machines,