about summary refs log tree commit diff stats
path: root/init
diff options
author Linus Torvalds <torvalds@linux-foundation.org> 2026-06-15 03:59:45 +0530
committer Linus Torvalds <torvalds@linux-foundation.org> 2026-06-15 03:59:45 +0530
commit 7e0e7bd60d4a812b694c477716597fcb038b00cb (patch)
tree 4ff61d47485803e7dacab1c8ddef0a4c11b512da /init
parent ff8747aacaff8266dd751b8a8648fb728dcc3b21 (diff)
parent aa5c4fe3ba0cb2af90bbcfa7a8ef4fefcd5c2370 (diff)
download ath-7e0e7bd60d4a812b694c477716597fcb038b00cb.tar.gz
Merge tag 'vfs-7.2-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull misc vfs updates from Christian Brauner: "Features: - Reduce pipe->mutex contention by pre-allocating pages outside the lock in anon_pipe_write(). anon_pipe_write() called alloc_page() once per page while holding pipe->mutex. The allocation can sleep doing direct reclaim and runs memcg charging, which extends the critical section and stalls any concurrent reader on the same mutex. Now up to 8 pages are pre-allocated before the mutex is taken, leftovers are recycled into the per-pipe tmp_page[] cache before unlock, and any remainder is released after unlock, keeping the allocator out of the critical section on both sides. On a writers x readers sweep with 64KB writes against a 1 MB pipe throughput improves 6-28% and average write latency drops 5-22%; under memory pressure - when the cost of holding the mutex across reclaim is highest - throughput improves 21-48% and latency drops 17-33%. The microbenchmark is added to selftests. - uaccess/sockptr: fix the ignored_trailing logic in copy_struct_to_user() to behave as documented and the usize check in copy_struct_from_sockptr() for user pointers, and add copy_struct_{from,to}_bounce_buffer() and copy_struct_to_sockptr() helpers for upcoming users (IPPROTO_SMBDIRECT, IPPROTO_QUIC). - bpf: add a sleepable bpf_real_inode() kfunc that resolves the real inode backing a dentry via d_real_inode(). On overlayfs the inode attached to the dentry doesn't carry the underlying device information; this is used by the filesystem restriction BPF program that was merged into systemd. - docs: add guidelines for submitting new filesystems, motivated by the maintenance burden abandoned and untestable filesystems impose on VFS developers, blocking infrastructure work like folio conversions and iomap migration. Fixes: - libfs: set SB_I_NOEXEC and SB_I_NODEV by default in init_pseudo() and drop the now-redundant assignments in callers. 
This began as a one-line dma-buf fix for a path_noexec() warning; a pseudo filesystem has no reason not to set SB_I_NOEXEC. All init_pseudo() callers were audited: the only visible effect is on dma-buf where SB_I_NOEXEC silences the warning. - Handle set_blocksize() failures in legacy filesystems (bfs, hpfs, qnx4, jfs, befs, affs, isofs, minix, ntfs3, omfs). Mounting a device with a sector size > PAGE_SIZE crashed roughly half of them; the rest had the same missing error handling pattern. Plus a follow-up releasing the superblock buffer_head when setting the minix v3 block size fails. - mount: honour SB_NOUSER in the new mount API. - fs/fcntl: fix a SOFTIRQ-unsafe lock order in fasync signaling by switching the process-group paths of send_sigio() and send_sigurg() from read_lock(&tasklist_lock) to RCU, matching the single-PID path. - vfs: add an FS_USERNS_DELEGATABLE flag and set it for NFS, fixing delegated NFS mounts (fsopen() in a container with the mount performed by a privileged daemon) that broke when non-init s_user_ns was tied to FS_USERNS_MOUNT. - selftests/namespaces: fix a hang in nsid_test where an unreaped grandchild kept the TAP pipe write-end open, a waitpid(-1) race in listns_efault_test, and a false FAIL on kernels without listns() where the tests should SKIP. - filelock: fix the break_lease() stub signature for CONFIG_FILE_LOCKING=n. - init/initramfs_test: wait for the async initramfs unpacking before running; the test and do_populate_rootfs() share the parser state. - fs/coredump: reduce redundant log noise in validate_coredump_safety(). - iomap: pass the correct length to fserror_report_io() in __iomap_write_begin(). - backing-file: fix the backing_file_open() kerneldoc. Cleanups: - initramfs: refactor the cpio hex header parsing to use hex2bin() instead of the hand-rolled simple_strntoul() which is reverted, and extend the initramfs KUnit tests to cover header fields with 0x prefixes. 
- Replace __get_free_pages() and friends with kmalloc()/kzalloc() across quota, proc, ocfs2/dlm, nilfs2, nfs, nfsd, libfs, jfs, jbd2, isofs, fuse, select, namespace, configfs, binfmt_misc, bfs, and the do_mounts init code - part of the larger work of replacing page allocator calls with kmalloc(). - Use clear_and_wake_up_bit() in unlock_buffer() and journal_end_buffer_io_sync() instead of open-coding the sequence. - Drop unused VFS exports: unexport drop_super_exclusive(), remove start_removing_user_path_at(), and fold __start_removing_path() into start_removing_path(). - fs/read_write: narrow the __kernel_write() export with EXPORT_SYMBOL_FOR_MODULES(). - vfs: uapi: retire octal and hex constants in favor of (1 << n) for the O_ flags. Finding a free bit for a new flag across the architectures was needlessly hard with the mixed bases. - dcache: add extra sanity checks of dead dentries in dentry_free() via a new DENTRY_WARN_ONCE() that also prints d_flags. - iov_iter: use kmemdup_array() in dup_iter() to harden the allocation against multiplication overflow. - fs/pipe: write to ->poll_usage only once. - vfs: remove an always-taken if-branch in find_next_fd(). - dcache: use kmalloc_flex() for struct external_name in __d_alloc(). - namei: use QSTR() instead of QSTR_INIT() in path_pts(). - sync_file_range: delete dead S_ISLNK code. 
- Comment fixes: retire a stale comment in fget_task_next() and fix assorted spelling mistakes"

* tag 'vfs-7.2-rc1.misc' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (73 commits)
  backing-file: fix backing_file_open() kerneldoc parameter
  iomap: pass the correct len to fserror_report_io in __iomap_write_begin
  vfs: add FS_USERNS_DELEGATABLE flag and set it for NFS
  filelock: fix break_lease() stub signature for CONFIG_FILE_LOCKING=n
  vfs: uapi: retire octal and hex numbers in favor of (1 << n) for O_ flags
  bpf: add bpf_real_inode() kfunc
  fs/read_write: Do not export __kernel_write() to the entire world
  libfs: drop redundant SB_I_NOEXEC/SB_I_NODEV in init_pseudo() callers
  libfs: set SB_I_NOEXEC and SB_I_NODEV by default in init_pseudo()
  mount: honour SB_NOUSER in the new mount API
  fs/fcntl: fix SOFTIRQ-unsafe lock order in fasync signaling
  selftests/pipe: add pipe_bench microbenchmark
  fs/pipe: pre-allocate pages outside pipe->mutex in anon_pipe_write
  fs: retire stale comment in fget_task_next()
  fs: fix spelling mistakes in comment
  bfs: replace get_zeroed_page() with kzalloc()
  binfmt_misc: replace __get_free_page() with kmalloc()
  configfs: replace __get_free_pages() with kzalloc()
  fs/namespace: use __getname() to allocate mntpath buffer
  fs/select: replace __get_free_page() with kmalloc()
  ...
Diffstat (limited to 'init')
-rw-r--r-- init/do_mounts.c 21
-rw-r--r-- init/initramfs.c 68
-rw-r--r-- init/initramfs_test.c 97
3 files changed, 129 insertions, 57 deletions
diff --git a/init/do_mounts.c b/init/do_mounts.c
index 55ed3ac0b70fd..95e0b3a0f711b 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -143,16 +143,14 @@ static int __init do_mount_root(const char *name, const char *fs,
const int flags, const void *data)
{
struct super_block *s;
- struct page *p = NULL;
char *data_page = NULL;
int ret;
if (data) {
/* init_mount() requires a full page as fifth argument */
- p = alloc_page(GFP_KERNEL);
- if (!p)
+ data_page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!data_page)
return -ENOMEM;
- data_page = page_address(p);
strscpy_pad(data_page, data, PAGE_SIZE);
}
@@ -170,19 +168,20 @@ static int __init do_mount_root(const char *name, const char *fs,
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
out:
- if (p)
- put_page(p);
+ kfree(data_page);
return ret;
}
void __init mount_root_generic(char *name, char *pretty_name, int flags)
{
- struct page *page = alloc_page(GFP_KERNEL);
- char *fs_names = page_address(page);
+ char *fs_names = kmalloc(PAGE_SIZE, GFP_KERNEL);
char *p;
char b[BDEVNAME_SIZE];
int num_fs, i;
+ if (!fs_names)
+ panic("VFS: Unable to mount root fs: not enough memory");
+
scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names)
@@ -242,7 +241,7 @@ retry:
printk("\n");
panic("VFS: Unable to mount root fs on \"%s\" or %s", pretty_name, b);
out:
- put_page(page);
+ kfree(fs_names);
}
#ifdef CONFIG_ROOT_NFS
@@ -343,7 +342,7 @@ static int __init mount_nodev_root(char *root_device_name)
int err = -EINVAL;
int num_fs, i;
- fs_names = (void *)__get_free_page(GFP_KERNEL);
+ fs_names = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!fs_names)
return -EINVAL;
num_fs = split_fs_names(fs_names, PAGE_SIZE);
@@ -360,7 +359,7 @@ static int __init mount_nodev_root(char *root_device_name)
break;
}
- free_page((unsigned long)fs_names);
+ kfree(fs_names);
return err;
}
diff --git a/init/initramfs.c b/init/initramfs.c
index 58db15fb18fd8..20a18fcda48ee 100644
--- a/init/initramfs.c
+++ b/init/initramfs.c
@@ -1,25 +1,28 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/init.h>
#include <linux/async.h>
-#include <linux/export.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/fcntl.h>
#include <linux/delay.h>
-#include <linux/string.h>
#include <linux/dirent.h>
-#include <linux/syscalls.h>
-#include <linux/utime.h>
+#include <linux/export.h>
+#include <linux/fcntl.h>
#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/hex.h>
+#include <linux/init.h>
+#include <linux/init_syscalls.h>
#include <linux/kstrtox.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/namei.h>
-#include <linux/init_syscalls.h>
-#include <linux/umh.h>
-#include <linux/security.h>
#include <linux/overflow.h>
+#include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/syscalls.h>
+#include <linux/types.h>
+#include <linux/umh.h>
+#include <linux/utime.h>
+
+#include <asm/byteorder.h>
#include "do_mounts.h"
#include "initramfs_internal.h"
@@ -190,26 +193,30 @@ static __initdata gid_t gid;
static __initdata unsigned rdev;
static __initdata u32 hdr_csum;
-static void __init parse_header(char *s)
+static int __init parse_header(char *s)
{
- unsigned long parsed[13];
- int i;
+ __be32 header[13];
+ int ret;
- for (i = 0, s += 6; i < 13; i++, s += 8)
- parsed[i] = simple_strntoul(s, NULL, 16, 8);
+ ret = hex2bin((u8 *)header, s + 6, sizeof(header));
+ if (ret) {
+ error("damaged header");
+ return ret;
+ }
- ino = parsed[0];
- mode = parsed[1];
- uid = parsed[2];
- gid = parsed[3];
- nlink = parsed[4];
- mtime = parsed[5]; /* breaks in y2106 */
- body_len = parsed[6];
- major = parsed[7];
- minor = parsed[8];
- rdev = new_encode_dev(MKDEV(parsed[9], parsed[10]));
- name_len = parsed[11];
- hdr_csum = parsed[12];
+ ino = be32_to_cpu(header[0]);
+ mode = be32_to_cpu(header[1]);
+ uid = be32_to_cpu(header[2]);
+ gid = be32_to_cpu(header[3]);
+ nlink = be32_to_cpu(header[4]);
+ mtime = be32_to_cpu(header[5]); /* breaks in y2106 */
+ body_len = be32_to_cpu(header[6]);
+ major = be32_to_cpu(header[7]);
+ minor = be32_to_cpu(header[8]);
+ rdev = new_encode_dev(MKDEV(be32_to_cpu(header[9]), be32_to_cpu(header[10])));
+ name_len = be32_to_cpu(header[11]);
+ hdr_csum = be32_to_cpu(header[12]);
+ return 0;
}
/* Finite-state machine */
@@ -289,7 +296,8 @@ static int __init do_header(void)
error("no cpio magic");
return 1;
}
- parse_header(collected);
+ if (parse_header(collected))
+ return 1;
next_header = this_header + N_ALIGN(name_len) + body_len;
next_header = (next_header + 3) & ~3;
state = SkipIt;
diff --git a/init/initramfs_test.c b/init/initramfs_test.c
index 2ce38d9a8fd0f..bc55306d226dc 100644
--- a/init/initramfs_test.c
+++ b/init/initramfs_test.c
@@ -3,7 +3,9 @@
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/init.h>
#include <linux/init_syscalls.h>
+#include <linux/initrd.h>
#include <linux/stringify.h>
#include <linux/timekeeping.h>
#include "initramfs_internal.h"
@@ -27,7 +29,18 @@ struct initramfs_test_cpio {
char *data;
};
-static size_t fill_cpio(struct initramfs_test_cpio *cs, size_t csz, char *out)
+/* regular newc header format */
+#define CPIO_HDR_FMT "%s%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%s"
+/*
+ * Bogus newc header with "0x" prefixes on the uid, gid, and namesize values.
+ * parse_header()/simple_str[n]toul() accepted this, contrary to the initramfs
+ * specification. hex2bin() now fails.
+ */
+#define CPIO_HDR_OX_INJECT \
+ "%s%08x%08x0x%06x0X%06x%08x%08x%08x%08x%08x%08x%08x0x%06x%08x%s"
+
+static size_t fill_cpio(struct initramfs_test_cpio *cs, size_t csz,
+ bool inject_ox, char *out)
{
int i;
size_t off = 0;
@@ -38,9 +51,8 @@ static size_t fill_cpio(struct initramfs_test_cpio *cs, size_t csz, char *out)
size_t thislen;
/* +1 to account for nulterm */
- thislen = sprintf(pos, "%s"
- "%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x%08x"
- "%s",
+ thislen = sprintf(pos,
+ inject_ox ? CPIO_HDR_OX_INJECT : CPIO_HDR_FMT,
c->magic, c->ino, c->mode, c->uid, c->gid, c->nlink,
c->mtime, c->filesize, c->devmajor, c->devminor,
c->rdevmajor, c->rdevminor, c->namesize, c->csum,
@@ -102,7 +114,7 @@ static void __init initramfs_test_extract(struct kunit *test)
/* +3 to cater for any 4-byte end-alignment */
cpio_srcbuf = kzalloc(ARRAY_SIZE(c) * (CPIO_HDRLEN + PATH_MAX + 3),
GFP_KERNEL);
- len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, cpio_srcbuf);
ktime_get_real_ts64(&ts_before);
err = unpack_to_rootfs(cpio_srcbuf, len);
@@ -177,7 +189,7 @@ static void __init initramfs_test_fname_overrun(struct kunit *test)
/* limit overrun to avoid crashes / filp_open() ENAMETOOLONG */
cpio_srcbuf[CPIO_HDRLEN + strlen(c[0].fname) + 20] = '\0';
- len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, cpio_srcbuf);
/* overwrite trailing fname terminator and padding */
suffix_off = len - 1;
while (cpio_srcbuf[suffix_off] == '\0') {
@@ -219,7 +231,7 @@ static void __init initramfs_test_data(struct kunit *test)
cpio_srcbuf = kmalloc(CPIO_HDRLEN + c[0].namesize + c[0].filesize + 6,
GFP_KERNEL);
- len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, cpio_srcbuf);
err = unpack_to_rootfs(cpio_srcbuf, len);
KUNIT_EXPECT_NULL(test, err);
@@ -274,7 +286,7 @@ static void __init initramfs_test_csum(struct kunit *test)
cpio_srcbuf = kmalloc(8192, GFP_KERNEL);
- len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, cpio_srcbuf);
err = unpack_to_rootfs(cpio_srcbuf, len);
KUNIT_EXPECT_NULL(test, err);
@@ -284,7 +296,7 @@ static void __init initramfs_test_csum(struct kunit *test)
/* mess up the csum and confirm that unpack fails */
c[0].csum--;
- len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, cpio_srcbuf);
err = unpack_to_rootfs(cpio_srcbuf, len);
KUNIT_EXPECT_NOT_NULL(test, err);
@@ -306,7 +318,7 @@ static void __init initramfs_test_hardlink(struct kunit *test)
{
char *err, *cpio_srcbuf;
size_t len;
- struct kstat st0, st1;
+ struct kstat st0 = {}, st1 = {};
struct initramfs_test_cpio c[] = { {
.magic = "070701",
.ino = 1,
@@ -330,7 +342,7 @@ static void __init initramfs_test_hardlink(struct kunit *test)
cpio_srcbuf = kmalloc(8192, GFP_KERNEL);
- len = fill_cpio(c, ARRAY_SIZE(c), cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, cpio_srcbuf);
err = unpack_to_rootfs(cpio_srcbuf, len);
KUNIT_EXPECT_NULL(test, err);
@@ -371,7 +383,7 @@ static void __init initramfs_test_many(struct kunit *test)
};
c.namesize = 1 + sprintf(thispath, "initramfs_test_many-%d", i);
- p += fill_cpio(&c, 1, p);
+ p += fill_cpio(&c, 1, false, p);
}
len = p - cpio_srcbuf;
@@ -425,7 +437,7 @@ static void __init initramfs_test_fname_pad(struct kunit *test)
} };
memcpy(tbufs->padded_fname, "padded_fname", sizeof("padded_fname"));
- len = fill_cpio(c, ARRAY_SIZE(c), tbufs->cpio_srcbuf);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, tbufs->cpio_srcbuf);
err = unpack_to_rootfs(tbufs->cpio_srcbuf, len);
KUNIT_EXPECT_NULL(test, err);
@@ -451,7 +463,7 @@ static void __init initramfs_test_fname_path_max(struct kunit *test)
{
char *err;
size_t len;
- struct kstat st0, st1;
+ struct kstat st0 = {}, st1 = {};
char fdata[] = "this file data will not be unpacked";
struct test_fname_path_max {
char fname_oversize[PATH_MAX + 1];
@@ -481,7 +493,7 @@ static void __init initramfs_test_fname_path_max(struct kunit *test)
memcpy(tbufs->fname_oversize, "fname_oversize",
sizeof("fname_oversize") - 1);
memcpy(tbufs->fname_ok, "fname_ok", sizeof("fname_ok") - 1);
- len = fill_cpio(c, ARRAY_SIZE(c), tbufs->cpio_src);
+ len = fill_cpio(c, ARRAY_SIZE(c), false, tbufs->cpio_src);
/* unpack skips over fname_oversize instead of returning an error */
err = unpack_to_rootfs(tbufs->cpio_src, len);
@@ -494,6 +506,45 @@ static void __init initramfs_test_fname_path_max(struct kunit *test)
kfree(tbufs);
}
+static void __init initramfs_test_hdr_hex(struct kunit *test)
+{
+ char *err;
+ size_t len;
+ char fdata[] = "this file data will not be unpacked";
+ struct initramfs_test_bufs {
+ char cpio_src[(CPIO_HDRLEN + PATH_MAX + 3 + sizeof(fdata)) * 2];
+ } *tbufs = kzalloc(sizeof(struct initramfs_test_bufs), GFP_KERNEL);
+ struct initramfs_test_cpio c[] = { {
+ .magic = "070701",
+ .ino = 1,
+ .mode = S_IFREG | 0777,
+ .uid = 0x123456,
+ .gid = 0x123457,
+ .nlink = 1,
+ .namesize = sizeof("initramfs_test_hdr_hex_0"),
+ .fname = "initramfs_test_hdr_hex_0",
+ .filesize = sizeof(fdata),
+ .data = fdata,
+ }, {
+ .magic = "070701",
+ .ino = 2,
+ .mode = S_IFDIR | 0777,
+ .uid = 0x000056,
+ .gid = 0x000057,
+ .nlink = 1,
+ .namesize = sizeof("initramfs_test_hdr_hex_1"),
+ .fname = "initramfs_test_hdr_hex_1",
+ } };
+
+ /* inject_ox=true to add "0x" cpio field prefixes */
+ len = fill_cpio(c, ARRAY_SIZE(c), true, tbufs->cpio_src);
+
+ err = unpack_to_rootfs(tbufs->cpio_src, len);
+ KUNIT_EXPECT_NOT_NULL(test, err);
+
+ kfree(tbufs);
+}
+
/*
* The kunit_case/_suite struct cannot be marked as __initdata as this will be
* used in debugfs to retrieve results after test has run.
@@ -507,11 +558,25 @@ static struct kunit_case __refdata initramfs_test_cases[] = {
KUNIT_CASE(initramfs_test_many),
KUNIT_CASE(initramfs_test_fname_pad),
KUNIT_CASE(initramfs_test_fname_path_max),
+ KUNIT_CASE(initramfs_test_hdr_hex),
{},
};
-static struct kunit_suite initramfs_test_suite = {
+static int __init initramfs_test_init(struct kunit_suite *suite)
+{
+ /*
+ * unpack_to_rootfs() uses module-static state (victim, byte_count,
+ * state, ...). The boot-time async do_populate_rootfs() may still be
+ * running, so wait for it to finish before we call unpack_to_rootfs()
+ * from the test thread, otherwise the two writers race and crash.
+ */
+ wait_for_initramfs();
+ return 0;
+}
+
+static struct kunit_suite __refdata initramfs_test_suite = {
.name = "initramfs",
+ .suite_init = initramfs_test_init,
.test_cases = initramfs_test_cases,
};
kunit_test_init_section_suites(&initramfs_test_suite);