aboutsummaryrefslogtreecommitdiffstats
path: root/tools
diff options
authorMark Brown <broonie@kernel.org>2026-05-29 14:59:13 +0100
committerMark Brown <broonie@kernel.org>2026-05-29 14:59:13 +0100
commitd692b8845cb3cfa4d005a5a360a26f692b09d178 (patch)
tree01330ed4686ce3e141e8bf069b7bc11bf770f6f2 /tools
parent98e9aea604a182a40f8fb4b77ac426b3fdf00031 (diff)
parent4bd540bd9a0d7a2e8403a139e9f7631b06a57e89 (diff)
downloadlinux-next-history-d692b8845cb3cfa4d005a5a360a26f692b09d178.tar.gz
next-20260522/vfs-brauner
# Conflicts: # fs/fuse/dev.c
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/alpha/include/uapi/asm/errno.h2
-rw-r--r--tools/arch/mips/include/uapi/asm/errno.h2
-rw-r--r--tools/arch/parisc/include/uapi/asm/errno.h2
-rw-r--r--tools/arch/sparc/include/uapi/asm/errno.h2
-rw-r--r--tools/include/uapi/asm-generic/errno.h2
-rw-r--r--tools/include/uapi/linux/openat2.h43
-rw-r--r--tools/testing/selftests/filesystems/openat2/.gitignore (renamed from tools/testing/selftests/openat2/.gitignore)0
-rw-r--r--tools/testing/selftests/filesystems/openat2/Makefile (renamed from tools/testing/selftests/openat2/Makefile)9
-rw-r--r--tools/testing/selftests/filesystems/openat2/emptypath_test.c77
-rw-r--r--tools/testing/selftests/filesystems/openat2/helpers.h135
-rw-r--r--tools/testing/selftests/filesystems/openat2/openat2_test.c (renamed from tools/testing/selftests/openat2/openat2_test.c)258
-rw-r--r--tools/testing/selftests/filesystems/openat2/rename_attack_test.c159
-rw-r--r--tools/testing/selftests/filesystems/openat2/resolve_test.c (renamed from tools/testing/selftests/openat2/resolve_test.c)354
-rw-r--r--tools/testing/selftests/namespaces/listns_efault_test.c33
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c14
-rw-r--r--tools/testing/selftests/openat2/helpers.c109
-rw-r--r--tools/testing/selftests/openat2/helpers.h108
-rw-r--r--tools/testing/selftests/openat2/rename_attack_test.c160
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c156
19 files changed, 929 insertions, 696 deletions
diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h
index 6791f6508632e..1a99f38813c79 100644
--- a/tools/arch/alpha/include/uapi/asm/errno.h
+++ b/tools/arch/alpha/include/uapi/asm/errno.h
@@ -127,4 +127,6 @@
#define EHWPOISON 139 /* Memory page has hardware error */
+#define EFTYPE 140 /* Wrong file type for the intended operation */
+
#endif
diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h
index c01ed91b1ef44..1835a50b69cef 100644
--- a/tools/arch/mips/include/uapi/asm/errno.h
+++ b/tools/arch/mips/include/uapi/asm/errno.h
@@ -126,6 +126,8 @@
#define EHWPOISON 168 /* Memory page has hardware error */
+#define EFTYPE 169 /* Wrong file type for the intended operation */
+
#define EDQUOT 1133 /* Quota exceeded */
diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h
index 8cbc07c1903e4..93194fbb0a80d 100644
--- a/tools/arch/parisc/include/uapi/asm/errno.h
+++ b/tools/arch/parisc/include/uapi/asm/errno.h
@@ -124,4 +124,6 @@
#define EHWPOISON 257 /* Memory page has hardware error */
+#define EFTYPE 258 /* Wrong file type for the intended operation */
+
#endif
diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h
index 4a41e7835fd5b..71940ec9130b4 100644
--- a/tools/arch/sparc/include/uapi/asm/errno.h
+++ b/tools/arch/sparc/include/uapi/asm/errno.h
@@ -117,4 +117,6 @@
#define EHWPOISON 135 /* Memory page has hardware error */
+#define EFTYPE 136 /* Wrong file type for the intended operation */
+
#endif
diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h
index 92e7ae493ee31..bd78e69e0a43f 100644
--- a/tools/include/uapi/asm-generic/errno.h
+++ b/tools/include/uapi/asm-generic/errno.h
@@ -122,4 +122,6 @@
#define EHWPOISON 133 /* Memory page has hardware error */
+#define EFTYPE 134 /* Wrong file type for the intended operation */
+
#endif
diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h
new file mode 100644
index 0000000000000..4759c471676cc
--- /dev/null
+++ b/tools/include/uapi/linux/openat2.h
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _LINUX_OPENAT2_H
+#define _LINUX_OPENAT2_H
+
+#include <linux/types.h>
+
+/*
+ * Arguments for how openat2(2) should open the target path. If only @flags and
+ * @mode are non-zero, then openat2(2) operates very similarly to openat(2).
+ *
+ * However, unlike openat(2), unknown or invalid bits in @flags result in
+ * -EINVAL rather than being silently ignored. @mode must be zero unless one of
+ * {O_CREAT, O_TMPFILE} is set.
+ *
+ * @flags: O_* flags.
+ * @mode: O_CREAT/O_TMPFILE file mode.
+ * @resolve: RESOLVE_* flags.
+ */
+struct open_how {
+ __u64 flags;
+ __u64 mode;
+ __u64 resolve;
+};
+
+/* how->resolve flags for openat2(2). */
+#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
+ (includes bind-mounts). */
+#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
+ "magic-links". */
+#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
+ (implies RESOLVE_NO_MAGICLINKS). */
+#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
+ "..", symlinks, and absolute
+ paths which escape the dirfd. */
+#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
+ be scoped inside the dirfd
+ (similar to chroot(2)). */
+#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be
+ completed through cached lookup. May
+ return -EAGAIN if that's not
+ possible. */
+
+#endif /* _LINUX_OPENAT2_H */
diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/filesystems/openat2/.gitignore
index 82a4846cbc4b2..82a4846cbc4b2 100644
--- a/tools/testing/selftests/openat2/.gitignore
+++ b/tools/testing/selftests/filesystems/openat2/.gitignore
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/filesystems/openat2/Makefile
index 185dc76ebb5fc..d848aac96bded 100644
--- a/tools/testing/selftests/openat2/Makefile
+++ b/tools/testing/selftests/filesystems/openat2/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
-TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(TOOLS_INCLUDES)
+TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test emptypath_test
# gcc requires -static-libasan in order to ensure that Address Sanitizer's
# library is the first one loaded. However, clang already statically links the
@@ -13,6 +14,4 @@ endif
LOCAL_HDRS += helpers.h
-include ../lib.mk
-
-$(TEST_GEN_PROGS): helpers.c
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/openat2/emptypath_test.c b/tools/testing/selftests/filesystems/openat2/emptypath_test.c
new file mode 100644
index 0000000000000..be37ccba57ecf
--- /dev/null
+++ b/tools/testing/selftests/filesystems/openat2/emptypath_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+
+#include "kselftest_harness.h"
+
+#ifndef O_EMPTYPATH
+#define O_EMPTYPATH (1 << 26)
+#endif
+
+#define EMPTYPATH_TEST_FILE "/tmp/emptypath_test"
+
+/*
+ * Per-test state: an O_PATH descriptor for EMPTYPATH_TEST_FILE, or -1 while
+ * no descriptor is open.
+ */
+FIXTURE(emptypath) {
+ int opath_fd;
+};
+
+/*
+ * Create EMPTYPATH_TEST_FILE and keep an O_PATH descriptor for it in
+ * self->opath_fd, so that the tests can pass it as the dirfd of an
+ * empty-path openat().
+ */
+FIXTURE_SETUP(emptypath)
+{
+ int fd;
+
+ /* Lets teardown tell "never opened" apart from a valid descriptor. */
+ self->opath_fd = -1;
+
+ /* Make sure the file exists; the writable descriptor itself is not needed. */
+ fd = open(EMPTYPATH_TEST_FILE, O_CREAT | O_WRONLY, S_IRWXU);
+ ASSERT_GE(fd, 0) {
+ TH_LOG("create %s: %s", EMPTYPATH_TEST_FILE, strerror(errno));
+ }
+ close(fd);
+
+ /* This O_PATH descriptor is what the tests try to reopen. */
+ self->opath_fd = open(EMPTYPATH_TEST_FILE, O_PATH);
+ ASSERT_GE(self->opath_fd, 0) {
+ TH_LOG("open %s O_PATH: %s", EMPTYPATH_TEST_FILE, strerror(errno));
+ }
+}
+
+/* Close the O_PATH descriptor (if one was opened) and remove the test file. */
+FIXTURE_TEARDOWN(emptypath)
+{
+ if (self->opath_fd >= 0)
+ close(self->opath_fd);
+ /* Best effort: the unlink() result is not checked. */
+ unlink(EMPTYPATH_TEST_FILE);
+}
+
+/*
+ * An empty path is rejected with ENOENT unless O_EMPTYPATH is set.
+ *
+ * errno is captured immediately after openat(): the later close() and the
+ * harness's own failure logging may overwrite it before it is reported.
+ */
+TEST_F(emptypath, without_flag_returns_enoent)
+{
+	int fd = openat(self->opath_fd, "", O_RDONLY);
+	int err = errno;
+
+	if (fd >= 0)
+		close(fd);
+	ASSERT_LT(fd, 0) {
+		TH_LOG("empty path without O_EMPTYPATH unexpectedly succeeded");
+	}
+	EXPECT_EQ(err, ENOENT) {
+		TH_LOG("expected ENOENT, got %s", strerror(err));
+	}
+}
+
+/*
+ * O_EMPTYPATH reopens the O_PATH fd through an empty path.
+ *
+ * An EINVAL failure is treated as "O_EMPTYPATH not supported" and skips the
+ * test; any other failure fails it.
+ */
+TEST_F(emptypath, reopens_opath_fd)
+{
+ int fd = openat(self->opath_fd, "", O_RDONLY | O_EMPTYPATH);
+
+ if (fd < 0 && errno == EINVAL)
+ SKIP(return, "O_EMPTYPATH not supported");
+
+ ASSERT_GE(fd, 0) {
+ TH_LOG("O_EMPTYPATH failed: %s", strerror(errno));
+ }
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/openat2/helpers.h b/tools/testing/selftests/filesystems/openat2/helpers.h
new file mode 100644
index 0000000000000..3f01fb68c5a6d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/openat2/helpers.h
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ * Copyright (C) 2026 Amutable GmbH
+ */
+
+#ifndef __RESOLVEAT_H__
+#define __RESOLVEAT_H__
+
+#define _GNU_SOURCE
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <linux/unistd.h>
+#include <linux/openat2.h>
+#include "kselftest_harness.h"
+
+#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); })))
+
+#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */
+#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0
+
+/* Return true when @how has any bit set in its resolve field. */
+__maybe_unused
+static bool needs_openat2(const struct open_how *how)
+{
+	if (how->resolve)
+		return true;
+	return false;
+}
+
+/*
+ * Invoke the openat2 syscall directly with a caller-supplied @how buffer and
+ * @size. Returns the new file descriptor on success or -errno on failure.
+ */
+__maybe_unused
+static int raw_openat2(int dfd, const char *path, void *how, size_t size)
+{
+	int fd = syscall(__NR_openat2, dfd, path, how, size);
+
+	if (fd < 0)
+		return -errno;
+	return fd;
+}
+
+/*
+ * openat2 wrapper for a regular struct open_how: passes the struct's own size
+ * to raw_openat2() and returns its result (fd, or -errno on failure).
+ */
+__maybe_unused
+static int sys_openat2(int dfd, const char *path, struct open_how *how)
+{
+	size_t how_size = sizeof(struct open_how);
+
+	return raw_openat2(dfd, path, how, how_size);
+}
+
+/*
+ * openat(2) using the flags and mode stored in @how (the resolve field is not
+ * used). Returns the new file descriptor on success or -errno on failure.
+ */
+__maybe_unused
+static int sys_openat(int dfd, const char *path, struct open_how *how)
+{
+	int fd = openat(dfd, path, how->flags, how->mode);
+
+	if (fd < 0)
+		return -errno;
+	return fd;
+}
+
+/*
+ * Invoke the renameat2 syscall directly. Returns the syscall's non-negative
+ * result on success or -errno on failure.
+ */
+__maybe_unused
+static int sys_renameat2(int olddirfd, const char *oldpath,
+			 int newdirfd, const char *newpath, unsigned int flags)
+{
+	int rc = syscall(__NR_renameat2, olddirfd, oldpath,
+			 newdirfd, newpath, flags);
+
+	if (rc < 0)
+		return -errno;
+	return rc;
+}
+
+/*
+ * Create @path relative to @dfd (mode 0700) if it does not exist yet.
+ *
+ * Returns openat()'s result: negative on failure, otherwise the number of a
+ * descriptor that has already been closed again, so callers may only test the
+ * sign.
+ */
+__maybe_unused
+static int touchat(int dfd, const char *path)
+{
+	int fd = openat(dfd, path, O_CREAT, 0700);
+
+	if (fd < 0)
+		return fd;
+
+	close(fd);
+	return fd;
+}
+
+/*
+ * Return the target of the /proc/self/fd/@fd symlink as a newly allocated,
+ * NUL-terminated string of at most PATH_MAX - 1 bytes. The caller owns the
+ * buffer and must free() it.
+ *
+ * Every step is checked with the harness ASSERT_*() macros, which is why the
+ * test's @_metadata has to be passed in.
+ */
+__maybe_unused
+static char *fdreadlink(struct __test_metadata *_metadata, int fd)
+{
+	char *target, *tmp;
+
+	ASSERT_GT(asprintf(&tmp, "/proc/self/fd/%d", fd), 0);
+
+	target = malloc(PATH_MAX);
+	ASSERT_NE(target, NULL);
+	memset(target, 0, PATH_MAX);
+
+	/*
+	 * readlink(2) does not append a NUL byte. Keep the last byte of the
+	 * zeroed buffer out of its reach so the result is always a valid C
+	 * string for the strcmp()/"%s" users.
+	 */
+	ASSERT_GT(readlink(tmp, target, PATH_MAX - 1), 0);
+
+	free(tmp);
+	return target;
+}
+
+/*
+ * Compare the path behind @fd (as reported by fdreadlink()) with an expected
+ * path:
+ *  - @path == NULL:       the path behind @dfd itself;
+ *  - @path starts with /: @path taken as-is;
+ *  - otherwise:           "<path behind @dfd>/<@path>".
+ *
+ * Returns true when the two strings are identical.
+ */
+__maybe_unused
+static bool fdequal(struct __test_metadata *_metadata, int fd,
+		    int dfd, const char *path)
+{
+	char *fdpath = fdreadlink(_metadata, fd);
+	char *dfdpath = fdreadlink(_metadata, dfd);
+	char *other;
+	bool same;
+
+	/* Build the expected path according to the form of @path. */
+	if (path && *path == '/') {
+		ASSERT_GT(asprintf(&other, "%s", path), 0);
+	} else if (path) {
+		ASSERT_GT(asprintf(&other, "%s/%s", dfdpath, path), 0);
+	} else {
+		ASSERT_GT(asprintf(&other, "%s", dfdpath), 0);
+	}
+
+	same = (strcmp(fdpath, other) == 0);
+
+	free(other);
+	free(dfdpath);
+	free(fdpath);
+	return same;
+}
+
+/* Set by the constructor below: true when the openat2(2) probe succeeded. */
+static bool openat2_supported = false;
+
+/*
+ * Constructor: probe for openat2(2) by opening "." with an all-zero
+ * struct open_how and record the outcome in openat2_supported. Also checks at
+ * build time that struct open_how still has its first published size.
+ */
+__attribute__((constructor))
+static void __detect_openat2_supported(void)
+{
+	struct open_how how = {};
+	int probe_fd;
+
+	BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_VER0);
+
+	/* Check openat2(2) support. */
+	probe_fd = sys_openat2(AT_FDCWD, ".", &how);
+	if (probe_fd < 0) {
+		openat2_supported = false;
+		return;
+	}
+
+	openat2_supported = true;
+	close(probe_fd);
+}
+
+#endif /* __RESOLVEAT_H__ */
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/filesystems/openat2/openat2_test.c
index 0e161ef9e9e48..6f5afbe2d8d31 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/filesystems/openat2/openat2_test.c
@@ -15,8 +15,8 @@
#include <stdbool.h>
#include <string.h>
-#include "kselftest.h"
#include "helpers.h"
+#include "kselftest_harness.h"
/*
* O_LARGEFILE is set to 0 by glibc.
@@ -45,13 +45,29 @@ struct struct_test {
int err;
};
-#define NUM_OPENAT2_STRUCT_TESTS 7
-#define NUM_OPENAT2_STRUCT_VARIATIONS 13
+struct flag_test {
+ const char *name;
+ struct open_how how;
+ int err;
+};
+
+FIXTURE(openat2) {};
-void test_openat2_struct(void)
+FIXTURE_SETUP(openat2)
{
- int misalignments[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 17, 87 };
+ if (!openat2_supported)
+ SKIP(return, "openat2(2) not supported");
+}
+
+FIXTURE_TEARDOWN(openat2) {}
+/*
+ * Verify that the struct size and misalignment handling for openat2(2) is
+ * correct, including that is_zeroed_user() works.
+ */
+TEST_F(openat2, struct_argument_sizes)
+{
+ int misalignments[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 17, 87 };
struct struct_test tests[] = {
/* Normal struct. */
{ .name = "normal struct",
@@ -83,26 +99,14 @@ void test_openat2_struct(void)
.size = sizeof(struct open_how_ext), .err = -E2BIG },
};
- BUILD_BUG_ON(ARRAY_LEN(misalignments) != NUM_OPENAT2_STRUCT_VARIATIONS);
- BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_STRUCT_TESTS);
-
- for (int i = 0; i < ARRAY_LEN(tests); i++) {
+ for (int i = 0; i < ARRAY_SIZE(tests); i++) {
struct struct_test *test = &tests[i];
struct open_how_ext how_ext = test->arg;
- for (int j = 0; j < ARRAY_LEN(misalignments); j++) {
+ for (int j = 0; j < ARRAY_SIZE(misalignments); j++) {
int fd, misalign = misalignments[j];
- char *fdpath = NULL;
- bool failed;
- void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
-
void *copy = NULL, *how_copy = &how_ext;
-
- if (!openat2_supported) {
- ksft_print_msg("openat2(2) unsupported\n");
- resultfn = ksft_test_result_skip;
- goto skip;
- }
+ char *fdpath = NULL;
if (misalign) {
/*
@@ -119,50 +123,42 @@ void test_openat2_struct(void)
}
fd = raw_openat2(AT_FDCWD, ".", how_copy, test->size);
- if (test->err >= 0)
- failed = (fd < 0);
- else
- failed = (fd != test->err);
if (fd >= 0) {
- fdpath = fdreadlink(fd);
+ fdpath = fdreadlink(_metadata, fd);
close(fd);
}
- if (failed) {
- resultfn = ksft_test_result_fail;
-
- ksft_print_msg("openat2 unexpectedly returned ");
- if (fdpath)
- ksft_print_msg("%d['%s']\n", fd, fdpath);
- else
- ksft_print_msg("%d (%s)\n", fd, strerror(-fd));
+ if (test->err >= 0) {
+ EXPECT_GE(fd, 0) {
+ TH_LOG("openat2 with %s [misalign=%d] should succeed, got %d (%s)",
+ test->name, misalign,
+ fd, strerror(-fd));
+ }
+ } else {
+ EXPECT_EQ(test->err, fd) {
+ if (fdpath)
+ TH_LOG("openat2 with %s [misalign=%d] should fail with %d (%s), got %d['%s']",
+ test->name, misalign,
+ test->err,
+ strerror(-test->err),
+ fd, fdpath);
+ else
+ TH_LOG("openat2 with %s [misalign=%d] should fail with %d (%s), got %d (%s)",
+ test->name, misalign,
+ test->err,
+ strerror(-test->err),
+ fd, strerror(-fd));
+ }
}
-skip:
- if (test->err >= 0)
- resultfn("openat2 with %s argument [misalign=%d] succeeds\n",
- test->name, misalign);
- else
- resultfn("openat2 with %s argument [misalign=%d] fails with %d (%s)\n",
- test->name, misalign, test->err,
- strerror(-test->err));
-
free(copy);
free(fdpath);
- fflush(stdout);
}
}
}
-struct flag_test {
- const char *name;
- struct open_how how;
- int err;
-};
-
-#define NUM_OPENAT2_FLAG_TESTS 25
-
-void test_openat2_flags(void)
+/* Verify openat2(2) flag and mode validation. */
+TEST_F(openat2, flag_validation)
{
struct flag_test tests[] = {
/* O_TMPFILE is incompatible with O_PATH and O_CREAT. */
@@ -241,20 +237,10 @@ void test_openat2_flags(void)
.how.resolve = 0, .err = -EINVAL },
};
- BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);
-
- for (int i = 0; i < ARRAY_LEN(tests); i++) {
+ for (int i = 0; i < ARRAY_SIZE(tests); i++) {
int fd, fdflags = -1;
char *path, *fdpath = NULL;
- bool failed = false;
struct flag_test *test = &tests[i];
- void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
-
- if (!openat2_supported) {
- ksft_print_msg("openat2(2) unsupported\n");
- resultfn = ksft_test_result_skip;
- goto skip;
- }
path = (test->how.flags & O_CREAT) ? "/tmp/ksft.openat2_tmpfile" : ".";
unlink(path);
@@ -265,74 +251,112 @@ void test_openat2_flags(void)
* Skip the testcase if it failed because not supported
* by FS. (e.g. a valid O_TMPFILE combination on NFS)
*/
- ksft_test_result_skip("openat2 with %s fails with %d (%s)\n",
- test->name, fd, strerror(-fd));
- goto next;
+ TH_LOG("openat2 with %s not supported by FS -- skipping",
+ test->name);
+ continue;
}
- if (test->err >= 0)
- failed = (fd < 0);
- else
- failed = (fd != test->err);
- if (fd >= 0) {
- int otherflags;
-
- fdpath = fdreadlink(fd);
- fdflags = fcntl(fd, F_GETFL);
- otherflags = fcntl(fd, F_GETFD);
- close(fd);
+ if (test->err >= 0) {
+ EXPECT_GE(fd, 0) {
+ TH_LOG("openat2 with %s should succeed, got %d (%s)",
+ test->name, fd, strerror(-fd));
+ }
+ if (fd >= 0) {
+ int otherflags;
- E_assert(fdflags >= 0, "fcntl F_GETFL of new fd");
- E_assert(otherflags >= 0, "fcntl F_GETFD of new fd");
+ fdpath = fdreadlink(_metadata, fd);
+ fdflags = fcntl(fd, F_GETFL);
+ otherflags = fcntl(fd, F_GETFD);
+ close(fd);
- /* O_CLOEXEC isn't shown in F_GETFL. */
- if (otherflags & FD_CLOEXEC)
- fdflags |= O_CLOEXEC;
- /* O_CREAT is hidden from F_GETFL. */
- if (test->how.flags & O_CREAT)
- fdflags |= O_CREAT;
- if (!(test->how.flags & O_LARGEFILE))
- fdflags &= ~O_LARGEFILE;
- failed |= (fdflags != test->how.flags);
- }
+ ASSERT_GE(fdflags, 0);
+ ASSERT_GE(otherflags, 0);
- if (failed) {
- resultfn = ksft_test_result_fail;
+ /* O_CLOEXEC isn't shown in F_GETFL. */
+ if (otherflags & FD_CLOEXEC)
+ fdflags |= O_CLOEXEC;
+ /* O_CREAT is hidden from F_GETFL. */
+ if (test->how.flags & O_CREAT)
+ fdflags |= O_CREAT;
+ if (!(test->how.flags & O_LARGEFILE))
+ fdflags &= ~O_LARGEFILE;
- ksft_print_msg("openat2 unexpectedly returned ");
- if (fdpath)
- ksft_print_msg("%d['%s'] with %X (!= %llX)\n",
- fd, fdpath, fdflags,
- test->how.flags);
- else
- ksft_print_msg("%d (%s)\n", fd, strerror(-fd));
+ EXPECT_EQ(fdflags, (int)test->how.flags) {
+ TH_LOG("openat2 with %s: flags mismatch %X != %llX",
+ test->name, fdflags,
+ (unsigned long long)test->how.flags);
+ }
+ }
+ } else {
+ EXPECT_EQ(test->err, fd) {
+ if (fd >= 0) {
+ fdpath = fdreadlink(_metadata, fd);
+ TH_LOG("openat2 with %s should fail with %d (%s), got %d['%s']",
+ test->name, test->err,
+ strerror(-test->err),
+ fd, fdpath);
+ } else {
+ TH_LOG("openat2 with %s should fail with %d (%s), got %d (%s)",
+ test->name, test->err,
+ strerror(-test->err),
+ fd, strerror(-fd));
+ }
+ }
+ if (fd >= 0)
+ close(fd);
}
-skip:
- if (test->err >= 0)
- resultfn("openat2 with %s succeeds\n", test->name);
- else
- resultfn("openat2 with %s fails with %d (%s)\n",
- test->name, test->err, strerror(-test->err));
-next:
free(fdpath);
- fflush(stdout);
}
}
-#define NUM_TESTS (NUM_OPENAT2_STRUCT_VARIATIONS * NUM_OPENAT2_STRUCT_TESTS + \
- NUM_OPENAT2_FLAG_TESTS)
+#ifndef OPENAT2_REGULAR
+#define OPENAT2_REGULAR ((__u64)1 << 32)
+#endif
+
+#ifndef EFTYPE
+#define EFTYPE 134
+#endif
+
+/* Kernel-internal carrier for OPENAT2_REGULAR (see __O_REGULAR in fcntl.h). */
+#ifndef __O_REGULAR
+#define __O_REGULAR (1 << 30)
+#endif
-int main(int argc, char **argv)
+/* Verify that OPENAT2_REGULAR rejects non-regular files with EFTYPE. */
+TEST_F(openat2, regular_flag)
{
- ksft_print_header();
- ksft_set_plan(NUM_TESTS);
+ struct open_how how = {
+ .flags = OPENAT2_REGULAR | O_RDONLY,
+ };
+ int fd;
- test_openat2_struct();
- test_openat2_flags();
+ fd = sys_openat2(AT_FDCWD, "/dev/null", &how);
+ if (fd == -ENOENT)
+ SKIP(return, "/dev/null does not exist");
- if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
- ksft_exit_fail();
- else
- ksft_exit_pass();
+ EXPECT_EQ(-EFTYPE, fd) {
+ TH_LOG("openat2 with OPENAT2_REGULAR should fail with %d (%s), got %d (%s)",
+ -EFTYPE, strerror(EFTYPE), fd, strerror(-fd));
+ }
+ if (fd >= 0)
+ close(fd);
}
+
+/* open()/openat() must keep ignoring the internal __O_REGULAR bit. */
+TEST(legacy_openat_ignores_o_regular)
+{
+ int fd;
+
+ fd = openat(AT_FDCWD, "/dev/null", O_RDONLY | __O_REGULAR);
+ if (fd < 0 && errno == ENOENT)
+ SKIP(return, "/dev/null does not exist");
+
+ ASSERT_GE(fd, 0) {
+ TH_LOG("legacy openat() must ignore the __O_REGULAR carrier bit, got errno %d (%s)",
+ errno, strerror(errno));
+ }
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/openat2/rename_attack_test.c b/tools/testing/selftests/filesystems/openat2/rename_attack_test.c
new file mode 100644
index 0000000000000..1f33c34f56be9
--- /dev/null
+++ b/tools/testing/selftests/filesystems/openat2/rename_attack_test.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Aleksa Sarai <cyphar@cyphar.com>
+ * Copyright (C) 2018-2019 SUSE LLC.
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <syscall.h>
+#include <limits.h>
+#include <unistd.h>
+
+#include "helpers.h"
+#include "kselftest_harness.h"
+
+#define ROUNDS 400000
+
+/*
+ * Fork a child that swaps @dirfd/@a and @dirfd/@b constantly (RENAME_EXCHANGE
+ * in an endless loop). Parent must kill this process.
+ *
+ * Returns fork()'s result in the parent: the child's pid, or -1 if the fork
+ * failed. Never returns in the child.
+ */
+pid_t spawn_attack(struct __test_metadata *_metadata,
+		   int dirfd, char *a, char *b)
+{
+	pid_t parent = getpid();
+	pid_t child = fork();
+
+	if (child != 0)
+		return child;
+
+	/* If the parent (the test process) dies, kill ourselves too. */
+	ASSERT_EQ(prctl(PR_SET_PDEATHSIG, SIGKILL), 0);
+
+	/*
+	 * The death signal is only delivered for a parent exit that happens
+	 * after the prctl() above. If the parent is already gone we have been
+	 * reparented, so quit instead of spinning forever as an orphan.
+	 */
+	if (getppid() != parent)
+		_exit(1);
+
+	/* Swap @a and @b. */
+	for (;;)
+		renameat2(dirfd, a, dirfd, b, RENAME_EXCHANGE);
+	exit(1);
+}
+
+/*
+ * Construct a test directory with the following structure:
+ *
+ * root/
+ * |-- a/
+ * | `-- c/
+ * `-- b/
+ *
+ * @dfd: O_PATH descriptor for the top-level directory.
+ * @afd: O_PATH descriptor for "a", the directory the test opens paths from.
+ * @child: pid returned by spawn_attack(), or 0 before it has been called.
+ */
+FIXTURE(rename_attack) {
+ int dfd;
+ int afd;
+ pid_t child;
+};
+
+/*
+ * Build the directory tree in a fresh mkdtemp() directory under /tmp, open
+ * the descriptors the test needs and start the attacker process that keeps
+ * exchanging "a/c" and "b".
+ */
+FIXTURE_SETUP(rename_attack)
+{
+ char dirname[] = "/tmp/ksft-openat2-rename-attack.XXXXXX";
+
+ /* Sentinels so that teardown only releases what was actually set up. */
+ self->dfd = -1;
+ self->afd = -1;
+ self->child = 0;
+
+ /* Make the top-level directory. */
+ ASSERT_NE(mkdtemp(dirname), NULL);
+ self->dfd = open(dirname, O_PATH | O_DIRECTORY);
+ ASSERT_GE(self->dfd, 0);
+
+ ASSERT_EQ(mkdirat(self->dfd, "a", 0755), 0);
+ ASSERT_EQ(mkdirat(self->dfd, "b", 0755), 0);
+ ASSERT_EQ(mkdirat(self->dfd, "a/c", 0755), 0);
+
+ self->afd = openat(self->dfd, "a", O_PATH);
+ ASSERT_GE(self->afd, 0);
+
+ /* A fork() failure shows up here as a non-positive pid. */
+ self->child = spawn_attack(_metadata, self->dfd, "a/c", "b");
+ ASSERT_GT(self->child, 0);
+}
+
+/*
+ * Stop the attacker and close the directory descriptors.
+ *
+ * NOTE(review): the child is killed but not waited for, and the temporary
+ * directory tree is not removed here.
+ */
+FIXTURE_TEARDOWN(rename_attack)
+{
+ if (self->child > 0)
+ kill(self->child, SIGKILL);
+ if (self->afd >= 0)
+ close(self->afd);
+ if (self->dfd >= 0)
+ close(self->dfd);
+}
+
+/*
+ * Each variant runs the attack test with a different RESOLVE_* flag.
+ *
+ * @resolve: value the test stores in open_how.resolve.
+ * @name: printable flag name used in the failure message.
+ */
+FIXTURE_VARIANT(rename_attack) {
+ int resolve;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(rename_attack, resolve_beneath) {
+ .resolve = RESOLVE_BENEATH,
+ .name = "RESOLVE_BENEATH",
+};
+
+FIXTURE_VARIANT_ADD(rename_attack, resolve_in_root) {
+ .resolve = RESOLVE_IN_ROOT,
+ .name = "RESOLVE_IN_ROOT",
+};
+
+/*
+ * Open a "c/../.." heavy path relative to "a" ROUNDS times while the attacker
+ * process keeps exchanging "a/c" and "b", and classify every result. The test
+ * fails if any attempt is counted as an escape: an -ENOENT error, or a
+ * descriptor whose path differs from the path behind self->afd.
+ *
+ * If openat2(2) is not available, the resolve flags are cleared and plain
+ * openat(2) is used instead.
+ */
+TEST_F_TIMEOUT(rename_attack, test, 120)
+{
+ int escapes = 0, successes = 0, other_errs = 0, exdevs = 0, eagains = 0;
+ char *victim_path = "c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../..";
+ struct open_how how = {
+ .flags = O_PATH,
+ .resolve = variant->resolve,
+ };
+
+ if (!openat2_supported) {
+ how.resolve = 0;
+ TH_LOG("openat2(2) unsupported -- using openat(2) instead");
+ }
+
+ for (int i = 0; i < ROUNDS; i++) {
+ int fd;
+
+ if (openat2_supported)
+ fd = sys_openat2(self->afd, victim_path, &how);
+ else
+ fd = sys_openat(self->afd, victim_path, &how);
+
+ /* Both helpers return -errno on failure, so compare against -E*. */
+ if (fd < 0) {
+ if (fd == -EAGAIN)
+ eagains++;
+ else if (fd == -EXDEV)
+ exdevs++;
+ else if (fd == -ENOENT)
+ escapes++; /* escaped outside and got ENOENT... */
+ else
+ other_errs++; /* unexpected error */
+ } else {
+ /* A successful open must land exactly on the "a" directory. */
+ if (fdequal(_metadata, fd, self->afd, NULL))
+ successes++;
+ else
+ escapes++; /* we got an unexpected fd */
+ }
+ if (fd >= 0)
+ close(fd);
+ }
+
+ TH_LOG("non-escapes: EAGAIN=%d EXDEV=%d E<other>=%d success=%d",
+ eagains, exdevs, other_errs, successes);
+ ASSERT_EQ(escapes, 0) {
+ TH_LOG("rename attack with %s (%d runs, got %d escapes)",
+ variant->name, ROUNDS, escapes);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/filesystems/openat2/resolve_test.c
index a76ef15ceb90a..eacde59ce1583 100644
--- a/tools/testing/selftests/openat2/resolve_test.c
+++ b/tools/testing/selftests/filesystems/openat2/resolve_test.c
@@ -14,8 +14,81 @@
#include <stdbool.h>
#include <string.h>
-#include "kselftest.h"
#include "helpers.h"
+#include "kselftest_harness.h"
+
+struct resolve_test {
+ const char *name;
+ const char *dir;
+ const char *path;
+ struct open_how how;
+ bool pass;
+ union {
+ int err;
+ const char *path;
+ } out;
+};
+
+/*
+ * Verify a single resolve test case. This must be called from within a TEST_F
+ * function with _metadata in scope.
+ */
+static void verify_resolve_test(struct __test_metadata *_metadata,
+ int rootfd, int hardcoded_fd,
+ const struct resolve_test *test)
+{
+ struct open_how how = test->how;
+ int dfd, fd;
+ char *fdpath = NULL;
+
+ /* Auto-set O_PATH. */
+ if (!(how.flags & O_CREAT))
+ how.flags |= O_PATH;
+
+ if (test->dir)
+ dfd = openat(rootfd, test->dir, O_PATH | O_DIRECTORY);
+ else
+ dfd = dup(rootfd);
+ ASSERT_GE(dfd, 0) TH_LOG("failed to open dir '%s': %m", test->dir ?: ".");
+ ASSERT_EQ(dup2(dfd, hardcoded_fd), hardcoded_fd);
+
+ fd = sys_openat2(dfd, test->path, &how);
+
+ if (test->pass) {
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s: expected success, got %d (%s)",
+ test->name, fd, strerror(-fd));
+ }
+ if (fd >= 0) {
+ EXPECT_TRUE(fdequal(_metadata, fd, rootfd, test->out.path)) {
+ fdpath = fdreadlink(_metadata, fd);
+ TH_LOG("%s: wrong path '%s', expected '%s'",
+ test->name, fdpath,
+ test->out.path ?: ".");
+ free(fdpath);
+ }
+ }
+ } else {
+ EXPECT_EQ(test->out.err, fd) {
+ if (fd >= 0) {
+ fdpath = fdreadlink(_metadata, fd);
+ TH_LOG("%s: expected %d (%s), got %d['%s']",
+ test->name, test->out.err,
+ strerror(-test->out.err), fd, fdpath);
+ free(fdpath);
+ } else {
+ TH_LOG("%s: expected %d (%s), got %d (%s)",
+ test->name, test->out.err,
+ strerror(-test->out.err),
+ fd, strerror(-fd));
+ }
+ }
+ }
+
+ if (fd >= 0)
+ close(fd);
+ close(dfd);
+}
/*
* Construct a test directory with the following structure:
@@ -39,101 +112,110 @@
* |-- absself -> /
* |-- self -> ../../root/
* |-- garbageself -> /../../root/
- * |-- passwd -> ../cheeky/../cheeky/../etc/../etc/passwd
- * |-- abspasswd -> /../cheeky/../cheeky/../etc/../etc/passwd
+ * |-- passwd -> ../cheeky/../etc/../etc/passwd
+ * |-- abspasswd -> /../cheeky/../etc/../etc/passwd
* |-- dotdotlink -> ../../../../../../../../../../../../../../etc/passwd
* `-- garbagelink -> /../../../../../../../../../../../../../../etc/passwd
*/
-int setup_testdir(void)
+FIXTURE(openat2_resolve) {
+ int rootfd;
+ int hardcoded_fd;
+ char *hardcoded_fdpath;
+ char *procselfexe;
+};
+
+FIXTURE_SETUP(openat2_resolve)
{
- int dfd, tmpfd;
char dirname[] = "/tmp/ksft-openat2-testdir.XXXXXX";
+ int dfd, tmpfd;
+
+ self->rootfd = -1;
+ self->hardcoded_fd = -1;
+ self->hardcoded_fdpath = NULL;
+ self->procselfexe = NULL;
+
+ /* NOTE: We should be checking for CAP_SYS_ADMIN here... */
+ if (geteuid() != 0)
+ SKIP(return, "all tests require euid == 0");
+ if (!openat2_supported)
+ SKIP(return, "openat2(2) not supported");
/* Unshare and make /tmp a new directory. */
- E_unshare(CLONE_NEWNS);
- E_mount("", "/tmp", "", MS_PRIVATE, "");
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_EQ(mount("", "/tmp", "", MS_PRIVATE, ""), 0);
/* Make the top-level directory. */
- if (!mkdtemp(dirname))
- ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n");
+ ASSERT_NE(mkdtemp(dirname), NULL);
dfd = open(dirname, O_PATH | O_DIRECTORY);
- if (dfd < 0)
- ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n");
+ ASSERT_GE(dfd, 0);
/* A sub-directory which is actually used for tests. */
- E_mkdirat(dfd, "root", 0755);
+ ASSERT_EQ(mkdirat(dfd, "root", 0755), 0);
tmpfd = openat(dfd, "root", O_PATH | O_DIRECTORY);
- if (tmpfd < 0)
- ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n");
+ ASSERT_GE(tmpfd, 0);
close(dfd);
dfd = tmpfd;
- E_symlinkat("/proc/self/exe", dfd, "procexe");
- E_symlinkat("/proc/self/root", dfd, "procroot");
- E_mkdirat(dfd, "root", 0755);
+ ASSERT_EQ(symlinkat("/proc/self/exe", dfd, "procexe"), 0);
+ ASSERT_EQ(symlinkat("/proc/self/root", dfd, "procroot"), 0);
+ ASSERT_EQ(mkdirat(dfd, "root", 0755), 0);
/* There is no mountat(2), so use chdir. */
- E_mkdirat(dfd, "mnt", 0755);
- E_fchdir(dfd);
- E_mount("tmpfs", "./mnt", "tmpfs", MS_NOSUID | MS_NODEV, "");
- E_symlinkat("../mnt/", dfd, "mnt/self");
- E_symlinkat("/mnt/", dfd, "mnt/absself");
+ ASSERT_EQ(mkdirat(dfd, "mnt", 0755), 0);
+ ASSERT_EQ(fchdir(dfd), 0);
+ ASSERT_EQ(mount("tmpfs", "./mnt", "tmpfs", MS_NOSUID | MS_NODEV, ""), 0);
+ ASSERT_EQ(symlinkat("../mnt/", dfd, "mnt/self"), 0);
+ ASSERT_EQ(symlinkat("/mnt/", dfd, "mnt/absself"), 0);
- E_mkdirat(dfd, "etc", 0755);
- E_touchat(dfd, "etc/passwd");
+ ASSERT_EQ(mkdirat(dfd, "etc", 0755), 0);
+ ASSERT_GE(touchat(dfd, "etc/passwd"), 0);
- E_symlinkat("/newfile3", dfd, "creatlink");
- E_symlinkat("etc/", dfd, "reletc");
- E_symlinkat("etc/passwd", dfd, "relsym");
- E_symlinkat("/etc/", dfd, "absetc");
- E_symlinkat("/etc/passwd", dfd, "abssym");
- E_symlinkat("/cheeky", dfd, "abscheeky");
+ ASSERT_EQ(symlinkat("/newfile3", dfd, "creatlink"), 0);
+ ASSERT_EQ(symlinkat("etc/", dfd, "reletc"), 0);
+ ASSERT_EQ(symlinkat("etc/passwd", dfd, "relsym"), 0);
+ ASSERT_EQ(symlinkat("/etc/", dfd, "absetc"), 0);
+ ASSERT_EQ(symlinkat("/etc/passwd", dfd, "abssym"), 0);
+ ASSERT_EQ(symlinkat("/cheeky", dfd, "abscheeky"), 0);
- E_mkdirat(dfd, "cheeky", 0755);
+ ASSERT_EQ(mkdirat(dfd, "cheeky", 0755), 0);
- E_symlinkat("/", dfd, "cheeky/absself");
- E_symlinkat("../../root/", dfd, "cheeky/self");
- E_symlinkat("/../../root/", dfd, "cheeky/garbageself");
+ ASSERT_EQ(symlinkat("/", dfd, "cheeky/absself"), 0);
+ ASSERT_EQ(symlinkat("../../root/", dfd, "cheeky/self"), 0);
+ ASSERT_EQ(symlinkat("/../../root/", dfd, "cheeky/garbageself"), 0);
- E_symlinkat("../cheeky/../etc/../etc/passwd", dfd, "cheeky/passwd");
- E_symlinkat("/../cheeky/../etc/../etc/passwd", dfd, "cheeky/abspasswd");
+ ASSERT_EQ(symlinkat("../cheeky/../etc/../etc/passwd",
+ dfd, "cheeky/passwd"), 0);
+ ASSERT_EQ(symlinkat("/../cheeky/../etc/../etc/passwd",
+ dfd, "cheeky/abspasswd"), 0);
- E_symlinkat("../../../../../../../../../../../../../../etc/passwd",
- dfd, "cheeky/dotdotlink");
- E_symlinkat("/../../../../../../../../../../../../../../etc/passwd",
- dfd, "cheeky/garbagelink");
+ ASSERT_EQ(symlinkat("../../../../../../../../../../../../../../etc/passwd",
+ dfd, "cheeky/dotdotlink"), 0);
+ ASSERT_EQ(symlinkat("/../../../../../../../../../../../../../../etc/passwd",
+ dfd, "cheeky/garbagelink"), 0);
- return dfd;
-}
-
-struct basic_test {
- const char *name;
- const char *dir;
- const char *path;
- struct open_how how;
- bool pass;
- union {
- int err;
- const char *path;
- } out;
-};
+ self->rootfd = dfd;
-#define NUM_OPENAT2_OPATH_TESTS 88
+ self->hardcoded_fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(self->hardcoded_fd, 0);
+ ASSERT_GE(asprintf(&self->hardcoded_fdpath, "self/fd/%d",
+ self->hardcoded_fd), 0);
+ ASSERT_GE(asprintf(&self->procselfexe, "/proc/%d/exe", getpid()), 0);
+}
-void test_openat2_opath_tests(void)
+FIXTURE_TEARDOWN(openat2_resolve)
{
- int rootfd, hardcoded_fd;
- char *procselfexe, *hardcoded_fdpath;
-
- E_asprintf(&procselfexe, "/proc/%d/exe", getpid());
- rootfd = setup_testdir();
-
- hardcoded_fd = open("/dev/null", O_RDONLY);
- E_assert(hardcoded_fd >= 0, "open fd to hardcode");
- E_asprintf(&hardcoded_fdpath, "self/fd/%d", hardcoded_fd);
+ free(self->procselfexe);
+ free(self->hardcoded_fdpath);
+ if (self->hardcoded_fd >= 0)
+ close(self->hardcoded_fd);
+ if (self->rootfd >= 0)
+ close(self->rootfd);
+}
- struct basic_test tests[] = {
- /** RESOLVE_BENEATH **/
+/* Attempts to cross the dirfd should be blocked with -EXDEV. */
+TEST_F(openat2_resolve, resolve_beneath)
+{
+ struct resolve_test tests[] = {
/* Attempts to cross dirfd should be blocked. */
{ .name = "[beneath] jump to /",
.path = "/", .how.resolve = RESOLVE_BENEATH,
@@ -206,9 +288,17 @@ void test_openat2_opath_tests(void)
{ .name = "[beneath] tricky absolute + garbage link outside $root",
.path = "abscheeky/garbagelink", .how.resolve = RESOLVE_BENEATH,
.out.err = -EXDEV, .pass = false },
+ };
- /** RESOLVE_IN_ROOT **/
- /* All attempts to cross the dirfd will be scoped-to-root. */
+ for (int i = 0; i < ARRAY_SIZE(tests); i++)
+ verify_resolve_test(_metadata, self->rootfd,
+ self->hardcoded_fd, &tests[i]);
+}
+
+/* All attempts to cross the dirfd will be scoped-to-root. */
+TEST_F(openat2_resolve, resolve_in_root)
+{
+ struct resolve_test tests[] = {
{ .name = "[in_root] jump to /",
.path = "/", .how.resolve = RESOLVE_IN_ROOT,
.out.path = NULL, .pass = true },
@@ -297,8 +387,17 @@ void test_openat2_opath_tests(void)
.how.mode = 0700,
.how.resolve = RESOLVE_IN_ROOT,
.out.path = "newfile3", .pass = true },
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(tests); i++)
+ verify_resolve_test(_metadata, self->rootfd,
+ self->hardcoded_fd, &tests[i]);
+}
- /** RESOLVE_NO_XDEV **/
+/* Crossing mount boundaries should be blocked. */
+TEST_F(openat2_resolve, resolve_no_xdev)
+{
+ struct resolve_test tests[] = {
/* Crossing *down* into a mountpoint is disallowed. */
{ .name = "[no_xdev] cross into $mnt",
.path = "mnt", .how.resolve = RESOLVE_NO_XDEV,
@@ -347,10 +446,19 @@ void test_openat2_opath_tests(void)
.out.err = -EXDEV, .pass = false },
/* Except magic-link jumps inside the same vfsmount. */
{ .name = "[no_xdev] jump through magic-link to same procfs",
- .dir = "/proc", .path = hardcoded_fdpath, .how.resolve = RESOLVE_NO_XDEV,
- .out.path = "/proc", .pass = true, },
+ .dir = "/proc", .path = self->hardcoded_fdpath, .how.resolve = RESOLVE_NO_XDEV,
+ .out.path = "/proc", .pass = true, },
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(tests); i++)
+ verify_resolve_test(_metadata, self->rootfd,
+ self->hardcoded_fd, &tests[i]);
+}
- /** RESOLVE_NO_MAGICLINKS **/
+/* Procfs-style magic-link resolution should be blocked. */
+TEST_F(openat2_resolve, resolve_no_magiclinks)
+{
+ struct resolve_test tests[] = {
/* Regular symlinks should work. */
{ .name = "[no_magiclinks] ordinary relative symlink",
.path = "relsym", .how.resolve = RESOLVE_NO_MAGICLINKS,
@@ -365,7 +473,7 @@ void test_openat2_opath_tests(void)
{ .name = "[no_magiclinks] normal path to magic-link with O_NOFOLLOW",
.path = "/proc/self/exe", .how.flags = O_NOFOLLOW,
.how.resolve = RESOLVE_NO_MAGICLINKS,
- .out.path = procselfexe, .pass = true },
+ .out.path = self->procselfexe, .pass = true },
{ .name = "[no_magiclinks] symlink to magic-link path component",
.path = "procroot/etc", .how.resolve = RESOLVE_NO_MAGICLINKS,
.out.err = -ELOOP, .pass = false },
@@ -376,8 +484,17 @@ void test_openat2_opath_tests(void)
.path = "/proc/self/root/etc", .how.flags = O_NOFOLLOW,
.how.resolve = RESOLVE_NO_MAGICLINKS,
.out.err = -ELOOP, .pass = false },
+ };
- /** RESOLVE_NO_SYMLINKS **/
+ for (int i = 0; i < ARRAY_SIZE(tests); i++)
+ verify_resolve_test(_metadata, self->rootfd,
+ self->hardcoded_fd, &tests[i]);
+}
+
+/* All symlink resolution should be blocked. */
+TEST_F(openat2_resolve, resolve_no_symlinks)
+{
+ struct resolve_test tests[] = {
/* Normal paths should work. */
{ .name = "[no_symlinks] ordinary path to '.'",
.path = ".", .how.resolve = RESOLVE_NO_SYMLINKS,
@@ -436,88 +553,9 @@ void test_openat2_opath_tests(void)
.out.err = -ELOOP, .pass = false },
};
- BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_OPATH_TESTS);
-
- for (int i = 0; i < ARRAY_LEN(tests); i++) {
- int dfd, fd;
- char *fdpath = NULL;
- bool failed;
- void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
- struct basic_test *test = &tests[i];
-
- if (!openat2_supported) {
- ksft_print_msg("openat2(2) unsupported\n");
- resultfn = ksft_test_result_skip;
- goto skip;
- }
-
- /* Auto-set O_PATH. */
- if (!(test->how.flags & O_CREAT))
- test->how.flags |= O_PATH;
-
- if (test->dir)
- dfd = openat(rootfd, test->dir, O_PATH | O_DIRECTORY);
- else
- dfd = dup(rootfd);
- E_assert(dfd, "failed to openat root '%s': %m", test->dir);
-
- E_dup2(dfd, hardcoded_fd);
-
- fd = sys_openat2(dfd, test->path, &test->how);
- if (test->pass)
- failed = (fd < 0 || !fdequal(fd, rootfd, test->out.path));
- else
- failed = (fd != test->out.err);
- if (fd >= 0) {
- fdpath = fdreadlink(fd);
- close(fd);
- }
- close(dfd);
-
- if (failed) {
- resultfn = ksft_test_result_fail;
-
- ksft_print_msg("openat2 unexpectedly returned ");
- if (fdpath)
- ksft_print_msg("%d['%s']\n", fd, fdpath);
- else
- ksft_print_msg("%d (%s)\n", fd, strerror(-fd));
- }
-
-skip:
- if (test->pass)
- resultfn("%s gives path '%s'\n", test->name,
- test->out.path ?: ".");
- else
- resultfn("%s fails with %d (%s)\n", test->name,
- test->out.err, strerror(-test->out.err));
-
- fflush(stdout);
- free(fdpath);
- }
-
- free(procselfexe);
- close(rootfd);
-
- free(hardcoded_fdpath);
- close(hardcoded_fd);
+ for (int i = 0; i < ARRAY_SIZE(tests); i++)
+ verify_resolve_test(_metadata, self->rootfd,
+ self->hardcoded_fd, &tests[i]);
}
-#define NUM_TESTS NUM_OPENAT2_OPATH_TESTS
-
-int main(int argc, char **argv)
-{
- ksft_print_header();
- ksft_set_plan(NUM_TESTS);
-
- /* NOTE: We should be checking for CAP_SYS_ADMIN here... */
- if (geteuid() != 0)
- ksft_exit_skip("all tests require euid == 0\n");
-
- test_openat2_opath_tests();
-
- if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
- ksft_exit_fail();
- else
- ksft_exit_pass();
-}
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/namespaces/listns_efault_test.c b/tools/testing/selftests/namespaces/listns_efault_test.c
index b570746e917c1..26b452c98c665 100644
--- a/tools/testing/selftests/namespaces/listns_efault_test.c
+++ b/tools/testing/selftests/namespaces/listns_efault_test.c
@@ -38,7 +38,7 @@ TEST(listns_partial_fault_with_ns_cleanup)
__u64 *ns_ids;
ssize_t ret;
long page_size;
- pid_t pid, iter_pid;
+ pid_t pid, iter_pid, ns_pids[5];
int pidfds[5];
int sv[5][2];
int iter_pidfd;
@@ -114,6 +114,7 @@ TEST(listns_partial_fault_with_ns_cleanup)
pid = create_child(&pidfds[i], CLONE_NEWNS);
ASSERT_NE(pid, -1);
+ ns_pids[i] = pid;
if (pid == 0) {
close(sv[i][0]); /* Close parent end */
@@ -164,7 +165,7 @@ TEST(listns_partial_fault_with_ns_cleanup)
/* Wait for all mount namespace children to exit and cleanup */
for (i = 0; i < 5; i++) {
- waitpid(-1, NULL, 0);
+ waitpid(ns_pids[i], NULL, 0);
close(sv[i][0]);
close(pidfds[i]);
}
@@ -175,6 +176,12 @@ TEST(listns_partial_fault_with_ns_cleanup)
ASSERT_EQ(ret, iter_pid);
close(iter_pidfd);
+ /* If listns() is not supported the iterator exits cleanly via ENOSYS */
+ if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP) {
+ munmap(map, page_size);
+ SKIP(return, "listns() not supported");
+ }
+
/* Should have been killed */
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_EQ(WTERMSIG(status), SIGKILL);
@@ -250,7 +257,7 @@ TEST(listns_late_fault_with_ns_cleanup)
__u64 *ns_ids;
ssize_t ret;
long page_size;
- pid_t pid, iter_pid;
+ pid_t pid, iter_pid, ns_pids[10];
int pidfds[10];
int sv[10][2];
int iter_pidfd;
@@ -320,6 +327,7 @@ TEST(listns_late_fault_with_ns_cleanup)
pid = create_child(&pidfds[i], CLONE_NEWNS);
ASSERT_NE(pid, -1);
+ ns_pids[i] = pid;
if (pid == 0) {
close(sv[i][0]); /* Close parent end */
@@ -373,7 +381,7 @@ TEST(listns_late_fault_with_ns_cleanup)
/* Wait for all children and cleanup */
for (i = 0; i < 10; i++) {
- waitpid(-1, NULL, 0);
+ waitpid(ns_pids[i], NULL, 0);
close(sv[i][0]);
close(pidfds[i]);
}
@@ -384,6 +392,12 @@ TEST(listns_late_fault_with_ns_cleanup)
ASSERT_EQ(ret, iter_pid);
close(iter_pidfd);
+ /* If listns() is not supported the iterator exits cleanly via ENOSYS */
+ if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP) {
+ munmap(map, page_size);
+ SKIP(return, "listns() not supported");
+ }
+
/* Should have been killed */
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_EQ(WTERMSIG(status), SIGKILL);
@@ -402,7 +416,7 @@ TEST(listns_mnt_ns_cleanup_on_fault)
__u64 *ns_ids;
ssize_t ret;
long page_size;
- pid_t pid, iter_pid;
+ pid_t pid, iter_pid, ns_pids[8];
int pidfds[8];
int sv[8][2];
int iter_pidfd;
@@ -462,6 +476,7 @@ TEST(listns_mnt_ns_cleanup_on_fault)
pid = create_child(&pidfds[i], CLONE_NEWNS);
ASSERT_NE(pid, -1);
+ ns_pids[i] = pid;
if (pid == 0) {
close(sv[i][0]); /* Close parent end */
@@ -508,7 +523,7 @@ TEST(listns_mnt_ns_cleanup_on_fault)
/* Wait for children and cleanup */
for (i = 0; i < 8; i++) {
- waitpid(-1, NULL, 0);
+ waitpid(ns_pids[i], NULL, 0);
close(sv[i][0]);
close(pidfds[i]);
}
@@ -519,6 +534,12 @@ TEST(listns_mnt_ns_cleanup_on_fault)
ASSERT_EQ(ret, iter_pid);
close(iter_pidfd);
+ /* If listns() is not supported the iterator exits cleanly via ENOSYS */
+ if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP) {
+ munmap(map, page_size);
+ SKIP(return, "listns() not supported");
+ }
+
/* Should have been killed */
ASSERT_TRUE(WIFSIGNALED(status));
ASSERT_EQ(WTERMSIG(status), SIGKILL);
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
index b4a14c6693a54..46dc838cba82d 100644
--- a/tools/testing/selftests/namespaces/nsid_test.c
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -25,14 +25,24 @@
/* Fixture for tests that create child processes */
FIXTURE(nsid) {
pid_t child_pid;
+ pid_t grandchild_pid;
};
FIXTURE_SETUP(nsid) {
self->child_pid = 0;
+ self->grandchild_pid = 0;
}
FIXTURE_TEARDOWN(nsid) {
- /* Clean up any child process that may still be running */
+ /*
+ * Kill grandchild first: timens_separate and pidns_separate fork a
+ * grandchild that calls pause(). It is reparented to init on child
+ * exit and keeps the test runner's tap pipe open, hanging the runner.
+ */
+ if (self->grandchild_pid > 0) {
+ kill(self->grandchild_pid, SIGKILL);
+ waitpid(self->grandchild_pid, NULL, 0);
+ }
if (self->child_pid > 0) {
kill(self->child_pid, SIGKILL);
waitpid(self->child_pid, NULL, 0);
@@ -676,6 +686,7 @@ TEST_F(nsid, timens_separate)
pid_t grandchild_pid;
ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid));
+ self->grandchild_pid = grandchild_pid;
close(pipefd[0]);
/* Open grandchild's time namespace */
@@ -797,6 +808,7 @@ TEST_F(nsid, pidns_separate)
pid_t grandchild_pid;
ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid));
+ self->grandchild_pid = grandchild_pid;
close(pipefd[0]);
/* Open grandchild's PID namespace */
diff --git a/tools/testing/selftests/openat2/helpers.c b/tools/testing/selftests/openat2/helpers.c
deleted file mode 100644
index 5074681ffdc99..0000000000000
--- a/tools/testing/selftests/openat2/helpers.c
+++ /dev/null
@@ -1,109 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Author: Aleksa Sarai <cyphar@cyphar.com>
- * Copyright (C) 2018-2019 SUSE LLC.
- */
-
-#define _GNU_SOURCE
-#include <errno.h>
-#include <fcntl.h>
-#include <stdbool.h>
-#include <string.h>
-#include <syscall.h>
-#include <limits.h>
-
-#include "helpers.h"
-
-bool needs_openat2(const struct open_how *how)
-{
- return how->resolve != 0;
-}
-
-int raw_openat2(int dfd, const char *path, void *how, size_t size)
-{
- int ret = syscall(__NR_openat2, dfd, path, how, size);
- return ret >= 0 ? ret : -errno;
-}
-
-int sys_openat2(int dfd, const char *path, struct open_how *how)
-{
- return raw_openat2(dfd, path, how, sizeof(*how));
-}
-
-int sys_openat(int dfd, const char *path, struct open_how *how)
-{
- int ret = openat(dfd, path, how->flags, how->mode);
- return ret >= 0 ? ret : -errno;
-}
-
-int sys_renameat2(int olddirfd, const char *oldpath,
- int newdirfd, const char *newpath, unsigned int flags)
-{
- int ret = syscall(__NR_renameat2, olddirfd, oldpath,
- newdirfd, newpath, flags);
- return ret >= 0 ? ret : -errno;
-}
-
-int touchat(int dfd, const char *path)
-{
- int fd = openat(dfd, path, O_CREAT, 0700);
- if (fd >= 0)
- close(fd);
- return fd;
-}
-
-char *fdreadlink(int fd)
-{
- char *target, *tmp;
-
- E_asprintf(&tmp, "/proc/self/fd/%d", fd);
-
- target = malloc(PATH_MAX);
- if (!target)
- ksft_exit_fail_msg("fdreadlink: malloc failed\n");
- memset(target, 0, PATH_MAX);
-
- E_readlink(tmp, target, PATH_MAX);
- free(tmp);
- return target;
-}
-
-bool fdequal(int fd, int dfd, const char *path)
-{
- char *fdpath, *dfdpath, *other;
- bool cmp;
-
- fdpath = fdreadlink(fd);
- dfdpath = fdreadlink(dfd);
-
- if (!path)
- E_asprintf(&other, "%s", dfdpath);
- else if (*path == '/')
- E_asprintf(&other, "%s", path);
- else
- E_asprintf(&other, "%s/%s", dfdpath, path);
-
- cmp = !strcmp(fdpath, other);
-
- free(fdpath);
- free(dfdpath);
- free(other);
- return cmp;
-}
-
-bool openat2_supported = false;
-
-void __attribute__((constructor)) init(void)
-{
- struct open_how how = {};
- int fd;
-
- BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_VER0);
-
- /* Check openat2(2) support. */
- fd = sys_openat2(AT_FDCWD, ".", &how);
- openat2_supported = (fd >= 0);
-
- if (fd >= 0)
- close(fd);
-}
diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h
deleted file mode 100644
index 510e60602511a..0000000000000
--- a/tools/testing/selftests/openat2/helpers.h
+++ /dev/null
@@ -1,108 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Author: Aleksa Sarai <cyphar@cyphar.com>
- * Copyright (C) 2018-2019 SUSE LLC.
- */
-
-#ifndef __RESOLVEAT_H__
-#define __RESOLVEAT_H__
-
-#define _GNU_SOURCE
-#include <stdint.h>
-#include <stdbool.h>
-#include <errno.h>
-#include <linux/types.h>
-#include "kselftest.h"
-
-#define ARRAY_LEN(X) (sizeof (X) / sizeof (*(X)))
-#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); })))
-
-#ifndef SYS_openat2
-#ifndef __NR_openat2
-#define __NR_openat2 437
-#endif /* __NR_openat2 */
-#define SYS_openat2 __NR_openat2
-#endif /* SYS_openat2 */
-
-/*
- * Arguments for how openat2(2) should open the target path. If @resolve is
- * zero, then openat2(2) operates very similarly to openat(2).
- *
- * However, unlike openat(2), unknown bits in @flags result in -EINVAL rather
- * than being silently ignored. @mode must be zero unless one of {O_CREAT,
- * O_TMPFILE} are set.
- *
- * @flags: O_* flags.
- * @mode: O_CREAT/O_TMPFILE file mode.
- * @resolve: RESOLVE_* flags.
- */
-struct open_how {
- __u64 flags;
- __u64 mode;
- __u64 resolve;
-};
-
-#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */
-#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0
-
-bool needs_openat2(const struct open_how *how);
-
-#ifndef RESOLVE_IN_ROOT
-/* how->resolve flags for openat2(2). */
-#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings
- (includes bind-mounts). */
-#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style
- "magic-links". */
-#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks
- (implies OEXT_NO_MAGICLINKS) */
-#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like
- "..", symlinks, and absolute
- paths which escape the dirfd. */
-#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".."
- be scoped inside the dirfd
- (similar to chroot(2)). */
-#endif /* RESOLVE_IN_ROOT */
-
-#define E_func(func, ...) \
- do { \
- errno = 0; \
- if (func(__VA_ARGS__) < 0) \
- ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \
- __FILE__, __LINE__, #func, errno); \
- } while (0)
-
-#define E_asprintf(...) E_func(asprintf, __VA_ARGS__)
-#define E_chmod(...) E_func(chmod, __VA_ARGS__)
-#define E_dup2(...) E_func(dup2, __VA_ARGS__)
-#define E_fchdir(...) E_func(fchdir, __VA_ARGS__)
-#define E_fstatat(...) E_func(fstatat, __VA_ARGS__)
-#define E_kill(...) E_func(kill, __VA_ARGS__)
-#define E_mkdirat(...) E_func(mkdirat, __VA_ARGS__)
-#define E_mount(...) E_func(mount, __VA_ARGS__)
-#define E_prctl(...) E_func(prctl, __VA_ARGS__)
-#define E_readlink(...) E_func(readlink, __VA_ARGS__)
-#define E_setresuid(...) E_func(setresuid, __VA_ARGS__)
-#define E_symlinkat(...) E_func(symlinkat, __VA_ARGS__)
-#define E_touchat(...) E_func(touchat, __VA_ARGS__)
-#define E_unshare(...) E_func(unshare, __VA_ARGS__)
-
-#define E_assert(expr, msg, ...) \
- do { \
- if (!(expr)) \
- ksft_exit_fail_msg("ASSERT(%s:%d) failed (%s): " msg "\n", \
- __FILE__, __LINE__, #expr, ##__VA_ARGS__); \
- } while (0)
-
-int raw_openat2(int dfd, const char *path, void *how, size_t size);
-int sys_openat2(int dfd, const char *path, struct open_how *how);
-int sys_openat(int dfd, const char *path, struct open_how *how);
-int sys_renameat2(int olddirfd, const char *oldpath,
- int newdirfd, const char *newpath, unsigned int flags);
-
-int touchat(int dfd, const char *path);
-char *fdreadlink(int fd);
-bool fdequal(int fd, int dfd, const char *path);
-
-extern bool openat2_supported;
-
-#endif /* __RESOLVEAT_H__ */
diff --git a/tools/testing/selftests/openat2/rename_attack_test.c b/tools/testing/selftests/openat2/rename_attack_test.c
deleted file mode 100644
index aa5699e457290..0000000000000
--- a/tools/testing/selftests/openat2/rename_attack_test.c
+++ /dev/null
@@ -1,160 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Author: Aleksa Sarai <cyphar@cyphar.com>
- * Copyright (C) 2018-2019 SUSE LLC.
- */
-
-#define _GNU_SOURCE
-#include <errno.h>
-#include <fcntl.h>
-#include <sched.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/mount.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <syscall.h>
-#include <limits.h>
-#include <unistd.h>
-
-#include "kselftest.h"
-#include "helpers.h"
-
-/* Construct a test directory with the following structure:
- *
- * root/
- * |-- a/
- * | `-- c/
- * `-- b/
- */
-int setup_testdir(void)
-{
- int dfd;
- char dirname[] = "/tmp/ksft-openat2-rename-attack.XXXXXX";
-
- /* Make the top-level directory. */
- if (!mkdtemp(dirname))
- ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n");
- dfd = open(dirname, O_PATH | O_DIRECTORY);
- if (dfd < 0)
- ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n");
-
- E_mkdirat(dfd, "a", 0755);
- E_mkdirat(dfd, "b", 0755);
- E_mkdirat(dfd, "a/c", 0755);
-
- return dfd;
-}
-
-/* Swap @dirfd/@a and @dirfd/@b constantly. Parent must kill this process. */
-pid_t spawn_attack(int dirfd, char *a, char *b)
-{
- pid_t child = fork();
- if (child != 0)
- return child;
-
- /* If the parent (the test process) dies, kill ourselves too. */
- E_prctl(PR_SET_PDEATHSIG, SIGKILL);
-
- /* Swap @a and @b. */
- for (;;)
- renameat2(dirfd, a, dirfd, b, RENAME_EXCHANGE);
- exit(1);
-}
-
-#define NUM_RENAME_TESTS 2
-#define ROUNDS 400000
-
-const char *flagname(int resolve)
-{
- switch (resolve) {
- case RESOLVE_IN_ROOT:
- return "RESOLVE_IN_ROOT";
- case RESOLVE_BENEATH:
- return "RESOLVE_BENEATH";
- }
- return "(unknown)";
-}
-
-void test_rename_attack(int resolve)
-{
- int dfd, afd;
- pid_t child;
- void (*resultfn)(const char *msg, ...) = ksft_test_result_pass;
- int escapes = 0, other_errs = 0, exdevs = 0, eagains = 0, successes = 0;
-
- struct open_how how = {
- .flags = O_PATH,
- .resolve = resolve,
- };
-
- if (!openat2_supported) {
- how.resolve = 0;
- ksft_print_msg("openat2(2) unsupported -- using openat(2) instead\n");
- }
-
- dfd = setup_testdir();
- afd = openat(dfd, "a", O_PATH);
- if (afd < 0)
- ksft_exit_fail_msg("test_rename_attack: failed to open 'a'\n");
-
- child = spawn_attack(dfd, "a/c", "b");
-
- for (int i = 0; i < ROUNDS; i++) {
- int fd;
- char *victim_path = "c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../..";
-
- if (openat2_supported)
- fd = sys_openat2(afd, victim_path, &how);
- else
- fd = sys_openat(afd, victim_path, &how);
-
- if (fd < 0) {
- if (fd == -EAGAIN)
- eagains++;
- else if (fd == -EXDEV)
- exdevs++;
- else if (fd == -ENOENT)
- escapes++; /* escaped outside and got ENOENT... */
- else
- other_errs++; /* unexpected error */
- } else {
- if (fdequal(fd, afd, NULL))
- successes++;
- else
- escapes++; /* we got an unexpected fd */
- }
- close(fd);
- }
-
- if (escapes > 0)
- resultfn = ksft_test_result_fail;
- ksft_print_msg("non-escapes: EAGAIN=%d EXDEV=%d E<other>=%d success=%d\n",
- eagains, exdevs, other_errs, successes);
- resultfn("rename attack with %s (%d runs, got %d escapes)\n",
- flagname(resolve), ROUNDS, escapes);
-
- /* Should be killed anyway, but might as well make sure. */
- E_kill(child, SIGKILL);
-}
-
-#define NUM_TESTS NUM_RENAME_TESTS
-
-int main(int argc, char **argv)
-{
- ksft_print_header();
- ksft_set_plan(NUM_TESTS);
-
- test_rename_attack(RESOLVE_BENEATH);
- test_rename_attack(RESOLVE_IN_ROOT);
-
- if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
- ksft_exit_fail();
- else
- ksft_exit_pass();
-}
diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
index c9519e7385b66..5d686a09aa153 100644
--- a/tools/testing/selftests/pid_namespace/pid_max.c
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -12,10 +12,74 @@
#include <syscall.h>
#include <sys/mount.h>
#include <sys/wait.h>
+#include <unistd.h>
#include "kselftest_harness.h"
#include "../pidfd/pidfd.h"
+/*
+ * The kernel computes the minimum allowed pid_max as:
+ * max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus())
+ * Mirror that here so the test values are always valid.
+ *
+ * Note: glibc's get_nprocs_conf() returns the number of *configured*
+ * (present) CPUs, not *possible* CPUs. The kernel uses
+ * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible.
+ * These can differ significantly (e.g. 16 configured vs 128 possible).
+ */
+#define RESERVED_PIDS 300
+#define PIDS_PER_CPU_MIN 8
+
+/* Count CPUs from a range list like "0-31" or "0-15,32-47". */
+static int num_possible_cpus(void)
+{
+ FILE *f;
+ int count = 0;
+ int lo, hi;
+
+ f = fopen("/sys/devices/system/cpu/possible", "r");
+ if (!f)
+ return 0;
+
+ while (fscanf(f, "%d", &lo) == 1) {
+ if (fscanf(f, "-%d", &hi) == 1)
+ count += hi - lo + 1;
+ else
+ count++;
+ /* skip comma separator */
+ fscanf(f, ",");
+ }
+
+ fclose(f);
+ return count;
+}
+
+static int pid_min(void)
+{
+ int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus();
+
+ return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1);
+}
+
+/*
+ * Outer and inner pid_max limits used by the tests. The outer limit is
+ * the more restrictive ancestor; the inner limit is set higher in a
+ * nested namespace but must still be capped by the outer limit.
+ * Both are derived from the kernel's minimum so they are always writable.
+ *
+ * Global so that clone callbacks can access them without parameter plumbing.
+ */
+static int outer_limit;
+static int inner_limit;
+
+static int write_int_to_fd(int fd, int val)
+{
+ char buf[12];
+ int len = snprintf(buf, sizeof(buf), "%d", val);
+
+ return write(fd, buf, len);
+}
+
#define __STACK_SIZE (8 * 1024 * 1024)
static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
{
@@ -60,18 +124,18 @@ static int pid_max_cb(void *data)
return -1;
}
- ret = write(fd, "500", sizeof("500") - 1);
+ ret = write_int_to_fd(fd, inner_limit);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
return -1;
}
- for (int i = 0; i < 501; i++) {
+ for (int i = 0; i < inner_limit + 1; i++) {
pid = fork();
if (pid == 0)
exit(EXIT_SUCCESS);
wait_for_pid(pid);
- if (pid > 500) {
+ if (pid > inner_limit) {
fprintf(stderr, "Managed to create pid number beyond limit\n");
return -1;
}
@@ -106,7 +170,7 @@ static int pid_max_nested_inner(void *data)
return fret;
}
- ret = write(fd, "500", sizeof("500") - 1);
+ ret = write_int_to_fd(fd, inner_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -133,8 +197,8 @@ static int pid_max_nested_inner(void *data)
return fret;
}
- /* Now make sure that we wrap pids at 400. */
- for (i = 0; i < 510; i++) {
+ /* Now make sure that we wrap pids at outer_limit. */
+ for (i = 0; i < inner_limit + 10; i++) {
pid_t pid;
pid = fork();
@@ -145,7 +209,7 @@ static int pid_max_nested_inner(void *data)
exit(EXIT_SUCCESS);
wait_for_pid(pid);
- if (pid >= 500) {
+ if (pid >= inner_limit) {
fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
return fret;
}
@@ -156,15 +220,19 @@ static int pid_max_nested_inner(void *data)
static int pid_max_nested_outer(void *data)
{
- int fret = -1, nr_procs = 400;
- pid_t pids[1000];
- int fd, i, ret;
+ int fret = -1, nr_procs = 0;
+ pid_t *pids;
+ int fd, ret;
pid_t pid;
+ pids = malloc(outer_limit * sizeof(pid_t));
+ if (!pids)
+ return -1;
+
ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
if (ret) {
fprintf(stderr, "%m - Failed to make rootfs private mount\n");
- return fret;
+ goto out;
}
umount2("/proc", MNT_DETACH);
@@ -172,27 +240,28 @@ static int pid_max_nested_outer(void *data)
ret = mount("proc", "/proc", "proc", 0, NULL);
if (ret) {
fprintf(stderr, "%m - Failed to mount proc\n");
- return fret;
+ goto out;
}
fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
if (fd < 0) {
fprintf(stderr, "%m - Failed to open pid_max\n");
- return fret;
+ goto out;
}
- ret = write(fd, "400", sizeof("400") - 1);
+ ret = write_int_to_fd(fd, outer_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
- return fret;
+ goto out;
}
/*
- * Create 397 processes. This leaves room for do_clone() (398) and
- * one more 399. So creating another process needs to fail.
+ * Create (outer_limit - 4) processes. This leaves room for
+ * do_clone() and one more. So creating another process needs
+ * to fail.
*/
- for (nr_procs = 0; nr_procs < 396; nr_procs++) {
+ for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) {
pid = fork();
if (pid < 0)
goto reap;
@@ -220,20 +289,26 @@ reap:
for (int i = 0; i < nr_procs; i++)
wait_for_pid(pids[i]);
+out:
+ free(pids);
return fret;
}
static int pid_max_nested_limit_inner(void *data)
{
- int fret = -1, nr_procs = 400;
+ int fret = -1, nr_procs = 0;
int fd, ret;
pid_t pid;
- pid_t pids[1000];
+ pid_t *pids;
+
+ pids = malloc(inner_limit * sizeof(pid_t));
+ if (!pids)
+ return -1;
ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
if (ret) {
fprintf(stderr, "%m - Failed to make rootfs private mount\n");
- return fret;
+ goto out;
}
umount2("/proc", MNT_DETACH);
@@ -241,23 +316,23 @@ static int pid_max_nested_limit_inner(void *data)
ret = mount("proc", "/proc", "proc", 0, NULL);
if (ret) {
fprintf(stderr, "%m - Failed to mount proc\n");
- return fret;
+ goto out;
}
fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
if (fd < 0) {
fprintf(stderr, "%m - Failed to open pid_max\n");
- return fret;
+ goto out;
}
- ret = write(fd, "500", sizeof("500") - 1);
+ ret = write_int_to_fd(fd, inner_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
- return fret;
+ goto out;
}
- for (nr_procs = 0; nr_procs < 500; nr_procs++) {
+ for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) {
pid = fork();
if (pid < 0)
break;
@@ -268,7 +343,7 @@ static int pid_max_nested_limit_inner(void *data)
pids[nr_procs] = pid;
}
- if (nr_procs >= 400) {
+ if (nr_procs >= outer_limit) {
fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
goto reap;
}
@@ -279,6 +354,8 @@ reap:
for (int i = 0; i < nr_procs; i++)
wait_for_pid(pids[i]);
+out:
+ free(pids);
return fret;
}
@@ -307,7 +384,7 @@ static int pid_max_nested_limit_outer(void *data)
return -1;
}
- ret = write(fd, "400", sizeof("400") - 1);
+ ret = write_int_to_fd(fd, outer_limit);
close(fd);
if (ret < 0) {
fprintf(stderr, "%m - Failed to write pid_max\n");
@@ -328,17 +405,32 @@ static int pid_max_nested_limit_outer(void *data)
return 0;
}
-TEST(pid_max_simple)
+FIXTURE(pid_max) {
+ int dummy;
+};
+
+FIXTURE_SETUP(pid_max)
{
- pid_t pid;
+ int min = pid_min();
+
+ outer_limit = min + 100;
+ inner_limit = min + 200;
+}
+FIXTURE_TEARDOWN(pid_max)
+{
+}
+
+TEST_F(pid_max, simple)
+{
+ pid_t pid;
pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
ASSERT_GT(pid, 0);
ASSERT_EQ(0, wait_for_pid(pid));
}
-TEST(pid_max_nested_limit)
+TEST_F(pid_max, nested_limit)
{
pid_t pid;
@@ -347,7 +439,7 @@ TEST(pid_max_nested_limit)
ASSERT_EQ(0, wait_for_pid(pid));
}
-TEST(pid_max_nested)
+TEST_F(pid_max, nested)
{
pid_t pid;