diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 18:09:32 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 18:09:32 +0100 |
| commit | 5f74287f42d47e9acdc9a987518387125f046527 (patch) | |
| tree | a8c7e4a5ad67952a170f269f99418a4ab50a5318 /tools | |
| parent | 96c3d0c2555e1b97c57348e83702f3b56b8df9d3 (diff) | |
| parent | 982071afc4e24a052d84132ffbf4340856924c28 (diff) | |
| download | linux-next-history-5f74287f42d47e9acdc9a987518387125f046527.tar.gz | |
Merge branch 'fs-next' of linux-next
# Conflicts:
# fs/btrfs/defrag.c
Diffstat (limited to 'tools')
20 files changed, 936 insertions, 696 deletions
diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h index 6791f6508632e..1a99f38813c79 100644 --- a/tools/arch/alpha/include/uapi/asm/errno.h +++ b/tools/arch/alpha/include/uapi/asm/errno.h @@ -127,4 +127,6 @@ #define EHWPOISON 139 /* Memory page has hardware error */ +#define EFTYPE 140 /* Wrong file type for the intended operation */ + #endif diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h index c01ed91b1ef44..1835a50b69cef 100644 --- a/tools/arch/mips/include/uapi/asm/errno.h +++ b/tools/arch/mips/include/uapi/asm/errno.h @@ -126,6 +126,8 @@ #define EHWPOISON 168 /* Memory page has hardware error */ +#define EFTYPE 169 /* Wrong file type for the intended operation */ + #define EDQUOT 1133 /* Quota exceeded */ diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 8cbc07c1903e4..93194fbb0a80d 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -124,4 +124,6 @@ #define EHWPOISON 257 /* Memory page has hardware error */ +#define EFTYPE 258 /* Wrong file type for the intended operation */ + #endif diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h index 4a41e7835fd5b..71940ec9130b4 100644 --- a/tools/arch/sparc/include/uapi/asm/errno.h +++ b/tools/arch/sparc/include/uapi/asm/errno.h @@ -117,4 +117,6 @@ #define EHWPOISON 135 /* Memory page has hardware error */ +#define EFTYPE 136 /* Wrong file type for the intended operation */ + #endif diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h index 92e7ae493ee31..bd78e69e0a43f 100644 --- a/tools/include/uapi/asm-generic/errno.h +++ b/tools/include/uapi/asm-generic/errno.h @@ -122,4 +122,6 @@ #define EHWPOISON 133 /* Memory page has hardware error */ +#define EFTYPE 134 /* Wrong file type for the intended operation */ + #endif diff 
--git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h new file mode 100644 index 0000000000000..4759c471676cc --- /dev/null +++ b/tools/include/uapi/linux/openat2.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _LINUX_OPENAT2_H +#define _LINUX_OPENAT2_H + +#include <linux/types.h> + +/* + * Arguments for how openat2(2) should open the target path. If only @flags and + * @mode are non-zero, then openat2(2) operates very similarly to openat(2). + * + * However, unlike openat(2), unknown or invalid bits in @flags result in + * -EINVAL rather than being silently ignored. @mode must be zero unless one of + * {O_CREAT, O_TMPFILE} are set. + * + * @flags: O_* flags. + * @mode: O_CREAT/O_TMPFILE file mode. + * @resolve: RESOLVE_* flags. + */ +struct open_how { + __u64 flags; + __u64 mode; + __u64 resolve; +}; + +/* how->resolve flags for openat2(2). */ +#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings + (includes bind-mounts). */ +#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style + "magic-links". */ +#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks + (implies RESOLVE_NO_MAGICLINKS) */ +#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like + "..", symlinks, and absolute + paths which escape the dirfd. */ +#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." + be scoped inside the dirfd + (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. 
*/ + +#endif /* _LINUX_OPENAT2_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 13f71202845e8..2ea4c81df08f1 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -254,6 +254,13 @@ struct file_attr { #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ #define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */ +/* + * Case handling flags (read-only, cannot be set via ioctl). + * Default (neither set) indicates POSIX semantics: case-sensitive + * lookups and case-preserving storage. + */ +#define FS_XFLAG_CASEFOLD 0x00040000 /* case-insensitive lookups */ +#define FS_XFLAG_CASENONPRESERVING 0x00080000 /* case not preserved */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/filesystems/openat2/.gitignore index 82a4846cbc4b2..82a4846cbc4b2 100644 --- a/tools/testing/selftests/openat2/.gitignore +++ b/tools/testing/selftests/filesystems/openat2/.gitignore diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/filesystems/openat2/Makefile index 185dc76ebb5fc..d848aac96bded 100644 --- a/tools/testing/selftests/openat2/Makefile +++ b/tools/testing/selftests/filesystems/openat2/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test +CFLAGS += $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(TOOLS_INCLUDES) +TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test emptypath_test # gcc requires -static-libasan in order to ensure that Address Sanitizer's # library is the first one loaded. 
However, clang already statically links the @@ -13,6 +14,4 @@ endif LOCAL_HDRS += helpers.h -include ../lib.mk - -$(TEST_GEN_PROGS): helpers.c +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/openat2/emptypath_test.c b/tools/testing/selftests/filesystems/openat2/emptypath_test.c new file mode 100644 index 0000000000000..be37ccba57ecf --- /dev/null +++ b/tools/testing/selftests/filesystems/openat2/emptypath_test.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <string.h> +#include <sys/stat.h> + +#include "kselftest_harness.h" + +#ifndef O_EMPTYPATH +#define O_EMPTYPATH (1 << 26) +#endif + +#define EMPTYPATH_TEST_FILE "/tmp/emptypath_test" + +FIXTURE(emptypath) { + int opath_fd; +}; + +FIXTURE_SETUP(emptypath) +{ + int fd; + + self->opath_fd = -1; + + fd = open(EMPTYPATH_TEST_FILE, O_CREAT | O_WRONLY, S_IRWXU); + ASSERT_GE(fd, 0) { + TH_LOG("create %s: %s", EMPTYPATH_TEST_FILE, strerror(errno)); + } + close(fd); + + self->opath_fd = open(EMPTYPATH_TEST_FILE, O_PATH); + ASSERT_GE(self->opath_fd, 0) { + TH_LOG("open %s O_PATH: %s", EMPTYPATH_TEST_FILE, strerror(errno)); + } +} + +FIXTURE_TEARDOWN(emptypath) +{ + if (self->opath_fd >= 0) + close(self->opath_fd); + unlink(EMPTYPATH_TEST_FILE); +} + +/* An empty path is rejected with ENOENT unless O_EMPTYPATH is set. */ +TEST_F(emptypath, without_flag_returns_enoent) +{ + int fd = openat(self->opath_fd, "", O_RDONLY); + + if (fd >= 0) + close(fd); + ASSERT_LT(fd, 0) { + TH_LOG("empty path without O_EMPTYPATH unexpectedly succeeded"); + } + EXPECT_EQ(errno, ENOENT) { + TH_LOG("expected ENOENT, got %s", strerror(errno)); + } +} + +/* O_EMPTYPATH reopens the O_PATH fd through an empty path. 
*/ +TEST_F(emptypath, reopens_opath_fd) +{ + int fd = openat(self->opath_fd, "", O_RDONLY | O_EMPTYPATH); + + if (fd < 0 && errno == EINVAL) + SKIP(return, "O_EMPTYPATH not supported"); + + ASSERT_GE(fd, 0) { + TH_LOG("O_EMPTYPATH failed: %s", strerror(errno)); + } + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/openat2/helpers.h b/tools/testing/selftests/filesystems/openat2/helpers.h new file mode 100644 index 0000000000000..3f01fb68c5a6d --- /dev/null +++ b/tools/testing/selftests/filesystems/openat2/helpers.h @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. + * Copyright (C) 2026 Amutable GmbH + */ + +#ifndef __RESOLVEAT_H__ +#define __RESOLVEAT_H__ + +#define _GNU_SOURCE +#include <stdint.h> +#include <stdbool.h> +#include <errno.h> +#include <limits.h> +#include <linux/types.h> +#include <linux/unistd.h> +#include <linux/openat2.h> +#include "kselftest_harness.h" + +#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); }))) + +#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */ +#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0 + +__maybe_unused +static bool needs_openat2(const struct open_how *how) +{ + return how->resolve != 0; +} + +__maybe_unused +static int raw_openat2(int dfd, const char *path, void *how, size_t size) +{ + int ret = syscall(__NR_openat2, dfd, path, how, size); + + return ret >= 0 ? ret : -errno; +} + +__maybe_unused +static int sys_openat2(int dfd, const char *path, struct open_how *how) +{ + return raw_openat2(dfd, path, how, sizeof(*how)); +} + +__maybe_unused +static int sys_openat(int dfd, const char *path, struct open_how *how) +{ + int ret = openat(dfd, path, how->flags, how->mode); + + return ret >= 0 ? 
ret : -errno; +} + +__maybe_unused +static int sys_renameat2(int olddirfd, const char *oldpath, + int newdirfd, const char *newpath, unsigned int flags) +{ + int ret = syscall(__NR_renameat2, olddirfd, oldpath, + newdirfd, newpath, flags); + + return ret >= 0 ? ret : -errno; +} + +__maybe_unused +static int touchat(int dfd, const char *path) +{ + int fd = openat(dfd, path, O_CREAT, 0700); + + if (fd >= 0) + close(fd); + return fd; +} + +__maybe_unused +static char *fdreadlink(struct __test_metadata *_metadata, int fd) +{ + char *target, *tmp; + + ASSERT_GT(asprintf(&tmp, "/proc/self/fd/%d", fd), 0); + + target = malloc(PATH_MAX); + ASSERT_NE(target, NULL); + memset(target, 0, PATH_MAX); + + ASSERT_GT(readlink(tmp, target, PATH_MAX), 0); + + free(tmp); + return target; +} + +__maybe_unused +static bool fdequal(struct __test_metadata *_metadata, int fd, + int dfd, const char *path) +{ + char *fdpath, *dfdpath, *other; + bool cmp; + + fdpath = fdreadlink(_metadata, fd); + dfdpath = fdreadlink(_metadata, dfd); + + if (!path) { + ASSERT_GT(asprintf(&other, "%s", dfdpath), 0); + } else if (*path == '/') { + ASSERT_GT(asprintf(&other, "%s", path), 0); + } else { + ASSERT_GT(asprintf(&other, "%s/%s", dfdpath, path), 0); + } + + cmp = !strcmp(fdpath, other); + + free(fdpath); + free(dfdpath); + free(other); + return cmp; +} + +static bool openat2_supported = false; + +__attribute__((constructor)) +static void __detect_openat2_supported(void) +{ + struct open_how how = {}; + int fd; + + BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_VER0); + + /* Check openat2(2) support. 
*/ + fd = sys_openat2(AT_FDCWD, ".", &how); + openat2_supported = (fd >= 0); + + if (fd >= 0) + close(fd); +} + +#endif /* __RESOLVEAT_H__ */ diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/filesystems/openat2/openat2_test.c index 0e161ef9e9e48..6f5afbe2d8d31 100644 --- a/tools/testing/selftests/openat2/openat2_test.c +++ b/tools/testing/selftests/filesystems/openat2/openat2_test.c @@ -15,8 +15,8 @@ #include <stdbool.h> #include <string.h> -#include "kselftest.h" #include "helpers.h" +#include "kselftest_harness.h" /* * O_LARGEFILE is set to 0 by glibc. @@ -45,13 +45,29 @@ struct struct_test { int err; }; -#define NUM_OPENAT2_STRUCT_TESTS 7 -#define NUM_OPENAT2_STRUCT_VARIATIONS 13 +struct flag_test { + const char *name; + struct open_how how; + int err; +}; + +FIXTURE(openat2) {}; -void test_openat2_struct(void) +FIXTURE_SETUP(openat2) { - int misalignments[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 17, 87 }; + if (!openat2_supported) + SKIP(return, "openat2(2) not supported"); +} + +FIXTURE_TEARDOWN(openat2) {} +/* + * Verify that the struct size and misalignment handling for openat2(2) is + * correct, including that is_zeroed_user() works. + */ +TEST_F(openat2, struct_argument_sizes) +{ + int misalignments[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 17, 87 }; struct struct_test tests[] = { /* Normal struct. 
*/ { .name = "normal struct", @@ -83,26 +99,14 @@ void test_openat2_struct(void) .size = sizeof(struct open_how_ext), .err = -E2BIG }, }; - BUILD_BUG_ON(ARRAY_LEN(misalignments) != NUM_OPENAT2_STRUCT_VARIATIONS); - BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_STRUCT_TESTS); - - for (int i = 0; i < ARRAY_LEN(tests); i++) { + for (int i = 0; i < ARRAY_SIZE(tests); i++) { struct struct_test *test = &tests[i]; struct open_how_ext how_ext = test->arg; - for (int j = 0; j < ARRAY_LEN(misalignments); j++) { + for (int j = 0; j < ARRAY_SIZE(misalignments); j++) { int fd, misalign = misalignments[j]; - char *fdpath = NULL; - bool failed; - void (*resultfn)(const char *msg, ...) = ksft_test_result_pass; - void *copy = NULL, *how_copy = &how_ext; - - if (!openat2_supported) { - ksft_print_msg("openat2(2) unsupported\n"); - resultfn = ksft_test_result_skip; - goto skip; - } + char *fdpath = NULL; if (misalign) { /* @@ -119,50 +123,42 @@ void test_openat2_struct(void) } fd = raw_openat2(AT_FDCWD, ".", how_copy, test->size); - if (test->err >= 0) - failed = (fd < 0); - else - failed = (fd != test->err); if (fd >= 0) { - fdpath = fdreadlink(fd); + fdpath = fdreadlink(_metadata, fd); close(fd); } - if (failed) { - resultfn = ksft_test_result_fail; - - ksft_print_msg("openat2 unexpectedly returned "); - if (fdpath) - ksft_print_msg("%d['%s']\n", fd, fdpath); - else - ksft_print_msg("%d (%s)\n", fd, strerror(-fd)); + if (test->err >= 0) { + EXPECT_GE(fd, 0) { + TH_LOG("openat2 with %s [misalign=%d] should succeed, got %d (%s)", + test->name, misalign, + fd, strerror(-fd)); + } + } else { + EXPECT_EQ(test->err, fd) { + if (fdpath) + TH_LOG("openat2 with %s [misalign=%d] should fail with %d (%s), got %d['%s']", + test->name, misalign, + test->err, + strerror(-test->err), + fd, fdpath); + else + TH_LOG("openat2 with %s [misalign=%d] should fail with %d (%s), got %d (%s)", + test->name, misalign, + test->err, + strerror(-test->err), + fd, strerror(-fd)); + } } -skip: - if (test->err >= 
0) - resultfn("openat2 with %s argument [misalign=%d] succeeds\n", - test->name, misalign); - else - resultfn("openat2 with %s argument [misalign=%d] fails with %d (%s)\n", - test->name, misalign, test->err, - strerror(-test->err)); - free(copy); free(fdpath); - fflush(stdout); } } } -struct flag_test { - const char *name; - struct open_how how; - int err; -}; - -#define NUM_OPENAT2_FLAG_TESTS 25 - -void test_openat2_flags(void) +/* Verify openat2(2) flag and mode validation. */ +TEST_F(openat2, flag_validation) { struct flag_test tests[] = { /* O_TMPFILE is incompatible with O_PATH and O_CREAT. */ @@ -241,20 +237,10 @@ void test_openat2_flags(void) .how.resolve = 0, .err = -EINVAL }, }; - BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS); - - for (int i = 0; i < ARRAY_LEN(tests); i++) { + for (int i = 0; i < ARRAY_SIZE(tests); i++) { int fd, fdflags = -1; char *path, *fdpath = NULL; - bool failed = false; struct flag_test *test = &tests[i]; - void (*resultfn)(const char *msg, ...) = ksft_test_result_pass; - - if (!openat2_supported) { - ksft_print_msg("openat2(2) unsupported\n"); - resultfn = ksft_test_result_skip; - goto skip; - } path = (test->how.flags & O_CREAT) ? "/tmp/ksft.openat2_tmpfile" : "."; unlink(path); @@ -265,74 +251,112 @@ void test_openat2_flags(void) * Skip the testcase if it failed because not supported * by FS. (e.g. 
a valid O_TMPFILE combination on NFS) */ - ksft_test_result_skip("openat2 with %s fails with %d (%s)\n", - test->name, fd, strerror(-fd)); - goto next; + TH_LOG("openat2 with %s not supported by FS -- skipping", + test->name); + continue; } - if (test->err >= 0) - failed = (fd < 0); - else - failed = (fd != test->err); - if (fd >= 0) { - int otherflags; - - fdpath = fdreadlink(fd); - fdflags = fcntl(fd, F_GETFL); - otherflags = fcntl(fd, F_GETFD); - close(fd); + if (test->err >= 0) { + EXPECT_GE(fd, 0) { + TH_LOG("openat2 with %s should succeed, got %d (%s)", + test->name, fd, strerror(-fd)); + } + if (fd >= 0) { + int otherflags; - E_assert(fdflags >= 0, "fcntl F_GETFL of new fd"); - E_assert(otherflags >= 0, "fcntl F_GETFD of new fd"); + fdpath = fdreadlink(_metadata, fd); + fdflags = fcntl(fd, F_GETFL); + otherflags = fcntl(fd, F_GETFD); + close(fd); - /* O_CLOEXEC isn't shown in F_GETFL. */ - if (otherflags & FD_CLOEXEC) - fdflags |= O_CLOEXEC; - /* O_CREAT is hidden from F_GETFL. */ - if (test->how.flags & O_CREAT) - fdflags |= O_CREAT; - if (!(test->how.flags & O_LARGEFILE)) - fdflags &= ~O_LARGEFILE; - failed |= (fdflags != test->how.flags); - } + ASSERT_GE(fdflags, 0); + ASSERT_GE(otherflags, 0); - if (failed) { - resultfn = ksft_test_result_fail; + /* O_CLOEXEC isn't shown in F_GETFL. */ + if (otherflags & FD_CLOEXEC) + fdflags |= O_CLOEXEC; + /* O_CREAT is hidden from F_GETFL. 
*/ + if (test->how.flags & O_CREAT) + fdflags |= O_CREAT; + if (!(test->how.flags & O_LARGEFILE)) + fdflags &= ~O_LARGEFILE; - ksft_print_msg("openat2 unexpectedly returned "); - if (fdpath) - ksft_print_msg("%d['%s'] with %X (!= %llX)\n", - fd, fdpath, fdflags, - test->how.flags); - else - ksft_print_msg("%d (%s)\n", fd, strerror(-fd)); + EXPECT_EQ(fdflags, (int)test->how.flags) { + TH_LOG("openat2 with %s: flags mismatch %X != %llX", + test->name, fdflags, + (unsigned long long)test->how.flags); + } + } + } else { + EXPECT_EQ(test->err, fd) { + if (fd >= 0) { + fdpath = fdreadlink(_metadata, fd); + TH_LOG("openat2 with %s should fail with %d (%s), got %d['%s']", + test->name, test->err, + strerror(-test->err), + fd, fdpath); + } else { + TH_LOG("openat2 with %s should fail with %d (%s), got %d (%s)", + test->name, test->err, + strerror(-test->err), + fd, strerror(-fd)); + } + } + if (fd >= 0) + close(fd); } -skip: - if (test->err >= 0) - resultfn("openat2 with %s succeeds\n", test->name); - else - resultfn("openat2 with %s fails with %d (%s)\n", - test->name, test->err, strerror(-test->err)); -next: free(fdpath); - fflush(stdout); } } -#define NUM_TESTS (NUM_OPENAT2_STRUCT_VARIATIONS * NUM_OPENAT2_STRUCT_TESTS + \ - NUM_OPENAT2_FLAG_TESTS) +#ifndef OPENAT2_REGULAR +#define OPENAT2_REGULAR ((__u64)1 << 32) +#endif + +#ifndef EFTYPE +#define EFTYPE 134 +#endif + +/* Kernel-internal carrier for OPENAT2_REGULAR (see __O_REGULAR in fcntl.h). */ +#ifndef __O_REGULAR +#define __O_REGULAR (1 << 30) +#endif -int main(int argc, char **argv) +/* Verify that OPENAT2_REGULAR rejects non-regular files with EFTYPE. 
*/ +TEST_F(openat2, regular_flag) { - ksft_print_header(); - ksft_set_plan(NUM_TESTS); + struct open_how how = { + .flags = OPENAT2_REGULAR | O_RDONLY, + }; + int fd; - test_openat2_struct(); - test_openat2_flags(); + fd = sys_openat2(AT_FDCWD, "/dev/null", &how); + if (fd == -ENOENT) + SKIP(return, "/dev/null does not exist"); - if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) - ksft_exit_fail(); - else - ksft_exit_pass(); + EXPECT_EQ(-EFTYPE, fd) { + TH_LOG("openat2 with OPENAT2_REGULAR should fail with %d (%s), got %d (%s)", + -EFTYPE, strerror(EFTYPE), fd, strerror(-fd)); + } + if (fd >= 0) + close(fd); } + +/* open()/openat() must keep ignoring the internal __O_REGULAR bit. */ +TEST(legacy_openat_ignores_o_regular) +{ + int fd; + + fd = openat(AT_FDCWD, "/dev/null", O_RDONLY | __O_REGULAR); + if (fd < 0 && errno == ENOENT) + SKIP(return, "/dev/null does not exist"); + + ASSERT_GE(fd, 0) { + TH_LOG("legacy openat() must ignore the __O_REGULAR carrier bit, got errno %d (%s)", + errno, strerror(errno)); + } + close(fd); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/openat2/rename_attack_test.c b/tools/testing/selftests/filesystems/openat2/rename_attack_test.c new file mode 100644 index 0000000000000..1f33c34f56be9 --- /dev/null +++ b/tools/testing/selftests/filesystems/openat2/rename_attack_test.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai <cyphar@cyphar.com> + * Copyright (C) 2018-2019 SUSE LLC. 
+ */ + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <sched.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mount.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <string.h> +#include <syscall.h> +#include <limits.h> +#include <unistd.h> + +#include "helpers.h" +#include "kselftest_harness.h" + +#define ROUNDS 400000 + +/* Swap @dirfd/@a and @dirfd/@b constantly. Parent must kill this process. */ +pid_t spawn_attack(struct __test_metadata *_metadata, + int dirfd, char *a, char *b) +{ + pid_t child = fork(); + if (child != 0) + return child; + + /* If the parent (the test process) dies, kill ourselves too. */ + ASSERT_EQ(prctl(PR_SET_PDEATHSIG, SIGKILL), 0); + + /* Swap @a and @b. */ + for (;;) + renameat2(dirfd, a, dirfd, b, RENAME_EXCHANGE); + exit(1); +} + +/* + * Construct a test directory with the following structure: + * + * root/ + * |-- a/ + * | `-- c/ + * `-- b/ + */ +FIXTURE(rename_attack) { + int dfd; + int afd; + pid_t child; +}; + +FIXTURE_SETUP(rename_attack) +{ + char dirname[] = "/tmp/ksft-openat2-rename-attack.XXXXXX"; + + self->dfd = -1; + self->afd = -1; + self->child = 0; + + /* Make the top-level directory. 
*/ + ASSERT_NE(mkdtemp(dirname), NULL); + self->dfd = open(dirname, O_PATH | O_DIRECTORY); + ASSERT_GE(self->dfd, 0); + + ASSERT_EQ(mkdirat(self->dfd, "a", 0755), 0); + ASSERT_EQ(mkdirat(self->dfd, "b", 0755), 0); + ASSERT_EQ(mkdirat(self->dfd, "a/c", 0755), 0); + + self->afd = openat(self->dfd, "a", O_PATH); + ASSERT_GE(self->afd, 0); + + self->child = spawn_attack(_metadata, self->dfd, "a/c", "b"); + ASSERT_GT(self->child, 0); +} + +FIXTURE_TEARDOWN(rename_attack) +{ + if (self->child > 0) + kill(self->child, SIGKILL); + if (self->afd >= 0) + close(self->afd); + if (self->dfd >= 0) + close(self->dfd); +} + +FIXTURE_VARIANT(rename_attack) { + int resolve; + const char *name; +}; + +FIXTURE_VARIANT_ADD(rename_attack, resolve_beneath) { + .resolve = RESOLVE_BENEATH, + .name = "RESOLVE_BENEATH", +}; + +FIXTURE_VARIANT_ADD(rename_attack, resolve_in_root) { + .resolve = RESOLVE_IN_ROOT, + .name = "RESOLVE_IN_ROOT", +}; + +TEST_F_TIMEOUT(rename_attack, test, 120) +{ + int escapes = 0, successes = 0, other_errs = 0, exdevs = 0, eagains = 0; + char *victim_path = "c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../.."; + struct open_how how = { + .flags = O_PATH, + .resolve = variant->resolve, + }; + + if (!openat2_supported) { + how.resolve = 0; + TH_LOG("openat2(2) unsupported -- using openat(2) instead"); + } + + for (int i = 0; i < ROUNDS; i++) { + int fd; + + if (openat2_supported) + fd = sys_openat2(self->afd, victim_path, &how); + else + fd = sys_openat(self->afd, victim_path, &how); + + if (fd < 0) { + if (fd == -EAGAIN) + eagains++; + else if (fd == -EXDEV) + exdevs++; + else if (fd == -ENOENT) + escapes++; /* escaped outside and got ENOENT... 
*/ + else + other_errs++; /* unexpected error */ + } else { + if (fdequal(_metadata, fd, self->afd, NULL)) + successes++; + else + escapes++; /* we got an unexpected fd */ + } + if (fd >= 0) + close(fd); + } + + TH_LOG("non-escapes: EAGAIN=%d EXDEV=%d E<other>=%d success=%d", + eagains, exdevs, other_errs, successes); + ASSERT_EQ(escapes, 0) { + TH_LOG("rename attack with %s (%d runs, got %d escapes)", + variant->name, ROUNDS, escapes); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/openat2/resolve_test.c b/tools/testing/selftests/filesystems/openat2/resolve_test.c index a76ef15ceb90a..eacde59ce1583 100644 --- a/tools/testing/selftests/openat2/resolve_test.c +++ b/tools/testing/selftests/filesystems/openat2/resolve_test.c @@ -14,8 +14,81 @@ #include <stdbool.h> #include <string.h> -#include "kselftest.h" #include "helpers.h" +#include "kselftest_harness.h" + +struct resolve_test { + const char *name; + const char *dir; + const char *path; + struct open_how how; + bool pass; + union { + int err; + const char *path; + } out; +}; + +/* + * Verify a single resolve test case. This must be called from within a TEST_F + * function with _metadata in scope. + */ +static void verify_resolve_test(struct __test_metadata *_metadata, + int rootfd, int hardcoded_fd, + const struct resolve_test *test) +{ + struct open_how how = test->how; + int dfd, fd; + char *fdpath = NULL; + + /* Auto-set O_PATH. 
*/ + if (!(how.flags & O_CREAT)) + how.flags |= O_PATH; + + if (test->dir) + dfd = openat(rootfd, test->dir, O_PATH | O_DIRECTORY); + else + dfd = dup(rootfd); + ASSERT_GE(dfd, 0) TH_LOG("failed to open dir '%s': %m", test->dir ?: "."); + ASSERT_EQ(dup2(dfd, hardcoded_fd), hardcoded_fd); + + fd = sys_openat2(dfd, test->path, &how); + + if (test->pass) { + EXPECT_GE(fd, 0) { + TH_LOG("%s: expected success, got %d (%s)", + test->name, fd, strerror(-fd)); + } + if (fd >= 0) { + EXPECT_TRUE(fdequal(_metadata, fd, rootfd, test->out.path)) { + fdpath = fdreadlink(_metadata, fd); + TH_LOG("%s: wrong path '%s', expected '%s'", + test->name, fdpath, + test->out.path ?: "."); + free(fdpath); + } + } + } else { + EXPECT_EQ(test->out.err, fd) { + if (fd >= 0) { + fdpath = fdreadlink(_metadata, fd); + TH_LOG("%s: expected %d (%s), got %d['%s']", + test->name, test->out.err, + strerror(-test->out.err), fd, fdpath); + free(fdpath); + } else { + TH_LOG("%s: expected %d (%s), got %d (%s)", + test->name, test->out.err, + strerror(-test->out.err), + fd, strerror(-fd)); + } + } + } + + if (fd >= 0) + close(fd); + close(dfd); +} /* * Construct a test directory with the following structure: @@ -39,101 +112,110 @@ * |-- absself -> / * |-- self -> ../../root/ * |-- garbageself -> /../../root/ - * |-- passwd -> ../cheeky/../cheeky/../etc/../etc/passwd - * |-- abspasswd -> /../cheeky/../cheeky/../etc/../etc/passwd + * |-- passwd -> ../cheeky/../etc/../etc/passwd + * |-- abspasswd -> /../cheeky/../etc/../etc/passwd * |-- dotdotlink -> ../../../../../../../../../../../../../../etc/passwd * `-- garbagelink -> /../../../../../../../../../../../../../../etc/passwd */ -int setup_testdir(void) +FIXTURE(openat2_resolve) { + int rootfd; + int hardcoded_fd; + char *hardcoded_fdpath; + char *procselfexe; +}; + +FIXTURE_SETUP(openat2_resolve) { - int dfd, tmpfd; char dirname[] = "/tmp/ksft-openat2-testdir.XXXXXX"; + int dfd, tmpfd; + + self->rootfd = -1; + self->hardcoded_fd = -1; + 
self->hardcoded_fdpath = NULL; + self->procselfexe = NULL; + + /* NOTE: We should be checking for CAP_SYS_ADMIN here... */ + if (geteuid() != 0) + SKIP(return, "all tests require euid == 0"); + if (!openat2_supported) + SKIP(return, "openat2(2) not supported"); /* Unshare and make /tmp a new directory. */ - E_unshare(CLONE_NEWNS); - E_mount("", "/tmp", "", MS_PRIVATE, ""); + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(mount("", "/tmp", "", MS_PRIVATE, ""), 0); /* Make the top-level directory. */ - if (!mkdtemp(dirname)) - ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n"); + ASSERT_NE(mkdtemp(dirname), NULL); dfd = open(dirname, O_PATH | O_DIRECTORY); - if (dfd < 0) - ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n"); + ASSERT_GE(dfd, 0); /* A sub-directory which is actually used for tests. */ - E_mkdirat(dfd, "root", 0755); + ASSERT_EQ(mkdirat(dfd, "root", 0755), 0); tmpfd = openat(dfd, "root", O_PATH | O_DIRECTORY); - if (tmpfd < 0) - ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n"); + ASSERT_GE(tmpfd, 0); close(dfd); dfd = tmpfd; - E_symlinkat("/proc/self/exe", dfd, "procexe"); - E_symlinkat("/proc/self/root", dfd, "procroot"); - E_mkdirat(dfd, "root", 0755); + ASSERT_EQ(symlinkat("/proc/self/exe", dfd, "procexe"), 0); + ASSERT_EQ(symlinkat("/proc/self/root", dfd, "procroot"), 0); + ASSERT_EQ(mkdirat(dfd, "root", 0755), 0); /* There is no mountat(2), so use chdir. 
*/ - E_mkdirat(dfd, "mnt", 0755); - E_fchdir(dfd); - E_mount("tmpfs", "./mnt", "tmpfs", MS_NOSUID | MS_NODEV, ""); - E_symlinkat("../mnt/", dfd, "mnt/self"); - E_symlinkat("/mnt/", dfd, "mnt/absself"); + ASSERT_EQ(mkdirat(dfd, "mnt", 0755), 0); + ASSERT_EQ(fchdir(dfd), 0); + ASSERT_EQ(mount("tmpfs", "./mnt", "tmpfs", MS_NOSUID | MS_NODEV, ""), 0); + ASSERT_EQ(symlinkat("../mnt/", dfd, "mnt/self"), 0); + ASSERT_EQ(symlinkat("/mnt/", dfd, "mnt/absself"), 0); - E_mkdirat(dfd, "etc", 0755); - E_touchat(dfd, "etc/passwd"); + ASSERT_EQ(mkdirat(dfd, "etc", 0755), 0); + ASSERT_GE(touchat(dfd, "etc/passwd"), 0); - E_symlinkat("/newfile3", dfd, "creatlink"); - E_symlinkat("etc/", dfd, "reletc"); - E_symlinkat("etc/passwd", dfd, "relsym"); - E_symlinkat("/etc/", dfd, "absetc"); - E_symlinkat("/etc/passwd", dfd, "abssym"); - E_symlinkat("/cheeky", dfd, "abscheeky"); + ASSERT_EQ(symlinkat("/newfile3", dfd, "creatlink"), 0); + ASSERT_EQ(symlinkat("etc/", dfd, "reletc"), 0); + ASSERT_EQ(symlinkat("etc/passwd", dfd, "relsym"), 0); + ASSERT_EQ(symlinkat("/etc/", dfd, "absetc"), 0); + ASSERT_EQ(symlinkat("/etc/passwd", dfd, "abssym"), 0); + ASSERT_EQ(symlinkat("/cheeky", dfd, "abscheeky"), 0); - E_mkdirat(dfd, "cheeky", 0755); + ASSERT_EQ(mkdirat(dfd, "cheeky", 0755), 0); - E_symlinkat("/", dfd, "cheeky/absself"); - E_symlinkat("../../root/", dfd, "cheeky/self"); - E_symlinkat("/../../root/", dfd, "cheeky/garbageself"); + ASSERT_EQ(symlinkat("/", dfd, "cheeky/absself"), 0); + ASSERT_EQ(symlinkat("../../root/", dfd, "cheeky/self"), 0); + ASSERT_EQ(symlinkat("/../../root/", dfd, "cheeky/garbageself"), 0); - E_symlinkat("../cheeky/../etc/../etc/passwd", dfd, "cheeky/passwd"); - E_symlinkat("/../cheeky/../etc/../etc/passwd", dfd, "cheeky/abspasswd"); + ASSERT_EQ(symlinkat("../cheeky/../etc/../etc/passwd", + dfd, "cheeky/passwd"), 0); + ASSERT_EQ(symlinkat("/../cheeky/../etc/../etc/passwd", + dfd, "cheeky/abspasswd"), 0); - 
E_symlinkat("../../../../../../../../../../../../../../etc/passwd", - dfd, "cheeky/dotdotlink"); - E_symlinkat("/../../../../../../../../../../../../../../etc/passwd", - dfd, "cheeky/garbagelink"); + ASSERT_EQ(symlinkat("../../../../../../../../../../../../../../etc/passwd", + dfd, "cheeky/dotdotlink"), 0); + ASSERT_EQ(symlinkat("/../../../../../../../../../../../../../../etc/passwd", + dfd, "cheeky/garbagelink"), 0); - return dfd; -} - -struct basic_test { - const char *name; - const char *dir; - const char *path; - struct open_how how; - bool pass; - union { - int err; - const char *path; - } out; -}; + self->rootfd = dfd; -#define NUM_OPENAT2_OPATH_TESTS 88 + self->hardcoded_fd = open("/dev/null", O_RDONLY); + ASSERT_GE(self->hardcoded_fd, 0); + ASSERT_GE(asprintf(&self->hardcoded_fdpath, "self/fd/%d", + self->hardcoded_fd), 0); + ASSERT_GE(asprintf(&self->procselfexe, "/proc/%d/exe", getpid()), 0); +} -void test_openat2_opath_tests(void) +FIXTURE_TEARDOWN(openat2_resolve) { - int rootfd, hardcoded_fd; - char *procselfexe, *hardcoded_fdpath; - - E_asprintf(&procselfexe, "/proc/%d/exe", getpid()); - rootfd = setup_testdir(); - - hardcoded_fd = open("/dev/null", O_RDONLY); - E_assert(hardcoded_fd >= 0, "open fd to hardcode"); - E_asprintf(&hardcoded_fdpath, "self/fd/%d", hardcoded_fd); + free(self->procselfexe); + free(self->hardcoded_fdpath); + if (self->hardcoded_fd >= 0) + close(self->hardcoded_fd); + if (self->rootfd >= 0) + close(self->rootfd); +} - struct basic_test tests[] = { - /** RESOLVE_BENEATH **/ +/* Attempts to cross the dirfd should be blocked with -EXDEV. */ +TEST_F(openat2_resolve, resolve_beneath) +{ + struct resolve_test tests[] = { /* Attempts to cross dirfd should be blocked. 
*/ { .name = "[beneath] jump to /", .path = "/", .how.resolve = RESOLVE_BENEATH, @@ -206,9 +288,17 @@ void test_openat2_opath_tests(void) { .name = "[beneath] tricky absolute + garbage link outside $root", .path = "abscheeky/garbagelink", .how.resolve = RESOLVE_BENEATH, .out.err = -EXDEV, .pass = false }, + }; - /** RESOLVE_IN_ROOT **/ - /* All attempts to cross the dirfd will be scoped-to-root. */ + for (int i = 0; i < ARRAY_SIZE(tests); i++) + verify_resolve_test(_metadata, self->rootfd, + self->hardcoded_fd, &tests[i]); +} + +/* All attempts to cross the dirfd will be scoped-to-root. */ +TEST_F(openat2_resolve, resolve_in_root) +{ + struct resolve_test tests[] = { { .name = "[in_root] jump to /", .path = "/", .how.resolve = RESOLVE_IN_ROOT, .out.path = NULL, .pass = true }, @@ -297,8 +387,17 @@ void test_openat2_opath_tests(void) .how.mode = 0700, .how.resolve = RESOLVE_IN_ROOT, .out.path = "newfile3", .pass = true }, + }; + + for (int i = 0; i < ARRAY_SIZE(tests); i++) + verify_resolve_test(_metadata, self->rootfd, + self->hardcoded_fd, &tests[i]); +} - /** RESOLVE_NO_XDEV **/ +/* Crossing mount boundaries should be blocked. */ +TEST_F(openat2_resolve, resolve_no_xdev) +{ + struct resolve_test tests[] = { /* Crossing *down* into a mountpoint is disallowed. */ { .name = "[no_xdev] cross into $mnt", .path = "mnt", .how.resolve = RESOLVE_NO_XDEV, @@ -347,10 +446,19 @@ void test_openat2_opath_tests(void) .out.err = -EXDEV, .pass = false }, /* Except magic-link jumps inside the same vfsmount. 
*/ { .name = "[no_xdev] jump through magic-link to same procfs", - .dir = "/proc", .path = hardcoded_fdpath, .how.resolve = RESOLVE_NO_XDEV, - .out.path = "/proc", .pass = true, }, + .dir = "/proc", .path = self->hardcoded_fdpath, .how.resolve = RESOLVE_NO_XDEV, + .out.path = "/proc", .pass = true, }, + }; + + for (int i = 0; i < ARRAY_SIZE(tests); i++) + verify_resolve_test(_metadata, self->rootfd, + self->hardcoded_fd, &tests[i]); +} - /** RESOLVE_NO_MAGICLINKS **/ +/* Procfs-style magic-link resolution should be blocked. */ +TEST_F(openat2_resolve, resolve_no_magiclinks) +{ + struct resolve_test tests[] = { /* Regular symlinks should work. */ { .name = "[no_magiclinks] ordinary relative symlink", .path = "relsym", .how.resolve = RESOLVE_NO_MAGICLINKS, @@ -365,7 +473,7 @@ void test_openat2_opath_tests(void) { .name = "[no_magiclinks] normal path to magic-link with O_NOFOLLOW", .path = "/proc/self/exe", .how.flags = O_NOFOLLOW, .how.resolve = RESOLVE_NO_MAGICLINKS, - .out.path = procselfexe, .pass = true }, + .out.path = self->procselfexe, .pass = true }, { .name = "[no_magiclinks] symlink to magic-link path component", .path = "procroot/etc", .how.resolve = RESOLVE_NO_MAGICLINKS, .out.err = -ELOOP, .pass = false }, @@ -376,8 +484,17 @@ void test_openat2_opath_tests(void) .path = "/proc/self/root/etc", .how.flags = O_NOFOLLOW, .how.resolve = RESOLVE_NO_MAGICLINKS, .out.err = -ELOOP, .pass = false }, + }; - /** RESOLVE_NO_SYMLINKS **/ + for (int i = 0; i < ARRAY_SIZE(tests); i++) + verify_resolve_test(_metadata, self->rootfd, + self->hardcoded_fd, &tests[i]); +} + +/* All symlink resolution should be blocked. */ +TEST_F(openat2_resolve, resolve_no_symlinks) +{ + struct resolve_test tests[] = { /* Normal paths should work. 
*/ { .name = "[no_symlinks] ordinary path to '.'", .path = ".", .how.resolve = RESOLVE_NO_SYMLINKS, @@ -436,88 +553,9 @@ void test_openat2_opath_tests(void) .out.err = -ELOOP, .pass = false }, }; - BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_OPATH_TESTS); - - for (int i = 0; i < ARRAY_LEN(tests); i++) { - int dfd, fd; - char *fdpath = NULL; - bool failed; - void (*resultfn)(const char *msg, ...) = ksft_test_result_pass; - struct basic_test *test = &tests[i]; - - if (!openat2_supported) { - ksft_print_msg("openat2(2) unsupported\n"); - resultfn = ksft_test_result_skip; - goto skip; - } - - /* Auto-set O_PATH. */ - if (!(test->how.flags & O_CREAT)) - test->how.flags |= O_PATH; - - if (test->dir) - dfd = openat(rootfd, test->dir, O_PATH | O_DIRECTORY); - else - dfd = dup(rootfd); - E_assert(dfd, "failed to openat root '%s': %m", test->dir); - - E_dup2(dfd, hardcoded_fd); - - fd = sys_openat2(dfd, test->path, &test->how); - if (test->pass) - failed = (fd < 0 || !fdequal(fd, rootfd, test->out.path)); - else - failed = (fd != test->out.err); - if (fd >= 0) { - fdpath = fdreadlink(fd); - close(fd); - } - close(dfd); - - if (failed) { - resultfn = ksft_test_result_fail; - - ksft_print_msg("openat2 unexpectedly returned "); - if (fdpath) - ksft_print_msg("%d['%s']\n", fd, fdpath); - else - ksft_print_msg("%d (%s)\n", fd, strerror(-fd)); - } - -skip: - if (test->pass) - resultfn("%s gives path '%s'\n", test->name, - test->out.path ?: "."); - else - resultfn("%s fails with %d (%s)\n", test->name, - test->out.err, strerror(-test->out.err)); - - fflush(stdout); - free(fdpath); - } - - free(procselfexe); - close(rootfd); - - free(hardcoded_fdpath); - close(hardcoded_fd); + for (int i = 0; i < ARRAY_SIZE(tests); i++) + verify_resolve_test(_metadata, self->rootfd, + self->hardcoded_fd, &tests[i]); } -#define NUM_TESTS NUM_OPENAT2_OPATH_TESTS - -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_set_plan(NUM_TESTS); - - /* NOTE: We should be checking for 
CAP_SYS_ADMIN here... */ - if (geteuid() != 0) - ksft_exit_skip("all tests require euid == 0\n"); - - test_openat2_opath_tests(); - - if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) - ksft_exit_fail(); - else - ksft_exit_pass(); -} +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/namespaces/listns_efault_test.c b/tools/testing/selftests/namespaces/listns_efault_test.c index b570746e917c1..26b452c98c665 100644 --- a/tools/testing/selftests/namespaces/listns_efault_test.c +++ b/tools/testing/selftests/namespaces/listns_efault_test.c @@ -38,7 +38,7 @@ TEST(listns_partial_fault_with_ns_cleanup) __u64 *ns_ids; ssize_t ret; long page_size; - pid_t pid, iter_pid; + pid_t pid, iter_pid, ns_pids[5]; int pidfds[5]; int sv[5][2]; int iter_pidfd; @@ -114,6 +114,7 @@ TEST(listns_partial_fault_with_ns_cleanup) pid = create_child(&pidfds[i], CLONE_NEWNS); ASSERT_NE(pid, -1); + ns_pids[i] = pid; if (pid == 0) { close(sv[i][0]); /* Close parent end */ @@ -164,7 +165,7 @@ TEST(listns_partial_fault_with_ns_cleanup) /* Wait for all mount namespace children to exit and cleanup */ for (i = 0; i < 5; i++) { - waitpid(-1, NULL, 0); + waitpid(ns_pids[i], NULL, 0); close(sv[i][0]); close(pidfds[i]); } @@ -175,6 +176,12 @@ TEST(listns_partial_fault_with_ns_cleanup) ASSERT_EQ(ret, iter_pid); close(iter_pidfd); + /* If listns() is not supported the iterator exits cleanly via ENOSYS */ + if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP) { + munmap(map, page_size); + SKIP(return, "listns() not supported"); + } + /* Should have been killed */ ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_EQ(WTERMSIG(status), SIGKILL); @@ -250,7 +257,7 @@ TEST(listns_late_fault_with_ns_cleanup) __u64 *ns_ids; ssize_t ret; long page_size; - pid_t pid, iter_pid; + pid_t pid, iter_pid, ns_pids[10]; int pidfds[10]; int sv[10][2]; int iter_pidfd; @@ -320,6 +327,7 @@ TEST(listns_late_fault_with_ns_cleanup) pid = create_child(&pidfds[i], CLONE_NEWNS); ASSERT_NE(pid, -1); + ns_pids[i] = pid; if (pid == 0) 
{ close(sv[i][0]); /* Close parent end */ @@ -373,7 +381,7 @@ TEST(listns_late_fault_with_ns_cleanup) /* Wait for all children and cleanup */ for (i = 0; i < 10; i++) { - waitpid(-1, NULL, 0); + waitpid(ns_pids[i], NULL, 0); close(sv[i][0]); close(pidfds[i]); } @@ -384,6 +392,12 @@ TEST(listns_late_fault_with_ns_cleanup) ASSERT_EQ(ret, iter_pid); close(iter_pidfd); + /* If listns() is not supported the iterator exits cleanly via ENOSYS */ + if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP) { + munmap(map, page_size); + SKIP(return, "listns() not supported"); + } + /* Should have been killed */ ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_EQ(WTERMSIG(status), SIGKILL); @@ -402,7 +416,7 @@ TEST(listns_mnt_ns_cleanup_on_fault) __u64 *ns_ids; ssize_t ret; long page_size; - pid_t pid, iter_pid; + pid_t pid, iter_pid, ns_pids[8]; int pidfds[8]; int sv[8][2]; int iter_pidfd; @@ -462,6 +476,7 @@ TEST(listns_mnt_ns_cleanup_on_fault) pid = create_child(&pidfds[i], CLONE_NEWNS); ASSERT_NE(pid, -1); + ns_pids[i] = pid; if (pid == 0) { close(sv[i][0]); /* Close parent end */ @@ -508,7 +523,7 @@ TEST(listns_mnt_ns_cleanup_on_fault) /* Wait for children and cleanup */ for (i = 0; i < 8; i++) { - waitpid(-1, NULL, 0); + waitpid(ns_pids[i], NULL, 0); close(sv[i][0]); close(pidfds[i]); } @@ -519,6 +534,12 @@ TEST(listns_mnt_ns_cleanup_on_fault) ASSERT_EQ(ret, iter_pid); close(iter_pidfd); + /* If listns() is not supported the iterator exits cleanly via ENOSYS */ + if (WIFEXITED(status) && WEXITSTATUS(status) == PIDFD_SKIP) { + munmap(map, page_size); + SKIP(return, "listns() not supported"); + } + /* Should have been killed */ ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_EQ(WTERMSIG(status), SIGKILL); diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c index b4a14c6693a54..46dc838cba82d 100644 --- a/tools/testing/selftests/namespaces/nsid_test.c +++ b/tools/testing/selftests/namespaces/nsid_test.c @@ -25,14 +25,24 @@ /* 
Fixture for tests that create child processes */ FIXTURE(nsid) { pid_t child_pid; + pid_t grandchild_pid; }; FIXTURE_SETUP(nsid) { self->child_pid = 0; + self->grandchild_pid = 0; } FIXTURE_TEARDOWN(nsid) { - /* Clean up any child process that may still be running */ + /* + * Kill grandchild first: timens_separate and pidns_separate fork a + * grandchild that calls pause(). It is reparented to init on child + * exit and keeps the test runner's tap pipe open, hanging the runner. + */ + if (self->grandchild_pid > 0) { + kill(self->grandchild_pid, SIGKILL); + waitpid(self->grandchild_pid, NULL, 0); + } if (self->child_pid > 0) { kill(self->child_pid, SIGKILL); waitpid(self->child_pid, NULL, 0); @@ -676,6 +686,7 @@ TEST_F(nsid, timens_separate) pid_t grandchild_pid; ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid)); + self->grandchild_pid = grandchild_pid; close(pipefd[0]); /* Open grandchild's time namespace */ @@ -797,6 +808,7 @@ TEST_F(nsid, pidns_separate) pid_t grandchild_pid; ASSERT_EQ(read(pipefd[0], &grandchild_pid, sizeof(grandchild_pid)), sizeof(grandchild_pid)); + self->grandchild_pid = grandchild_pid; close(pipefd[0]); /* Open grandchild's PID namespace */ diff --git a/tools/testing/selftests/openat2/helpers.c b/tools/testing/selftests/openat2/helpers.c deleted file mode 100644 index 5074681ffdc99..0000000000000 --- a/tools/testing/selftests/openat2/helpers.c +++ /dev/null @@ -1,109 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Author: Aleksa Sarai <cyphar@cyphar.com> - * Copyright (C) 2018-2019 SUSE LLC. 
- */ - -#define _GNU_SOURCE -#include <errno.h> -#include <fcntl.h> -#include <stdbool.h> -#include <string.h> -#include <syscall.h> -#include <limits.h> - -#include "helpers.h" - -bool needs_openat2(const struct open_how *how) -{ - return how->resolve != 0; -} - -int raw_openat2(int dfd, const char *path, void *how, size_t size) -{ - int ret = syscall(__NR_openat2, dfd, path, how, size); - return ret >= 0 ? ret : -errno; -} - -int sys_openat2(int dfd, const char *path, struct open_how *how) -{ - return raw_openat2(dfd, path, how, sizeof(*how)); -} - -int sys_openat(int dfd, const char *path, struct open_how *how) -{ - int ret = openat(dfd, path, how->flags, how->mode); - return ret >= 0 ? ret : -errno; -} - -int sys_renameat2(int olddirfd, const char *oldpath, - int newdirfd, const char *newpath, unsigned int flags) -{ - int ret = syscall(__NR_renameat2, olddirfd, oldpath, - newdirfd, newpath, flags); - return ret >= 0 ? ret : -errno; -} - -int touchat(int dfd, const char *path) -{ - int fd = openat(dfd, path, O_CREAT, 0700); - if (fd >= 0) - close(fd); - return fd; -} - -char *fdreadlink(int fd) -{ - char *target, *tmp; - - E_asprintf(&tmp, "/proc/self/fd/%d", fd); - - target = malloc(PATH_MAX); - if (!target) - ksft_exit_fail_msg("fdreadlink: malloc failed\n"); - memset(target, 0, PATH_MAX); - - E_readlink(tmp, target, PATH_MAX); - free(tmp); - return target; -} - -bool fdequal(int fd, int dfd, const char *path) -{ - char *fdpath, *dfdpath, *other; - bool cmp; - - fdpath = fdreadlink(fd); - dfdpath = fdreadlink(dfd); - - if (!path) - E_asprintf(&other, "%s", dfdpath); - else if (*path == '/') - E_asprintf(&other, "%s", path); - else - E_asprintf(&other, "%s/%s", dfdpath, path); - - cmp = !strcmp(fdpath, other); - - free(fdpath); - free(dfdpath); - free(other); - return cmp; -} - -bool openat2_supported = false; - -void __attribute__((constructor)) init(void) -{ - struct open_how how = {}; - int fd; - - BUILD_BUG_ON(sizeof(struct open_how) != OPEN_HOW_SIZE_VER0); 
- - /* Check openat2(2) support. */ - fd = sys_openat2(AT_FDCWD, ".", &how); - openat2_supported = (fd >= 0); - - if (fd >= 0) - close(fd); -} diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h deleted file mode 100644 index 510e60602511a..0000000000000 --- a/tools/testing/selftests/openat2/helpers.h +++ /dev/null @@ -1,108 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Author: Aleksa Sarai <cyphar@cyphar.com> - * Copyright (C) 2018-2019 SUSE LLC. - */ - -#ifndef __RESOLVEAT_H__ -#define __RESOLVEAT_H__ - -#define _GNU_SOURCE -#include <stdint.h> -#include <stdbool.h> -#include <errno.h> -#include <linux/types.h> -#include "kselftest.h" - -#define ARRAY_LEN(X) (sizeof (X) / sizeof (*(X))) -#define BUILD_BUG_ON(e) ((void)(sizeof(struct { int:(-!!(e)); }))) - -#ifndef SYS_openat2 -#ifndef __NR_openat2 -#define __NR_openat2 437 -#endif /* __NR_openat2 */ -#define SYS_openat2 __NR_openat2 -#endif /* SYS_openat2 */ - -/* - * Arguments for how openat2(2) should open the target path. If @resolve is - * zero, then openat2(2) operates very similarly to openat(2). - * - * However, unlike openat(2), unknown bits in @flags result in -EINVAL rather - * than being silently ignored. @mode must be zero unless one of {O_CREAT, - * O_TMPFILE} are set. - * - * @flags: O_* flags. - * @mode: O_CREAT/O_TMPFILE file mode. - * @resolve: RESOLVE_* flags. - */ -struct open_how { - __u64 flags; - __u64 mode; - __u64 resolve; -}; - -#define OPEN_HOW_SIZE_VER0 24 /* sizeof first published struct */ -#define OPEN_HOW_SIZE_LATEST OPEN_HOW_SIZE_VER0 - -bool needs_openat2(const struct open_how *how); - -#ifndef RESOLVE_IN_ROOT -/* how->resolve flags for openat2(2). */ -#define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings - (includes bind-mounts). */ -#define RESOLVE_NO_MAGICLINKS 0x02 /* Block traversal through procfs-style - "magic-links". 
*/ -#define RESOLVE_NO_SYMLINKS 0x04 /* Block traversal through all symlinks - (implies OEXT_NO_MAGICLINKS) */ -#define RESOLVE_BENEATH 0x08 /* Block "lexical" trickery like - "..", symlinks, and absolute - paths which escape the dirfd. */ -#define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." - be scoped inside the dirfd - (similar to chroot(2)). */ -#endif /* RESOLVE_IN_ROOT */ - -#define E_func(func, ...) \ - do { \ - errno = 0; \ - if (func(__VA_ARGS__) < 0) \ - ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \ - __FILE__, __LINE__, #func, errno); \ - } while (0) - -#define E_asprintf(...) E_func(asprintf, __VA_ARGS__) -#define E_chmod(...) E_func(chmod, __VA_ARGS__) -#define E_dup2(...) E_func(dup2, __VA_ARGS__) -#define E_fchdir(...) E_func(fchdir, __VA_ARGS__) -#define E_fstatat(...) E_func(fstatat, __VA_ARGS__) -#define E_kill(...) E_func(kill, __VA_ARGS__) -#define E_mkdirat(...) E_func(mkdirat, __VA_ARGS__) -#define E_mount(...) E_func(mount, __VA_ARGS__) -#define E_prctl(...) E_func(prctl, __VA_ARGS__) -#define E_readlink(...) E_func(readlink, __VA_ARGS__) -#define E_setresuid(...) E_func(setresuid, __VA_ARGS__) -#define E_symlinkat(...) E_func(symlinkat, __VA_ARGS__) -#define E_touchat(...) E_func(touchat, __VA_ARGS__) -#define E_unshare(...) E_func(unshare, __VA_ARGS__) - -#define E_assert(expr, msg, ...) 
\ - do { \ - if (!(expr)) \ - ksft_exit_fail_msg("ASSERT(%s:%d) failed (%s): " msg "\n", \ - __FILE__, __LINE__, #expr, ##__VA_ARGS__); \ - } while (0) - -int raw_openat2(int dfd, const char *path, void *how, size_t size); -int sys_openat2(int dfd, const char *path, struct open_how *how); -int sys_openat(int dfd, const char *path, struct open_how *how); -int sys_renameat2(int olddirfd, const char *oldpath, - int newdirfd, const char *newpath, unsigned int flags); - -int touchat(int dfd, const char *path); -char *fdreadlink(int fd); -bool fdequal(int fd, int dfd, const char *path); - -extern bool openat2_supported; - -#endif /* __RESOLVEAT_H__ */ diff --git a/tools/testing/selftests/openat2/rename_attack_test.c b/tools/testing/selftests/openat2/rename_attack_test.c deleted file mode 100644 index aa5699e457290..0000000000000 --- a/tools/testing/selftests/openat2/rename_attack_test.c +++ /dev/null @@ -1,160 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Author: Aleksa Sarai <cyphar@cyphar.com> - * Copyright (C) 2018-2019 SUSE LLC. - */ - -#define _GNU_SOURCE -#include <errno.h> -#include <fcntl.h> -#include <sched.h> -#include <sys/stat.h> -#include <sys/types.h> -#include <sys/mount.h> -#include <sys/mman.h> -#include <sys/prctl.h> -#include <signal.h> -#include <stdio.h> -#include <stdlib.h> -#include <stdbool.h> -#include <string.h> -#include <syscall.h> -#include <limits.h> -#include <unistd.h> - -#include "kselftest.h" -#include "helpers.h" - -/* Construct a test directory with the following structure: - * - * root/ - * |-- a/ - * | `-- c/ - * `-- b/ - */ -int setup_testdir(void) -{ - int dfd; - char dirname[] = "/tmp/ksft-openat2-rename-attack.XXXXXX"; - - /* Make the top-level directory. 
*/ - if (!mkdtemp(dirname)) - ksft_exit_fail_msg("setup_testdir: failed to create tmpdir\n"); - dfd = open(dirname, O_PATH | O_DIRECTORY); - if (dfd < 0) - ksft_exit_fail_msg("setup_testdir: failed to open tmpdir\n"); - - E_mkdirat(dfd, "a", 0755); - E_mkdirat(dfd, "b", 0755); - E_mkdirat(dfd, "a/c", 0755); - - return dfd; -} - -/* Swap @dirfd/@a and @dirfd/@b constantly. Parent must kill this process. */ -pid_t spawn_attack(int dirfd, char *a, char *b) -{ - pid_t child = fork(); - if (child != 0) - return child; - - /* If the parent (the test process) dies, kill ourselves too. */ - E_prctl(PR_SET_PDEATHSIG, SIGKILL); - - /* Swap @a and @b. */ - for (;;) - renameat2(dirfd, a, dirfd, b, RENAME_EXCHANGE); - exit(1); -} - -#define NUM_RENAME_TESTS 2 -#define ROUNDS 400000 - -const char *flagname(int resolve) -{ - switch (resolve) { - case RESOLVE_IN_ROOT: - return "RESOLVE_IN_ROOT"; - case RESOLVE_BENEATH: - return "RESOLVE_BENEATH"; - } - return "(unknown)"; -} - -void test_rename_attack(int resolve) -{ - int dfd, afd; - pid_t child; - void (*resultfn)(const char *msg, ...) 
= ksft_test_result_pass; - int escapes = 0, other_errs = 0, exdevs = 0, eagains = 0, successes = 0; - - struct open_how how = { - .flags = O_PATH, - .resolve = resolve, - }; - - if (!openat2_supported) { - how.resolve = 0; - ksft_print_msg("openat2(2) unsupported -- using openat(2) instead\n"); - } - - dfd = setup_testdir(); - afd = openat(dfd, "a", O_PATH); - if (afd < 0) - ksft_exit_fail_msg("test_rename_attack: failed to open 'a'\n"); - - child = spawn_attack(dfd, "a/c", "b"); - - for (int i = 0; i < ROUNDS; i++) { - int fd; - char *victim_path = "c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../../c/../.."; - - if (openat2_supported) - fd = sys_openat2(afd, victim_path, &how); - else - fd = sys_openat(afd, victim_path, &how); - - if (fd < 0) { - if (fd == -EAGAIN) - eagains++; - else if (fd == -EXDEV) - exdevs++; - else if (fd == -ENOENT) - escapes++; /* escaped outside and got ENOENT... */ - else - other_errs++; /* unexpected error */ - } else { - if (fdequal(fd, afd, NULL)) - successes++; - else - escapes++; /* we got an unexpected fd */ - } - close(fd); - } - - if (escapes > 0) - resultfn = ksft_test_result_fail; - ksft_print_msg("non-escapes: EAGAIN=%d EXDEV=%d E<other>=%d success=%d\n", - eagains, exdevs, other_errs, successes); - resultfn("rename attack with %s (%d runs, got %d escapes)\n", - flagname(resolve), ROUNDS, escapes); - - /* Should be killed anyway, but might as well make sure. 
*/ - E_kill(child, SIGKILL); -} - -#define NUM_TESTS NUM_RENAME_TESTS - -int main(int argc, char **argv) -{ - ksft_print_header(); - ksft_set_plan(NUM_TESTS); - - test_rename_attack(RESOLVE_BENEATH); - test_rename_attack(RESOLVE_IN_ROOT); - - if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0) - ksft_exit_fail(); - else - ksft_exit_pass(); -} diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c index c9519e7385b66..5d686a09aa153 100644 --- a/tools/testing/selftests/pid_namespace/pid_max.c +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -12,10 +12,74 @@ #include <syscall.h> #include <sys/mount.h> #include <sys/wait.h> +#include <unistd.h> #include "kselftest_harness.h" #include "../pidfd/pidfd.h" +/* + * The kernel computes the minimum allowed pid_max as: + * max(RESERVED_PIDS + 1, PIDS_PER_CPU_MIN * num_possible_cpus()) + * Mirror that here so the test values are always valid. + * + * Note: glibc's get_nprocs_conf() returns the number of *configured* + * (present) CPUs, not *possible* CPUs. The kernel uses + * num_possible_cpus() which corresponds to /sys/devices/system/cpu/possible. + * These can differ significantly (e.g. 16 configured vs 128 possible). + */ +#define RESERVED_PIDS 300 +#define PIDS_PER_CPU_MIN 8 + +/* Count CPUs from a range list like "0-31" or "0-15,32-47". */ +static int num_possible_cpus(void) +{ + FILE *f; + int count = 0; + int lo, hi; + + f = fopen("/sys/devices/system/cpu/possible", "r"); + if (!f) + return 0; + + while (fscanf(f, "%d", &lo) == 1) { + if (fscanf(f, "-%d", &hi) == 1) + count += hi - lo + 1; + else + count++; + /* skip comma separator */ + fscanf(f, ","); + } + + fclose(f); + return count; +} + +static int pid_min(void) +{ + int cpu_min = PIDS_PER_CPU_MIN * num_possible_cpus(); + + return cpu_min > (RESERVED_PIDS + 1) ? cpu_min : (RESERVED_PIDS + 1); +} + +/* + * Outer and inner pid_max limits used by the tests. 
The outer limit is + * the more restrictive ancestor; the inner limit is set higher in a + * nested namespace but must still be capped by the outer limit. + * Both are derived from the kernel's minimum so they are always writable. + * + * Global so that clone callbacks can access them without parameter plumbing. + */ +static int outer_limit; +static int inner_limit; + +static int write_int_to_fd(int fd, int val) +{ + char buf[12]; + int len = snprintf(buf, sizeof(buf), "%d", val); + + return write(fd, buf, len); +} + #define __STACK_SIZE (8 * 1024 * 1024) static pid_t do_clone(int (*fn)(void *), void *arg, int flags) { @@ -60,18 +124,18 @@ static int pid_max_cb(void *data) return -1; } - ret = write(fd, "500", sizeof("500") - 1); + ret = write_int_to_fd(fd, inner_limit); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); return -1; } - for (int i = 0; i < 501; i++) { + for (int i = 0; i < inner_limit + 1; i++) { pid = fork(); if (pid == 0) exit(EXIT_SUCCESS); wait_for_pid(pid); - if (pid > 500) { + if (pid > inner_limit) { fprintf(stderr, "Managed to create pid number beyond limit\n"); return -1; } @@ -106,7 +170,7 @@ static int pid_max_nested_inner(void *data) return fret; } - ret = write(fd, "500", sizeof("500") - 1); + ret = write_int_to_fd(fd, inner_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); @@ -133,8 +197,8 @@ static int pid_max_nested_inner(void *data) return fret; } - /* Now make sure that we wrap pids at 400. */ - for (i = 0; i < 510; i++) { + /* Now make sure that we wrap pids at outer_limit. 
*/ + for (i = 0; i < inner_limit + 10; i++) { pid_t pid; pid = fork(); @@ -145,7 +209,7 @@ static int pid_max_nested_inner(void *data) exit(EXIT_SUCCESS); wait_for_pid(pid); - if (pid >= 500) { + if (pid >= inner_limit) { fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid); return fret; } @@ -156,15 +220,19 @@ static int pid_max_nested_inner(void *data) static int pid_max_nested_outer(void *data) { - int fret = -1, nr_procs = 400; - pid_t pids[1000]; - int fd, i, ret; + int fret = -1, nr_procs = 0; + pid_t *pids; + int fd, ret; pid_t pid; + pids = malloc(outer_limit * sizeof(pid_t)); + if (!pids) + return -1; + ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); if (ret) { fprintf(stderr, "%m - Failed to make rootfs private mount\n"); - return fret; + goto out; } umount2("/proc", MNT_DETACH); @@ -172,27 +240,28 @@ static int pid_max_nested_outer(void *data) ret = mount("proc", "/proc", "proc", 0, NULL); if (ret) { fprintf(stderr, "%m - Failed to mount proc\n"); - return fret; + goto out; } fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); if (fd < 0) { fprintf(stderr, "%m - Failed to open pid_max\n"); - return fret; + goto out; } - ret = write(fd, "400", sizeof("400") - 1); + ret = write_int_to_fd(fd, outer_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); - return fret; + goto out; } /* - * Create 397 processes. This leaves room for do_clone() (398) and - * one more 399. So creating another process needs to fail. + * Create (outer_limit - 4) processes. This leaves room for + * do_clone() and one more. So creating another process needs + * to fail. 
*/ - for (nr_procs = 0; nr_procs < 396; nr_procs++) { + for (nr_procs = 0; nr_procs < outer_limit - 4; nr_procs++) { pid = fork(); if (pid < 0) goto reap; @@ -220,20 +289,26 @@ reap: for (int i = 0; i < nr_procs; i++) wait_for_pid(pids[i]); +out: + free(pids); return fret; } static int pid_max_nested_limit_inner(void *data) { - int fret = -1, nr_procs = 400; + int fret = -1, nr_procs = 0; int fd, ret; pid_t pid; - pid_t pids[1000]; + pid_t *pids; + + pids = malloc(inner_limit * sizeof(pid_t)); + if (!pids) + return -1; ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0); if (ret) { fprintf(stderr, "%m - Failed to make rootfs private mount\n"); - return fret; + goto out; } umount2("/proc", MNT_DETACH); @@ -241,23 +316,23 @@ static int pid_max_nested_limit_inner(void *data) ret = mount("proc", "/proc", "proc", 0, NULL); if (ret) { fprintf(stderr, "%m - Failed to mount proc\n"); - return fret; + goto out; } fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY); if (fd < 0) { fprintf(stderr, "%m - Failed to open pid_max\n"); - return fret; + goto out; } - ret = write(fd, "500", sizeof("500") - 1); + ret = write_int_to_fd(fd, inner_limit); close(fd); if (ret < 0) { fprintf(stderr, "%m - Failed to write pid_max\n"); - return fret; + goto out; } - for (nr_procs = 0; nr_procs < 500; nr_procs++) { + for (nr_procs = 0; nr_procs < inner_limit; nr_procs++) { pid = fork(); if (pid < 0) break; @@ -268,7 +343,7 @@ static int pid_max_nested_limit_inner(void *data) pids[nr_procs] = pid; } - if (nr_procs >= 400) { + if (nr_procs >= outer_limit) { fprintf(stderr, "Managed to create processes beyond the configured outer limit\n"); goto reap; } @@ -279,6 +354,8 @@ reap: for (int i = 0; i < nr_procs; i++) wait_for_pid(pids[i]); +out: + free(pids); return fret; } @@ -307,7 +384,7 @@ static int pid_max_nested_limit_outer(void *data) return -1; } - ret = write(fd, "400", sizeof("400") - 1); + ret = write_int_to_fd(fd, outer_limit); close(fd); if (ret < 0) { fprintf(stderr, 
"%m - Failed to write pid_max\n"); @@ -328,17 +405,32 @@ static int pid_max_nested_limit_outer(void *data) return 0; } -TEST(pid_max_simple) +FIXTURE(pid_max) { + int dummy; +}; + +FIXTURE_SETUP(pid_max) { - pid_t pid; + int min = pid_min(); + + outer_limit = min + 100; + inner_limit = min + 200; +} +FIXTURE_TEARDOWN(pid_max) +{ +} + +TEST_F(pid_max, simple) +{ + pid_t pid; pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS); ASSERT_GT(pid, 0); ASSERT_EQ(0, wait_for_pid(pid)); } -TEST(pid_max_nested_limit) +TEST_F(pid_max, nested_limit) { pid_t pid; @@ -347,7 +439,7 @@ TEST(pid_max_nested_limit) ASSERT_EQ(0, wait_for_pid(pid)); } -TEST(pid_max_nested) +TEST_F(pid_max, nested) { pid_t pid; |
