diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 18:09:32 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 18:09:32 +0100 |
| commit | 5f74287f42d47e9acdc9a987518387125f046527 (patch) | |
| tree | a8c7e4a5ad67952a170f269f99418a4ab50a5318 /include | |
| parent | 96c3d0c2555e1b97c57348e83702f3b56b8df9d3 (diff) | |
| parent | 982071afc4e24a052d84132ffbf4340856924c28 (diff) | |
| download | linux-next-history-5f74287f42d47e9acdc9a987518387125f046527.tar.gz | |
Merge branch 'fs-next' of linux-next
# Conflicts:
# fs/btrfs/defrag.c
Diffstat (limited to 'include')
44 files changed, 1740 insertions, 275 deletions
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 65abd5ab8836c..a8379f4eee615 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -25,6 +25,8 @@ struct linux_binprm { struct page *page[MAX_ARG_PAGES]; #endif struct mm_struct *mm; + /* user_ns published to task->exec_state at execve, narrowed by would_dump(). */ + struct user_namespace *user_ns; unsigned long p; /* current top of mem */ unsigned int /* Should an execfd be passed to userspace? */ diff --git a/include/linux/bio.h b/include/linux/bio.h index dc17780d6c1e3..8300d5565e364 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -703,20 +703,6 @@ static inline bool bioset_initialized(struct bio_set *bs) return bs->bio_slab != NULL; } -/* - * Mark a bio as polled. Note that for async polled IO, the caller must - * expect -EWOULDBLOCK if we cannot allocate a request (or other resources). - * We cannot block waiting for requests on polled IO, as those completions - * must be found by the caller. This is different than IRQ driven IO, where - * it's safe to wait for IO to complete. - */ -static inline void bio_set_polled(struct bio *bio, struct kiocb *kiocb) -{ - bio->bi_opf |= REQ_POLLED; - if (kiocb->ki_flags & IOCB_NOWAIT) - bio->bi_opf |= REQ_NOWAIT; -} - static inline void bio_clear_polled(struct bio *bio) { bio->bi_opf &= ~REQ_POLLED; diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 68861da4cf7c2..7b38ee2e7913b 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/mm.h> #include <linux/fs.h> +#include <linux/sched/coredump.h> #include <asm/siginfo.h> #ifdef CONFIG_COREDUMP @@ -20,7 +21,10 @@ struct coredump_params { const kernel_siginfo_t *siginfo; struct file *file; unsigned long limit; + /* MMF_DUMP_FILTER_* bits, snapshot of mm->flags at dump start. */ unsigned long mm_flags; + /* Snapshot of dumpable at dump start. 
*/ + enum task_dumpable dumpable; int cpu; loff_t written; loff_t pos; diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h index 728fb5dee5ede..de1c738aa8ad9 100644 --- a/include/linux/eventpoll.h +++ b/include/linux/eventpoll.h @@ -61,8 +61,16 @@ static inline void eventpoll_release(struct file *file) eventpoll_release_file(file); } +struct epoll_key { + struct file *file; + int fd; +} __packed; + +int do_epoll_ctl_file(struct file *f, int op, struct epoll_key *tf, + struct epoll_event *epds, bool nonblock); int do_epoll_ctl(int epfd, int op, int fd, struct epoll_event *epds, bool nonblock); +bool is_file_epoll(struct file *f); /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h index 8bcdba28b4060..c835bc64f4fad 100644 --- a/include/linux/exportfs.h +++ b/include/linux/exportfs.h @@ -6,9 +6,8 @@ #include <linux/path.h> struct dentry; -struct iattr; +struct exportfs_block_ops; struct inode; -struct iomap; struct super_block; struct vfsmount; @@ -260,19 +259,13 @@ struct handle_to_path_ctx { * @commit_metadata: * @commit_metadata should commit metadata changes to stable storage. * - * @get_uuid: - * Get a filesystem unique signature exposed to clients. - * - * @map_blocks: - * Map and, if necessary, allocate blocks for a layout. - * - * @commit_blocks: - * Commit blocks in a layout once the client is done with them. - * * @flags: * Allows the filesystem to communicate to nfsd that it may want to do things * differently when dealing with it. * + * @block_ops: + * Operations for layout grants to block on the underlying device. 
+ * * Locking rules: * get_parent is called with child->d_inode->i_rwsem down * get_name is not (which is possibly inconsistent) @@ -290,12 +283,6 @@ struct export_operations { struct dentry * (*get_parent)(struct dentry *child); int (*commit_metadata)(struct inode *inode); - int (*get_uuid)(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); - int (*map_blocks)(struct inode *inode, loff_t offset, - u64 len, struct iomap *iomap, - bool write, u32 *device_generation); - int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, - int nr_iomaps, struct iattr *iattr); int (*permission)(struct handle_to_path_ctx *ctx, unsigned int oflags); struct file * (*open)(const struct path *path, unsigned int oflags); #define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */ @@ -308,6 +295,10 @@ struct export_operations { #define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */ #define EXPORT_OP_NOLOCKS (0x40) /* no file locking support */ unsigned long flags; + +#ifdef CONFIG_EXPORTFS_BLOCK_OPS + const struct exportfs_block_ops *block_ops; +#endif }; /** diff --git a/include/linux/exportfs_block.h b/include/linux/exportfs_block.h new file mode 100644 index 0000000000000..de519b7b599b3 --- /dev/null +++ b/include/linux/exportfs_block.h @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2014-2026 Christoph Hellwig. + * + * Support for exportfs-based layout grants for direct block device access. + */ +#ifndef LINUX_EXPORTFS_BLOCK_H +#define LINUX_EXPORTFS_BLOCK_H 1 + +#include <linux/blkdev.h> +#include <linux/exportfs.h> +#include <linux/fs.h> + +struct inode; +struct iomap; +struct super_block; + +/* + * There are two types of block-style layout support: + * - In-band implies a device identified by a unique cookie inside the actual + * device address space checked by the ->get_uuid method as used by the pNFS + * block layout. This is a bit dangerous and deprecated. 
+ * - Out of band implies identification by out of band unique identifiers + * specified by the storage protocol, which is much safer and used by the + * pNFS SCSI/NVMe layouts. + */ +typedef unsigned int __bitwise expfs_block_layouts_t; +#define EXPFS_BLOCK_FLAG(__bit) \ + ((__force expfs_block_layouts_t)(1u << __bit)) +#define EXPFS_BLOCK_IN_BAND_ID EXPFS_BLOCK_FLAG(0) +#define EXPFS_BLOCK_OUT_OF_BAND_ID EXPFS_BLOCK_FLAG(1) + +struct exportfs_block_ops { + /* + * Returns the EXPFS_BLOCK_* bitmap of supported layout types. + */ + expfs_block_layouts_t (*layouts_supported)(struct super_block *sb); + + /* + * Get the in-band device unique signature exposed to clients. + */ + int (*get_uuid)(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); + + /* + * Map blocks for direct block access. + * If @write is %true, also allocate the blocks for the range if needed. + */ + int (*map_blocks)(struct inode *inode, loff_t offset, u64 len, + struct iomap *iomap, bool write, + u32 *device_generation); + + /* + * Commit blocks previously handed out by ->map_blocks and written to by + * the client. 
+ */ + int (*commit_blocks)(struct inode *inode, struct iomap *iomaps, + int nr_iomaps, loff_t new_size); +}; + +static inline bool +exportfs_bdev_supports_out_of_band_id(struct block_device *bdev) +{ + return bdev->bd_disk->fops->pr_ops && + bdev->bd_disk->fops->get_unique_id; +} + +#ifdef CONFIG_EXPORTFS_BLOCK_OPS +static inline expfs_block_layouts_t +exportfs_layouts_supported(struct super_block *sb) +{ + const struct exportfs_block_ops *bops = sb->s_export_op->block_ops; + + if (!bops || + !bops->layouts_supported || + WARN_ON_ONCE(!bops->map_blocks) || + WARN_ON_ONCE(!bops->commit_blocks)) + return 0; + return bops->layouts_supported(sb); +} +#else +static inline expfs_block_layouts_t +exportfs_layouts_supported(struct super_block *sb) +{ + return 0; +} +#endif /* CONFIG_EXPORTFS_BLOCK_OPS */ + +#endif /* LINUX_EXPORTFS_BLOCK_H */ diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index a332e79b32079..6ad6b9e7a226a 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -4,13 +4,31 @@ #include <linux/stat.h> #include <uapi/linux/fcntl.h> +#include <uapi/linux/openat2.h> /* List of all valid flags for the open/openat flags argument: */ #define VALID_OPEN_FLAGS \ (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ O_APPEND | O_NDELAY | O_NONBLOCK | __O_SYNC | O_DSYNC | \ FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ - O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE | O_EMPTYPATH) + +/* List of all valid flags for openat2(2)'s how->flags argument. */ +#define VALID_OPENAT2_FLAGS (VALID_OPEN_FLAGS | OPENAT2_REGULAR) + +/* + * Kernel-internal carrier for OPENAT2_REGULAR. The UAPI bit lives in the + * upper 32 bits of open_how::flags so open()/openat() cannot encode it. + * build_open_flags() translates it to this internal flag, which then + * propagates through op->open_flag and f->f_flags exactly like __FMODE_EXEC. 
+ * do_dentry_open() strips it so userspace cannot observe it via + * fcntl(F_GETFL). + * + * Bit 30 is not claimed by any O_* flag on any architecture and stays clear + * of the sign bit of the int op->open_flag. fcntl_init() enforces that it + * never aliases an open-flag bit. + */ +#define __O_REGULAR (1 << 30) /* List of all valid flags for the how->resolve argument: */ #define VALID_RESOLVE_FLAGS \ diff --git a/include/linux/fileattr.h b/include/linux/fileattr.h index 3780904a63a6c..58044b5980162 100644 --- a/include/linux/fileattr.h +++ b/include/linux/fileattr.h @@ -16,7 +16,8 @@ /* Read-only inode flags */ #define FS_XFLAG_RDONLY_MASK \ - (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY) + (FS_XFLAG_PREALLOC | FS_XFLAG_HASATTR | FS_XFLAG_VERITY | \ + FS_XFLAG_CASEFOLD | FS_XFLAG_CASENONPRESERVING) /* Flags to indicate valid value of fsx_ fields */ #define FS_XFLAG_VALUES_MASK \ diff --git a/include/linux/filelock.h b/include/linux/filelock.h index 5f0a2fb314506..7a7a6e8a9a085 100644 --- a/include/linux/filelock.h +++ b/include/linux/filelock.h @@ -4,19 +4,22 @@ #include <linux/fs.h> -#define FL_POSIX 1 -#define FL_FLOCK 2 -#define FL_DELEG 4 /* NFSv4 delegation */ -#define FL_ACCESS 8 /* not trying to lock, just looking */ -#define FL_EXISTS 16 /* when unlocking, test for existence */ -#define FL_LEASE 32 /* lease held on this file */ -#define FL_CLOSE 64 /* unlock on close */ -#define FL_SLEEP 128 /* A blocking lock */ -#define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ -#define FL_UNLOCK_PENDING 512 /* Lease is being broken */ -#define FL_OFDLCK 1024 /* lock is "owned" by struct file */ -#define FL_LAYOUT 2048 /* outstanding pNFS layout */ -#define FL_RECLAIM 4096 /* reclaiming from a reboot server */ +#define FL_POSIX BIT(0) /* POSIX lock */ +#define FL_FLOCK BIT(1) /* BSD lock */ +#define FL_DELEG BIT(2) /* NFSv4 delegation */ +#define FL_ACCESS BIT(3) /* not trying to lock, just looking */ +#define FL_EXISTS BIT(4) /* when 
unlocking, test for existence */ +#define FL_LEASE BIT(5) /* file lease */ +#define FL_CLOSE BIT(6) /* unlock on close */ +#define FL_SLEEP BIT(7) /* A blocking lock */ +#define FL_DOWNGRADE_PENDING BIT(8) /* Lease is being downgraded */ +#define FL_UNLOCK_PENDING BIT(9) /* Lease is being broken */ +#define FL_OFDLCK BIT(10) /* POSIX lock "owned" by struct file */ +#define FL_LAYOUT BIT(11) /* outstanding pNFS layout */ +#define FL_RECLAIM BIT(12) /* reclaiming from a reboot server */ +#define FL_IGN_DIR_CREATE BIT(13) /* ignore DIR_CREATE events */ +#define FL_IGN_DIR_DELETE BIT(14) /* ignore DIR_DELETE events */ +#define FL_IGN_DIR_RENAME BIT(15) /* ignore DIR_RENAME events */ #define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) @@ -26,6 +29,15 @@ */ #define FILE_LOCK_DEFERRED 1 +#define LEASE_BREAK_LEASE BIT(0) // break leases and delegations +#define LEASE_BREAK_DELEG BIT(1) // break delegations only +#define LEASE_BREAK_LAYOUT BIT(2) // break layouts only +#define LEASE_BREAK_NONBLOCK BIT(3) // non-blocking break +#define LEASE_BREAK_OPEN_RDONLY BIT(4) // readonly open event +#define LEASE_BREAK_DIR_CREATE BIT(5) // dir deleg create event +#define LEASE_BREAK_DIR_DELETE BIT(6) // dir deleg delete event +#define LEASE_BREAK_DIR_RENAME BIT(7) // dir deleg rename event + struct file_lock; struct file_lease; @@ -216,19 +228,13 @@ int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); void locks_init_lease(struct file_lease *); void locks_free_lease(struct file_lease *fl); struct file_lease *locks_alloc_lease(void); - -#define LEASE_BREAK_LEASE BIT(0) // break leases and delegations -#define LEASE_BREAK_DELEG BIT(1) // break delegations only -#define LEASE_BREAK_LAYOUT BIT(2) // break layouts only -#define LEASE_BREAK_NONBLOCK BIT(3) // non-blocking break -#define LEASE_BREAK_OPEN_RDONLY BIT(4) // readonly open event - int __break_lease(struct inode *inode, unsigned int flags); void lease_get_mtime(struct inode *, struct timespec64 *time); int 
generic_setlease(struct file *, int, struct file_lease **, void **priv); int kernel_setlease(struct file *, int, struct file_lease **, void **); int vfs_setlease(struct file *, int, struct file_lease **, void **); int lease_modify(struct file_lease *, int, struct list_head *); +u32 inode_lease_ignore_mask(struct inode *inode); struct notifier_block; int lease_register_notifier(struct notifier_block *); @@ -516,12 +522,26 @@ static inline bool is_delegated(struct delegated_inode *di) return di->di_inode; } -static inline int try_break_deleg(struct inode *inode, +/** + * try_break_deleg - do a non-blocking delegation break + * @inode: inode that should have its delegations broken + * @flags: extra LEASE_BREAK_* flags to pass to break_deleg() + * @di: returns pointer to delegated inode (may be NULL) + * + * Break delegations in a non-blocking fashion. If there are + * outstanding delegations and @di is set, then an extra reference + * will be taken on @inode and @di->di_inode will be populated so + * that it may be waited upon. + * + * Returns 0 if there is no need to wait or an error. If -EWOULDBLOCK + * is returned, then @di will be populated (if non-NULL). 
+ */ +static inline int try_break_deleg(struct inode *inode, unsigned int flags, struct delegated_inode *di) { int ret; - ret = break_deleg(inode, LEASE_BREAK_NONBLOCK); + ret = break_deleg(inode, flags | LEASE_BREAK_NONBLOCK); if (ret == -EWOULDBLOCK && di) { di->di_inode = inode; ihold(inode); @@ -574,7 +594,7 @@ static inline int break_deleg(struct inode *inode, unsigned int flags) return 0; } -static inline int try_break_deleg(struct inode *inode, +static inline int try_break_deleg(struct inode *inode, unsigned int flags, struct delegated_inode *delegated_inode) { return 0; diff --git a/include/linux/fs.h b/include/linux/fs.h index bb9cc4f7207c1..6439aa13cce71 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2213,8 +2213,21 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } +/* + * returns the refcount on the inode. it can change arbitrarily. + */ +static inline int icount_read_once(const struct inode *inode) +{ + return atomic_read(&inode->i_count); +} + +/* + * returns the refcount on the inode. The lock guarantees no 0->1 or 1->0 transitions + * of the count are going to take place, otherwise it changes arbitrarily. + */ static inline int icount_read(const struct inode *inode) { + lockdep_assert_held(&inode->i_lock); return atomic_read(&inode->i_count); } @@ -2276,12 +2289,13 @@ struct file_system_type { #define FS_MGTIME 64 /* FS uses multigrain timestamps */ #define FS_LBS 128 /* FS supports LBS */ #define FS_POWER_FREEZE 256 /* Always freeze on suspend/hibernate */ +#define FS_USERNS_MOUNT_RESTRICTED 512 /* Restrict mount in userns if not already visible */ #define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() during rename() internally. 
*/ int (*init_fs_context)(struct fs_context *); const struct fs_parameter_spec *parameters; void (*kill_sb) (struct super_block *); struct module *owner; - struct file_system_type * next; + struct hlist_node list; struct hlist_head fs_supers; struct lock_class_key s_lock_key; diff --git a/include/linux/fs/super_types.h b/include/linux/fs/super_types.h index 383050e7fdf57..a6cdc8f6de4e8 100644 --- a/include/linux/fs/super_types.h +++ b/include/linux/fs/super_types.h @@ -326,7 +326,7 @@ struct super_block { #define SB_I_STABLE_WRITES 0x00000008 /* don't modify blks until WB is done */ /* sb->s_iflags to limit user namespace mounts */ -#define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ +#define SB_I_RESTRICTED_VARIANT 0x00000010 #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 #define SB_I_EVM_HMAC_UNSUPPORTED 0x00000080 diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 079c18bcdbde6..bda798bc67bc5 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -257,6 +257,10 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, __u32 new_dir_mask = FS_MOVED_TO; __u32 rename_mask = FS_RENAME; const struct qstr *new_name = &moved->d_name; + struct fsnotify_rename_data rd = { + .moved = moved, + .target = target, + }; if (isdir) { old_dir_mask |= FS_ISDIR; @@ -265,12 +269,12 @@ static inline void fsnotify_move(struct inode *old_dir, struct inode *new_dir, } /* Event with information about both old and new parent+name */ - fsnotify_name(rename_mask, moved, FSNOTIFY_EVENT_DENTRY, + fsnotify_name(rename_mask, &rd, FSNOTIFY_EVENT_RENAME, old_dir, old_name, 0); fsnotify_name(old_dir_mask, source, FSNOTIFY_EVENT_INODE, old_dir, old_name, fs_cookie); - fsnotify_name(new_dir_mask, source, FSNOTIFY_EVENT_INODE, + fsnotify_name(new_dir_mask, &rd, FSNOTIFY_EVENT_RENAME, new_dir, new_name, fs_cookie); if (target) diff --git a/include/linux/fsnotify_backend.h 
b/include/linux/fsnotify_backend.h index e5cde39d6e85d..618eed4d6d724 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -311,6 +311,7 @@ enum fsnotify_data_type { FSNOTIFY_EVENT_DENTRY, FSNOTIFY_EVENT_MNT, FSNOTIFY_EVENT_ERROR, + FSNOTIFY_EVENT_RENAME, }; struct fs_error_report { @@ -335,6 +336,11 @@ struct fsnotify_mnt { u64 mnt_id; }; +struct fsnotify_rename_data { + struct dentry *moved; /* the dentry that was renamed */ + struct inode *target; /* inode overwritten by rename, or NULL */ +}; + static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { @@ -348,6 +354,8 @@ static inline struct inode *fsnotify_data_inode(const void *data, int data_type) return d_inode(file_range_path(data)->dentry); case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *)data)->inode; + case FSNOTIFY_EVENT_RENAME: + return d_inode(((const struct fsnotify_rename_data *)data)->moved); default: return NULL; } @@ -363,6 +371,8 @@ static inline struct dentry *fsnotify_data_dentry(const void *data, int data_typ return ((const struct path *)data)->dentry; case FSNOTIFY_EVENT_FILE_RANGE: return file_range_path(data)->dentry; + case FSNOTIFY_EVENT_RENAME: + return ((struct fsnotify_rename_data *)data)->moved; default: return NULL; } @@ -395,6 +405,8 @@ static inline struct super_block *fsnotify_data_sb(const void *data, return file_range_path(data)->dentry->d_sb; case FSNOTIFY_EVENT_ERROR: return ((struct fs_error_report *) data)->sb; + case FSNOTIFY_EVENT_RENAME: + return ((const struct fsnotify_rename_data *)data)->moved->d_sb; default: return NULL; } @@ -430,6 +442,14 @@ static inline struct fs_error_report *fsnotify_data_error_report( } } +static inline struct inode *fsnotify_data_rename_target(const void *data, + int data_type) +{ + if (data_type == FSNOTIFY_EVENT_RENAME) + return ((const struct fsnotify_rename_data *)data)->target; + return NULL; +} + static inline const struct file_range 
*fsnotify_data_file_range( const void *data, int data_type) @@ -918,6 +938,7 @@ extern void fsnotify_put_mark(struct fsnotify_mark *mark); struct fsnotify_mark *fsnotify_next_mark(struct fsnotify_mark *mark); extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); +extern void fsnotify_modify_mark_mask(struct fsnotify_mark *mark, u32 set, u32 clear); static inline void fsnotify_init_event(struct fsnotify_event *event) { diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 2c5685adf3a97..cea6bbc97b6ef 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -67,6 +67,9 @@ struct vm_fault; * bio, i.e. set REQ_ATOMIC. * * IOMAP_F_INTEGRITY indicates that the filesystems handles integrity metadata. + * + * IOMAP_F_ZERO_TAIL indicates the remainder of the block after the data + * written should be zeroed. */ #define IOMAP_F_NEW (1U << 0) #define IOMAP_F_DIRTY (1U << 1) @@ -86,6 +89,7 @@ struct vm_fault; #else #define IOMAP_F_INTEGRITY 0 #endif /* CONFIG_BLK_DEV_INTEGRITY */ +#define IOMAP_F_ZERO_TAIL (1U << 10) /* * Flag reserved for file system specific usage @@ -143,16 +147,6 @@ static inline void *iomap_inline_data(const struct iomap *iomap, loff_t pos) } /* - * Check if the mapping's length is within the valid range for inline data. - * This is used to guard against accessing data beyond the page inline_data - * points at. - */ -static inline bool iomap_inline_data_valid(const struct iomap *iomap) -{ - return iomap->length <= PAGE_SIZE - offset_in_page(iomap->inline_data); -} - -/* * When get_folio succeeds, put_folio will always be called to do any * cleanup work necessary. put_folio is responsible for unlocking and putting * @folio. 
diff --git a/include/linux/kstrtox.h b/include/linux/kstrtox.h index 6ea897222af1d..6c92828667704 100644 --- a/include/linux/kstrtox.h +++ b/include/linux/kstrtox.h @@ -142,10 +142,9 @@ static inline int __must_check kstrtos32_from_user(const char __user *s, size_t * Keep in mind above caveat. */ -extern unsigned long simple_strtoul(const char *,char **,unsigned int); -extern unsigned long simple_strntoul(const char *,char **,unsigned int,size_t); -extern long simple_strtol(const char *,char **,unsigned int); -extern unsigned long long simple_strtoull(const char *,char **,unsigned int); -extern long long simple_strtoll(const char *,char **,unsigned int); +unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base); +long simple_strtol(const char *cp, char **endp, unsigned int base); +unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); +long long simple_strtoll(const char *cp, char **endp, unsigned int base); #endif /* _LINUX_KSTRTOX_H */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 8d1fb85e76847..b8a2106e177b6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1329,7 +1329,6 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif - struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file __rcu *exe_file; @@ -1894,11 +1893,11 @@ enum { /* mm flags */ /* - * The first two bits represent core dump modes for set-user-ID, - * the modes are SUID_DUMP_* defined in linux/sched/coredump.h + * Bits 0 and 1 were dumpability; that moved to task->exec_state. Reserve + * the bits so MMF_DUMP_FILTER_* positions stay stable for the + * /proc/<pid>/coredump_filter ABI. 
*/ #define MMF_DUMPABLE_BITS 2 -#define MMF_DUMPABLE_MASK (BIT(MMF_DUMPABLE_BITS) - 1) /* coredump filter bits */ #define MMF_DUMP_ANON_PRIVATE 2 #define MMF_DUMP_ANON_SHARED 3 @@ -1959,7 +1958,7 @@ enum { #define MMF_TOPDOWN 31 /* mm searches top down by default */ #define MMF_TOPDOWN_MASK BIT(MMF_TOPDOWN) -#define MMF_INIT_LEGACY_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ +#define MMF_INIT_LEGACY_MASK (MMF_DUMP_FILTER_MASK |\ MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ MMF_VM_MERGE_ANY_MASK | MMF_TOPDOWN_MASK) diff --git a/include/linux/namei.h b/include/linux/namei.h index 2ad6dd9987b90..80488b3de0c98 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -61,7 +61,6 @@ extern struct dentry *start_creating_path(int, const char *, struct path *, unsi extern struct dentry *start_creating_user_path(int, const char __user *, struct path *, unsigned int); extern void end_creating_path(const struct path *, struct dentry *); extern struct dentry *start_removing_path(const char *, struct path *); -extern struct dentry *start_removing_user_path_at(int , const char __user *, struct path *); static inline void end_removing_path(const struct path *path , struct dentry *dentry) { end_creating_path(path, dentry); diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 4daee27fa5eb4..34d294774f8ce 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -306,7 +306,7 @@ struct nfs_server { #define NFS_CAP_ATOMIC_OPEN (1U << 4) #define NFS_CAP_LGOPEN (1U << 5) #define NFS_CAP_CASE_INSENSITIVE (1U << 6) -#define NFS_CAP_CASE_PRESERVING (1U << 7) +#define NFS_CAP_CASE_NONPRESERVING (1U << 7) #define NFS_CAP_REBOOT_LAYOUTRETURN (1U << 8) #define NFS_CAP_OFFLOAD_STATUS (1U << 9) #define NFS_CAP_ZERO_RANGE (1U << 10) diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index fcbd21b5685f4..83ee991cde2be 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -182,6 +182,8 @@ struct nfs_pathconf { struct 
nfs_fattr *fattr; /* Post-op attributes */ __u32 max_link; /* max # of hard links */ __u32 max_namelen; /* max name length */ + bool case_insensitive; + bool case_preserving; }; struct nfs4_change_info { diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 19d1c5e5f3350..47d7deaeed8ff 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -67,6 +67,7 @@ enum proc_pidonly { struct proc_fs_info { struct pid_namespace *pid_ns; kgid_t pid_gid; + const struct cred *mounter_cred; enum proc_hidepid hide_pid; enum proc_pidonly pidonly; struct rcu_head rcu; @@ -248,4 +249,16 @@ static inline struct pid_namespace *proc_pid_ns(struct super_block *sb) bool proc_ns_file(const struct file *file); +#if defined CONFIG_PROC_FS && !defined MODULE +void impl_proc_make_permanent(struct proc_dir_entry *pde); +#endif + +static inline void proc_make_permanent(struct proc_dir_entry *pde) +{ + /* Don't give matches to modules. */ +#if defined CONFIG_PROC_FS && !defined MODULE + impl_proc_make_permanent(pde); +#endif +} + #endif /* _LINUX_PROC_FS_H */ diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 90507d4afcd6d..ef314f7a9ecc5 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -17,6 +17,7 @@ struct syscall_info { struct seccomp_data data; }; +bool ptracer_access_allowed(struct task_struct *tsk); extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, unsigned int gup_flags); diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index fc2f596a6df1b..57c11ec9dc645 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -136,12 +136,26 @@ struct rhashtable_iter { bool end_of_table; }; -int rhashtable_init_noprof(struct rhashtable *ht, - const struct rhashtable_params *params); +int __rhashtable_init_noprof(struct rhashtable *ht, + const struct rhashtable_params *params, + struct lock_class_key *key); +#define 
rhashtable_init_noprof(ht, params) \ +({ \ + static struct lock_class_key __key; \ + \ + __rhashtable_init_noprof(ht, params, &__key); \ +}) #define rhashtable_init(...) alloc_hooks(rhashtable_init_noprof(__VA_ARGS__)) -int rhltable_init_noprof(struct rhltable *hlt, - const struct rhashtable_params *params); +int __rhltable_init_noprof(struct rhltable *hlt, + const struct rhashtable_params *params, + struct lock_class_key *key); +#define rhltable_init_noprof(hlt, params) \ +({ \ + static struct lock_class_key __key; \ + \ + __rhltable_init_noprof(hlt, params, &__key); \ +}) #define rhltable_init(...) alloc_hooks(rhltable_init_noprof(__VA_ARGS__)) #endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d71cec884a5d9..81948cfe90671 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -85,6 +85,7 @@ struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; +struct task_exec_state; struct task_group; struct task_struct; struct timespec64; @@ -962,6 +963,8 @@ struct task_struct { struct mm_struct *mm; struct mm_struct *active_mm; + struct task_exec_state __rcu *exec_state; + int exit_state; int exit_code; int exit_signal; @@ -1002,9 +1005,6 @@ struct task_struct { unsigned sched_rt_mutex:1; #endif - /* Save user-dumpable when mm goes away */ - unsigned user_dumpable:1; - /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 624fda17a7857..20957ccde3b58 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -2,43 +2,18 @@ #ifndef _LINUX_SCHED_COREDUMP_H #define _LINUX_SCHED_COREDUMP_H -#include <linux/mm_types.h> - -#define SUID_DUMP_DISABLE 0 /* No setuid dumping */ -#define SUID_DUMP_USER 1 /* Dump as user of process */ -#define SUID_DUMP_ROOT 2 /* Dump as root */ - -static inline unsigned long __mm_flags_get_dumpable(const struct 
mm_struct *mm) -{ - /* - * By convention, dumpable bits are contained in first 32 bits of the - * bitmap, so we can simply access this first unsigned long directly. - */ - return __mm_flags_get_word(mm); -} - -static inline void __mm_flags_set_mask_dumpable(struct mm_struct *mm, int value) -{ - __mm_flags_set_mask_bits_word(mm, MMF_DUMPABLE_MASK, value); -} - -extern void set_dumpable(struct mm_struct *mm, int value); /* - * This returns the actual value of the suid_dumpable flag. For things - * that are using this for checking for privilege transitions, it must - * test against SUID_DUMP_USER rather than treating it as a boolean - * value. + * Task dumpability mode. Gates core dump production and ptrace_attach() + * authorization. The numeric values are stable ABI (suid_dumpable + * sysctl, prctl(PR_SET_DUMPABLE)); do not renumber. */ -static inline int __get_dumpable(unsigned long mm_flags) -{ - return mm_flags & MMF_DUMPABLE_MASK; -} - -static inline int get_dumpable(struct mm_struct *mm) -{ - unsigned long flags = __mm_flags_get_dumpable(mm); +enum task_dumpable { + TASK_DUMPABLE_OFF = 0, /* no dump; ptrace needs CAP_SYS_PTRACE */ + TASK_DUMPABLE_OWNER = 1, /* default; dump and ptrace by uid match */ + TASK_DUMPABLE_ROOT = 2, /* dump as root; ptrace needs CAP_SYS_PTRACE */ +}; - return __get_dumpable(flags); -} +void task_exec_state_set_dumpable(enum task_dumpable value); +enum task_dumpable task_exec_state_get_dumpable(struct task_struct *task); #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/linux/sched/exec_state.h b/include/linux/sched/exec_state.h new file mode 100644 index 0000000000000..9b61782510b8e --- /dev/null +++ b/include/linux/sched/exec_state.h @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */ +#ifndef _LINUX_SCHED_EXEC_STATE_H +#define _LINUX_SCHED_EXEC_STATE_H + +#include <linux/init.h> +#include <linux/rcupdate.h> +#include <linux/refcount.h> +#include 
<linux/sched/coredump.h> +#include <linux/user_namespace.h> + +struct task_exec_state { + refcount_t count; + enum task_dumpable dumpable; + struct user_namespace *user_ns; + struct rcu_head rcu; +}; + +extern struct task_exec_state init_task_exec_state; + +struct task_exec_state *alloc_task_exec_state(struct user_namespace *user_ns); +void put_task_exec_state(struct task_exec_state *exec_state); +struct task_exec_state *task_exec_state_rcu(const struct task_struct *tsk); +struct task_exec_state *task_exec_state_replace(struct task_struct *tsk, + struct task_exec_state *exec_state); +int task_exec_state_copy(struct task_struct *tsk); +void __init exec_state_init(void); + +DEFINE_FREE(put_task_exec_state, struct task_exec_state *, put_task_exec_state(_T)) + +#endif /* _LINUX_SCHED_EXEC_STATE_H */ diff --git a/include/linux/sockptr.h b/include/linux/sockptr.h index 3e6c8e9d67aef..9c2429c1a570a 100644 --- a/include/linux/sockptr.h +++ b/include/linux/sockptr.h @@ -87,24 +87,10 @@ static inline int copy_safe_from_sockptr(void *dst, size_t ksize, static inline int copy_struct_from_sockptr(void *dst, size_t ksize, sockptr_t src, size_t usize) { - size_t size = min(ksize, usize); - size_t rest = max(ksize, usize) - size; - if (!sockptr_is_kernel(src)) - return copy_struct_from_user(dst, ksize, src.user, size); - - if (usize < ksize) { - memset(dst + size, 0, rest); - } else if (usize > ksize) { - char *p = src.kernel; + return copy_struct_from_user(dst, ksize, src.user, usize); - while (rest--) { - if (*p++) - return -E2BIG; - } - } - memcpy(dst, src.kernel, size); - return 0; + return copy_struct_from_bounce_buffer(dst, ksize, src.kernel, usize); } static inline int copy_to_sockptr_offset(sockptr_t dst, size_t offset, @@ -121,6 +107,16 @@ static inline int copy_to_sockptr(sockptr_t dst, const void *src, size_t size) return copy_to_sockptr_offset(dst, 0, src, size); } +static inline int +copy_struct_to_sockptr(sockptr_t dst, size_t usize, const void *src, + size_t ksize, 
bool *ignored_trailing) +{ + if (!sockptr_is_kernel(dst)) + return copy_struct_to_user(dst.user, usize, src, ksize, ignored_trailing); + + return copy_struct_to_bounce_buffer(dst.kernel, usize, src, ksize, ignored_trailing); +} + static inline void *memdup_sockptr_noprof(sockptr_t src, size_t len) { void *p = kmalloc_track_caller_noprof(len, GFP_USER | __GFP_NOWARN); diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index b1e595c2615bd..2735c332ddb73 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -80,6 +80,9 @@ struct cache_detail { int (*cache_upcall)(struct cache_detail *, struct cache_head *); + int (*cache_notify)(struct cache_detail *cd, + struct cache_head *h); + void (*cache_request)(struct cache_detail *cd, struct cache_head *ch, char **bpp, int *blen); @@ -189,9 +192,9 @@ sunrpc_cache_update(struct cache_detail *detail, struct cache_head *new, struct cache_head *old, int hash); extern int -sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h); +sunrpc_cache_upcall(struct cache_detail *detail, struct cache_head *h); extern int -sunrpc_cache_pipe_upcall_timeout(struct cache_detail *detail, +sunrpc_cache_upcall_warn(struct cache_detail *detail, struct cache_head *h); @@ -248,6 +251,14 @@ extern int sunrpc_cache_register_pipefs(struct dentry *parent, const char *, extern void sunrpc_cache_unregister_pipefs(struct cache_detail *); extern void sunrpc_cache_unhash(struct cache_detail *, struct cache_head *); +int sunrpc_cache_requests_count(struct cache_detail *cd); +int sunrpc_cache_requests_snapshot(struct cache_detail *cd, + struct cache_head **items, + u64 *seqnos, int max, + u64 min_seqno); +int sunrpc_cache_notify(struct cache_detail *cd, struct cache_head *h, + u32 cache_type); + /* Must store cache_detail in seq_file->private if using next three functions */ extern void *cache_seq_start_rcu(struct seq_file *file, loff_t *pos); extern void *cache_seq_next_rcu(struct seq_file 
*file, void *p, loff_t *pos); diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h index 43950b5237c85..1cd452ed1db5e 100644 --- a/include/linux/sunrpc/gss_krb5.h +++ b/include/linux/sunrpc/gss_krb5.h @@ -37,13 +37,9 @@ #ifndef _LINUX_SUNRPC_GSS_KRB5_H #define _LINUX_SUNRPC_GSS_KRB5_H -#include <crypto/skcipher.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_err.h> -/* Length of constant used in key derivation */ -#define GSS_KRB5_K5CLENGTH (5) - /* Maximum key length (in bytes) for the supported crypto algorithms */ #define GSS_KRB5_MAX_KEYLEN (32) @@ -56,11 +52,6 @@ /* The length of the Kerberos GSS token header */ #define GSS_KRB5_TOK_HDR_LEN (16) -#define KG_TOK_MIC_MSG 0x0101 -#define KG_TOK_WRAP_MSG 0x0201 - -#define KG2_TOK_INITIAL 0x0101 -#define KG2_TOK_RESPONSE 0x0202 #define KG2_TOK_MIC 0x0404 #define KG2_TOK_WRAP 0x0504 @@ -68,102 +59,6 @@ #define KG2_TOKEN_FLAG_SEALED 0x02 #define KG2_TOKEN_FLAG_ACCEPTORSUBKEY 0x04 -#define KG2_RESP_FLAG_ERROR 0x0001 -#define KG2_RESP_FLAG_DELEG_OK 0x0002 - -enum sgn_alg { - SGN_ALG_DES_MAC_MD5 = 0x0000, - SGN_ALG_MD2_5 = 0x0001, - SGN_ALG_DES_MAC = 0x0002, - SGN_ALG_3 = 0x0003, /* not published */ - SGN_ALG_HMAC_SHA1_DES3_KD = 0x0004 -}; -enum seal_alg { - SEAL_ALG_NONE = 0xffff, - SEAL_ALG_DES = 0x0000, - SEAL_ALG_1 = 0x0001, /* not published */ - SEAL_ALG_DES3KD = 0x0002 -}; - -/* - * These values are assigned by IANA and published via the - * subregistry at the link below: - * - * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-2 - */ -#define CKSUMTYPE_CRC32 0x0001 -#define CKSUMTYPE_RSA_MD4 0x0002 -#define CKSUMTYPE_RSA_MD4_DES 0x0003 -#define CKSUMTYPE_DESCBC 0x0004 -#define CKSUMTYPE_RSA_MD5 0x0007 -#define CKSUMTYPE_RSA_MD5_DES 0x0008 -#define CKSUMTYPE_NIST_SHA 0x0009 -#define CKSUMTYPE_HMAC_SHA1_DES3 0x000c -#define CKSUMTYPE_HMAC_SHA1_96_AES128 0x000f -#define CKSUMTYPE_HMAC_SHA1_96_AES256 0x0010 -#define 
CKSUMTYPE_CMAC_CAMELLIA128 0x0011 -#define CKSUMTYPE_CMAC_CAMELLIA256 0x0012 -#define CKSUMTYPE_HMAC_SHA256_128_AES128 0x0013 -#define CKSUMTYPE_HMAC_SHA384_192_AES256 0x0014 -#define CKSUMTYPE_HMAC_MD5_ARCFOUR -138 /* Microsoft md5 hmac cksumtype */ - -/* from gssapi_err_krb5.h */ -#define KG_CCACHE_NOMATCH (39756032L) -#define KG_KEYTAB_NOMATCH (39756033L) -#define KG_TGT_MISSING (39756034L) -#define KG_NO_SUBKEY (39756035L) -#define KG_CONTEXT_ESTABLISHED (39756036L) -#define KG_BAD_SIGN_TYPE (39756037L) -#define KG_BAD_LENGTH (39756038L) -#define KG_CTX_INCOMPLETE (39756039L) -#define KG_CONTEXT (39756040L) -#define KG_CRED (39756041L) -#define KG_ENC_DESC (39756042L) -#define KG_BAD_SEQ (39756043L) -#define KG_EMPTY_CCACHE (39756044L) -#define KG_NO_CTYPES (39756045L) - -/* per Kerberos v5 protocol spec crypto types from the wire. - * these get mapped to linux kernel crypto routines. - * - * These values are assigned by IANA and published via the - * subregistry at the link below: - * - * https://www.iana.org/assignments/kerberos-parameters/kerberos-parameters.xhtml#kerberos-parameters-1 - */ -#define ENCTYPE_NULL 0x0000 -#define ENCTYPE_DES_CBC_CRC 0x0001 /* DES cbc mode with CRC-32 */ -#define ENCTYPE_DES_CBC_MD4 0x0002 /* DES cbc mode with RSA-MD4 */ -#define ENCTYPE_DES_CBC_MD5 0x0003 /* DES cbc mode with RSA-MD5 */ -#define ENCTYPE_DES_CBC_RAW 0x0004 /* DES cbc mode raw */ -/* XXX deprecated? 
*/ -#define ENCTYPE_DES3_CBC_SHA 0x0005 /* DES-3 cbc mode with NIST-SHA */ -#define ENCTYPE_DES3_CBC_RAW 0x0006 /* DES-3 cbc mode raw */ -#define ENCTYPE_DES_HMAC_SHA1 0x0008 -#define ENCTYPE_DES3_CBC_SHA1 0x0010 -#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011 -#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012 -#define ENCTYPE_AES128_CTS_HMAC_SHA256_128 0x0013 -#define ENCTYPE_AES256_CTS_HMAC_SHA384_192 0x0014 -#define ENCTYPE_ARCFOUR_HMAC 0x0017 -#define ENCTYPE_ARCFOUR_HMAC_EXP 0x0018 -#define ENCTYPE_CAMELLIA128_CTS_CMAC 0x0019 -#define ENCTYPE_CAMELLIA256_CTS_CMAC 0x001A -#define ENCTYPE_UNKNOWN 0x01ff - -/* - * Constants used for key derivation - */ -/* for 3DES */ -#define KG_USAGE_SEAL (22) -#define KG_USAGE_SIGN (23) -#define KG_USAGE_SEQ (24) - -/* from rfc3961 */ -#define KEY_USAGE_SEED_CHECKSUM (0x99) -#define KEY_USAGE_SEED_ENCRYPTION (0xAA) -#define KEY_USAGE_SEED_INTEGRITY (0x55) - /* from rfc4121 */ #define KG_USAGE_ACCEPTOR_SEAL (22) #define KG_USAGE_ACCEPTOR_SIGN (23) diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index df6e08aaad570..4ba39f07371df 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -66,7 +66,6 @@ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; extern unsigned int svcrdma_max_bc_requests; extern unsigned int svcrdma_max_req_size; -extern struct workqueue_struct *svcrdma_wq; extern struct percpu_counter svcrdma_stat_read; extern struct percpu_counter svcrdma_stat_recv; @@ -117,6 +116,8 @@ struct svcxprt_rdma { struct llist_head sc_recv_ctxts; + struct llist_head sc_send_release_list; + atomic_t sc_completion_ids; }; /* sc_flags */ @@ -230,13 +231,11 @@ struct svc_rdma_write_info { unsigned int wi_next_off; struct svc_rdma_chunk_ctxt wi_cc; - struct work_struct wi_work; }; struct svc_rdma_send_ctxt { struct llist_node sc_node; struct rpc_rdma_cid sc_cid; - struct work_struct sc_work; struct svcxprt_rdma *sc_rdma; struct ib_send_wr sc_send_wr; @@ 
-300,6 +299,7 @@ extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma, /* svc_rdma_sendto.c */ extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma); +extern void svc_rdma_send_ctxts_drain(struct svcxprt_rdma *rdma); extern struct svc_rdma_send_ctxt * svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma); extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma, diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index b639a6fafcbc6..31971b01d962a 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -140,6 +140,21 @@ int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp); void xdr_free_bvec(struct xdr_buf *buf); unsigned int xdr_buf_to_bvec(struct bio_vec *bvec, unsigned int bvec_size, const struct xdr_buf *xdr); +int xdr_buf_to_sg(const struct xdr_buf *buf, unsigned int offset, + unsigned int len, struct scatterlist *sg, unsigned int nsg); +int xdr_buf_to_sg_alloc(const struct xdr_buf *buf, unsigned int offset, + unsigned int len, struct scatterlist *sg_head, + unsigned int sg_head_nents, + struct scatterlist **sg_overflow, gfp_t gfp); + +/* + * Inline scatterlist entries for xdr_buf_to_sg_alloc(). Sized to cover the + * head kvec, tail kvec, and a few page fragments without any heap allocation. 
+ */ +enum { + XDR_BUF_TO_SG_NENTS = 8, +}; + static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len) { @@ -260,7 +275,6 @@ extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); -extern int xdr_process_buf(const struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data); extern void xdr_set_pagelen(struct xdr_stream *, unsigned int len); extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf, unsigned int len); diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index e0c3d6e29301d..9d3fcef61f04f 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -503,7 +503,7 @@ copy_struct_to_user(void __user *dst, size_t usize, const void *src, return -EFAULT; } if (ignored_trailing) - *ignored_trailing = ksize < usize && + *ignored_trailing = usize < ksize && memchr_inv(src + size, 0, rest) != NULL; /* Copy the interoperable parts of the struct. */ if (copy_to_user(dst, src, size)) @@ -511,6 +511,69 @@ copy_struct_to_user(void __user *dst, size_t usize, const void *src, return 0; } +static __always_inline void +__copy_struct_generic_bounce_buffer(void *dst, size_t dstsize, + const void *src, size_t srcsize, + bool *ignored_trailing) +{ + size_t size = min(dstsize, srcsize); + size_t rest = max(dstsize, srcsize) - size; + + /* Deal with trailing bytes. */ + if (dstsize > srcsize) + memset(dst + size, 0, rest); + if (ignored_trailing) + *ignored_trailing = dstsize < srcsize && + memchr_inv(src + size, 0, rest) != NULL; + /* Copy the interoperable parts of the struct. 
*/ + memcpy(dst, src, size); +} + +/* + * This is like copy_struct_from_user(), but the + * src buffer was already copied into a kernel + * bounce buffer, so it will never return -EFAULT; -E2BIG is still possible. + */ +static __always_inline __must_check int +copy_struct_from_bounce_buffer(void *dst, size_t dstsize, + const void *src, size_t srcsize) +{ + bool ignored_trailing; + + /* Double check if dstsize is larger than a known object size. */ + if (WARN_ON_ONCE(dstsize > __builtin_object_size(dst, 1))) + return -E2BIG; + + __copy_struct_generic_bounce_buffer(dst, dstsize, + src, srcsize, + &ignored_trailing); + if (unlikely(ignored_trailing)) + return -E2BIG; + + return 0; +} + +/* + * This is like copy_struct_to_user(), but the + * dst buffer is a kernel bounce buffer instead + * of a direct userspace buffer, so it will never return -EFAULT. + */ +static __always_inline __must_check int +copy_struct_to_bounce_buffer(void *dst, size_t dstsize, + const void *src, + size_t srcsize, + bool *ignored_trailing) +{ + /* Double check if srcsize is larger than a known object size.
*/ + if (WARN_ON_ONCE(srcsize > __builtin_object_size(src, 1))) + return -E2BIG; + + __copy_struct_generic_bounce_buffer(dst, dstsize, + src, srcsize, + ignored_trailing); + return 0; +} + bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size); long copy_from_kernel_nofault(void *dst, const void *src, size_t size); diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 838a94218b593..55c6cb54bd254 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -192,6 +192,7 @@ struct p9_rdma_opts { * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @session_lock_timeout: retry interval for blocking locks + * @ndentry_timeout_ms: Negative dentry lookup cache retention time in ms * * This strucure holds options which are parsed and will be transferred * to the v9fs_session_info structure when mounted, and therefore largely @@ -203,6 +204,7 @@ struct p9_session_opts { unsigned short debug; unsigned int afid; unsigned int cache; + unsigned int ndentry_timeout_ms; #ifdef CONFIG_9P_FSCACHE char *cachetag; #endif diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index ec1df8b94517c..4c5c47c5edb74 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -31,6 +31,19 @@ struct btrfs_space_info; struct btrfs_raid_bio; struct raid56_bio_trace_info; struct find_free_extent_ctl; +struct btrfs_trans_handle; +struct btrfs_transaction; +struct btrfs_log_ctx; + +#define show_inode_type(mode) \ + __print_symbolic((mode) & S_IFMT, \ + { S_IFDIR, "DIR" }, \ + { S_IFREG, "REG" }, \ + { S_IFLNK, "LNK" }, \ + { S_IFIFO, "FIFO" }, \ + { S_IFCHR, "CHR" }, \ + { S_IFBLK, "BLK" }, \ + { S_IFSOCK, "SOCK" }) #define show_ref_type(type) \ __print_symbolic(type, \ @@ -101,8 +114,21 @@ struct find_free_extent_ctl; EM( ALLOC_CHUNK_FORCE, "ALLOC_CHUNK_FORCE") \ EM( RUN_DELAYED_IPUTS, "RUN_DELAYED_IPUTS") \ EM( COMMIT_TRANS, 
"COMMIT_TRANS") \ + EM( RECLAIM_ZONES, "RECLAIM_ZONES") \ EMe(RESET_ZONES, "RESET_ZONES") +#define TRANSACTION_STATES \ + EM( TRANS_STATE_RUNNING, "TRANS_STATE_RUNNING") \ + EM( TRANS_STATE_COMMIT_PREP, "TRANS_STATE_COMMIT_PREP") \ + EM( TRANS_STATE_COMMIT_START, "TRANS_STATE_COMMIT_START") \ + EM( TRANS_STATE_COMMIT_DOING, "TRANS_STATE_COMMIT_DOING") \ + EM( TRANS_STATE_UNBLOCKED, "TRANS_STATE_UNBLOCKED") \ + EM( TRANS_STATE_SUPER_COMMITTED, "TRANS_STATE_SUPER_COMMITTED") \ + EMe(TRANS_STATE_COMPLETED, "TRANS_STATE_COMPLETED") + +#define LOG_MODES \ + EM( LOG_INODE_ALL, "LOG_INODE_ALL") \ + EMe(LOG_INODE_EXISTS, "LOG_INODE_EXISTS") /* * First define the enums in the above macros to be exported to userspace via * TRACE_DEFINE_ENUM(). @@ -118,6 +144,8 @@ FI_TYPES QGROUP_RSV_TYPES IO_TREE_OWNER FLUSH_STATES +TRANSACTION_STATES +LOG_MODES /* * Now redefine the EM and EMe macros to map the enums to the strings that will @@ -180,25 +208,66 @@ FLUSH_STATES #define TP_printk_btrfs(fmt, args...) 
\ TP_printk("%pU: " fmt, __entry->fsid, args) +TRACE_EVENT(btrfs_transaction_start, + + TP_PROTO(const struct btrfs_transaction *trans), + + TP_ARGS(trans), + + TP_STRUCT__entry_btrfs( + __field( u64, generation ) + ), + + TP_fast_assign_btrfs(trans->fs_info, + __entry->generation = trans->transid; + ), + + TP_printk_btrfs("gen=%llu", __entry->generation) +); + TRACE_EVENT(btrfs_transaction_commit, - TP_PROTO(const struct btrfs_fs_info *fs_info), + TP_PROTO(const struct btrfs_trans_handle *trans), - TP_ARGS(fs_info), + TP_ARGS(trans), TP_STRUCT__entry_btrfs( __field( u64, generation ) - __field( u64, root_objectid ) + __field( bool, in_fsync ) + __field( int, state ) ), - TP_fast_assign_btrfs(fs_info, - __entry->generation = fs_info->generation; - __entry->root_objectid = BTRFS_ROOT_TREE_OBJECTID; + TP_fast_assign_btrfs(trans->fs_info, + __entry->generation = trans->transid; + __entry->in_fsync = trans->in_fsync; + __entry->state = trans->transaction->state; ), - TP_printk_btrfs("root=%llu(%s) gen=%llu", - show_root_type(__entry->root_objectid), - __entry->generation) + TP_printk_btrfs("gen=%llu in_fsync=%d state=%d(%s)", __entry->generation, + __entry->in_fsync, __entry->state, + __print_symbolic(__entry->state, TRANSACTION_STATES)) +); + +TRACE_EVENT(btrfs_transaction_abort, + + TP_PROTO(const struct btrfs_trans_handle *trans), + + TP_ARGS(trans), + + TP_STRUCT__entry_btrfs( + __field( u64, generation ) + __field( bool, in_fsync ) + __field( int, error ) + ), + + TP_fast_assign_btrfs(trans->fs_info, + __entry->generation = trans->transid; + __entry->in_fsync = trans->in_fsync; + __entry->error = trans->aborted; + ), + + TP_printk_btrfs("gen=%llu in_fsync=%d error=%d", __entry->generation, + __entry->in_fsync, __entry->error) ); DECLARE_EVENT_CLASS(btrfs__inode, @@ -670,7 +739,7 @@ TRACE_EVENT(btrfs_finish_ordered_extent, TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d", show_root_type(__entry->root_objectid), __entry->ino, __entry->start, 
- __entry->len, !!__entry->uptodate) + __entry->len, __entry->uptodate) ); DECLARE_EVENT_CLASS(btrfs__writepage, @@ -755,17 +824,18 @@ TRACE_EVENT(btrfs_writepage_end_io_hook, __entry->end, __entry->uptodate) ); -TRACE_EVENT(btrfs_sync_file, +TRACE_EVENT(btrfs_sync_file_enter, TP_PROTO(const struct file *file, int datasync), TP_ARGS(file, datasync), TP_STRUCT__entry_btrfs( - __field( u64, ino ) - __field( u64, parent ) - __field( int, datasync ) - __field( u64, root_objectid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, parent ) + __field( int, datasync ) + __field( u64, root_objectid ) ), TP_fast_assign( @@ -778,13 +848,790 @@ TRACE_EVENT(btrfs_sync_file, __entry->parent = btrfs_ino(BTRFS_I(parent_inode)); __entry->datasync = datasync; __entry->root_objectid = btrfs_root_id(BTRFS_I(inode)->root); + __entry->mode = inode->i_mode; ), - TP_printk_btrfs("root=%llu(%s) ino=%llu parent=%llu datasync=%d", - show_root_type(__entry->root_objectid), - __entry->ino, - __entry->parent, - __entry->datasync) + TP_printk_btrfs("root=%llu(%s) ino=%llu type=%s parent=%llu datasync=%d", + show_root_type(__entry->root_objectid), __entry->ino, + show_inode_type(__entry->mode), __entry->parent, + __entry->datasync) +); + +TRACE_EVENT(btrfs_sync_file_exit, + + TP_PROTO(const struct file *file, int ret), + + TP_ARGS(file, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, ino ) + __field( int, ret ) + __field( u64, root_objectid ) + ), + + TP_fast_assign( + struct btrfs_inode *inode = BTRFS_I(file_inode(file)); + + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu ret=%d", + show_root_type(__entry->root_objectid), + __entry->ino, __entry->ret) +); + +TRACE_EVENT(btrfs_log_inode_parent_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + 
TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, transid ) + __field( u64, generation ) + __field( u64, logged_trans ) + __field( u64, last_unlink_trans ) + __field( int, last_sub_trans ) + __field( int, inode_last_log_commit ) + __field( int, root_last_log_commit ) + ), + + TP_fast_assign( + struct btrfs_root *root = inode->root; + + TP_fast_assign_fsid(root->fs_info); + __entry->root_objectid = btrfs_root_id(root); + __entry->ino = btrfs_ino(inode); + __entry->mode = inode->vfs_inode.i_mode; + __entry->transid = trans->transid; + __entry->generation = inode->generation; + spin_lock(&inode->lock); + __entry->logged_trans = inode->logged_trans; + __entry->last_unlink_trans = inode->last_unlink_trans; + __entry->last_sub_trans = inode->last_sub_trans; + __entry->inode_last_log_commit = inode->last_log_commit; + spin_unlock(&inode->lock); + __entry->root_last_log_commit = btrfs_get_root_last_log_commit(root); + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu type=%s transid=%llu gen=%llu" + " logged_trans=%llu last_unlink_trans=%llu last_sub_trans=%d" + " inode_last_log_commit=%d root_last_log_commit=%d", + show_root_type(__entry->root_objectid), __entry->ino, + show_inode_type(__entry->mode), __entry->transid, + __entry->generation, __entry->logged_trans, + __entry->last_unlink_trans, __entry->last_sub_trans, + __entry->inode_last_log_commit, __entry->root_last_log_commit) +); + +TRACE_EVENT(btrfs_log_inode_parent_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), 
+ + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + +TRACE_EVENT(btrfs_log_inode_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, struct btrfs_inode *inode, + const struct btrfs_log_ctx *ctx, int log_mode), + + TP_ARGS(trans, inode, ctx, log_mode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, transid ) + __field( u64, generation ) + __field( u64, logged_trans ) + __field( u64, last_unlink_trans ) + __field( u64, last_reflink_trans ) + __field( int, last_sub_trans ) + __field( int, last_log_commit ) + __field( bool, logging_new_name ) + __field( bool, logging_new_delayed_dentries ) + __field( bool, is_conflict_inode ) + __field( bool, full_sync ) + __field( bool, copy_everything ) + __field( bool, no_xattrs ) + __field( int, log_mode ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->mode = inode->vfs_inode.i_mode; + __entry->transid = trans->transid; + __entry->generation = inode->generation; + spin_lock(&inode->lock); + __entry->logged_trans = inode->logged_trans; + __entry->last_unlink_trans = inode->last_unlink_trans; + __entry->last_reflink_trans = inode->last_reflink_trans; + __entry->last_sub_trans = inode->last_sub_trans; + __entry->last_log_commit = inode->last_log_commit; + spin_unlock(&inode->lock); + __entry->logging_new_name = ctx->logging_new_name; + __entry->logging_new_delayed_dentries = ctx->logging_new_delayed_dentries; + __entry->is_conflict_inode = ctx->logging_conflict_inodes; + __entry->full_sync = + test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); + __entry->copy_everything = + test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags); + __entry->no_xattrs = + test_bit(BTRFS_INODE_NO_XATTRS, 
&inode->runtime_flags); + __entry->log_mode = log_mode; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu type=%s transid=%llu gen=%llu" + " logged_trans=%llu last_unlink_trans=%llu" + " last_reflink_trans=%llu last_sub_trans=%d last_log_commit=%d" + " logging_new_name=%d logging_new_delayed_dentries=%d" + " is_conflict_inode=%d full_sync=%d copy_everything=%d" + " no_xattrs=%d log_mode=%d(%s)", + show_root_type(__entry->root_objectid), __entry->ino, + show_inode_type(__entry->mode), __entry->transid, + __entry->generation, __entry->logged_trans, + __entry->last_unlink_trans, __entry->last_reflink_trans, + __entry->last_sub_trans, __entry->last_log_commit, + __entry->logging_new_name, __entry->logging_new_delayed_dentries, + __entry->is_conflict_inode, __entry->full_sync, + __entry->copy_everything, __entry->no_xattrs, __entry->log_mode, + __print_symbolic(__entry->log_mode, LOG_MODES)) +); + +TRACE_EVENT(btrfs_log_inode_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( u64, logged_trans ) + __field( u64, last_reflink_trans ) + __field( int, last_sub_trans ) + __field( int, last_log_commit ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + spin_lock(&inode->lock); + __entry->logged_trans = inode->logged_trans; + __entry->last_reflink_trans = inode->last_reflink_trans; + __entry->last_sub_trans = inode->last_sub_trans; + __entry->last_log_commit = inode->last_log_commit; + spin_unlock(&inode->lock); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu logged_trans=%llu" + " last_reflink_trans=%llu last_sub_trans=%d" + " last_log_commit=%d ret=%d", +
show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->logged_trans, + __entry->last_reflink_trans, __entry->last_sub_trans, + __entry->last_log_commit, __entry->ret) +); + +TRACE_EVENT(btrfs_log_all_parents_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid) +); + +TRACE_EVENT(btrfs_log_all_parents_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + +TRACE_EVENT(btrfs_log_all_new_ancestors_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( unsigned int, nlink ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = 
trans->transid; + __entry->nlink = inode->vfs_inode.i_nlink; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu nlink=%u", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->nlink) +); + +TRACE_EVENT(btrfs_log_all_new_ancestors_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + +TRACE_EVENT(btrfs_log_new_dir_dentries_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = btrfs_ino(inode); + __entry->transid = trans->transid; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid) +); + +TRACE_EVENT(btrfs_log_new_dir_dentries_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, ino ) + __field( u64, transid ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(inode->root->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->ino = 
btrfs_ino(inode); + __entry->transid = trans->transid; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) ino=%llu transid=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->ino, + __entry->transid, __entry->ret) +); + +TRACE_EVENT(btrfs_add_conflicting_inode_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx, + u64 ino, u64 parent), + + TP_ARGS(trans, ctx, ino, parent), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + __field( u64, conflict_ino ) + __field( u64, conflict_ino_parent ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + __entry->conflict_ino = ino; + __entry->conflict_ino_parent = parent; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu conflict_ino=%llu" + " conflict_ino_parent=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino, __entry->conflict_ino, + __entry->conflict_ino_parent) +); + +TRACE_EVENT(btrfs_add_conflicting_inode_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx, + u64 ino, u64 parent, int ret), + + TP_ARGS(trans, ctx, ino, parent, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + __field( u64, conflict_ino ) + __field( u64, conflict_ino_parent ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + __entry->conflict_ino = ino; + __entry->conflict_ino_parent = parent; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu conflict_ino=%llu" + " conflict_ino_parent=%llu ret=%d", + 
show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino, __entry->conflict_ino, + __entry->conflict_ino_parent, __entry->ret) +); + +TRACE_EVENT(btrfs_log_conflicting_inodes_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx), + + TP_ARGS(trans, ctx), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino) +); + +TRACE_EVENT(btrfs_log_conflicting_inodes_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_log_ctx *ctx, + int ret), + + TP_ARGS(trans, ctx, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ctx_ino ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(ctx->inode->root); + __entry->transid = trans->transid; + __entry->ctx_ino = btrfs_ino(ctx->inode); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_ino=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_ino, __entry->ret) +); + +TRACE_EVENT(btrfs_log_new_delayed_dentries_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode), + + TP_ARGS(trans, inode), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + ), + + 
TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino) +); + +TRACE_EVENT(btrfs_log_new_delayed_dentries_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + int ret), + + TP_ARGS(trans, inode, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, __entry->ret) +); + +TRACE_EVENT(btrfs_record_unlink_dir, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *dir, + const struct btrfs_inode *inode, + bool for_rename), + + TP_ARGS(trans, dir, inode, for_rename), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( u64, dir ) + __field( bool, for_rename ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->dir = btrfs_ino(dir); + __entry->for_rename = for_rename; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu dir=%llu for_rename=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, __entry->dir, __entry->for_rename) +); + +TRACE_EVENT(btrfs_record_snapshot_destroy, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *dir), + + TP_ARGS(trans, dir), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, dir ) + ), + + TP_fast_assign( + 
TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(dir->root); + __entry->transid = trans->transid; + __entry->dir = btrfs_ino(dir); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu dir=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->dir) +); + +TRACE_EVENT(btrfs_record_new_subvolume, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *dir), + + TP_ARGS(trans, dir), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, dir ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(dir->root); + __entry->transid = trans->transid; + __entry->dir = btrfs_ino(dir); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu dir=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->dir) +); + +TRACE_EVENT(btrfs_log_new_name_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + const struct btrfs_inode *old_dir, + u64 old_dir_index), + + TP_ARGS(trans, inode, old_dir, old_dir_index), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( umode_t, mode ) + __field( u64, old_dir_ino ) + __field( u64, old_dir_index ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->mode = inode->vfs_inode.i_mode; + __entry->old_dir_ino = old_dir ? 
btrfs_ino(old_dir) : 0; + __entry->old_dir_index = old_dir_index; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu type=%s" + " old_dir=%llu old_dir_index=%llu", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, show_inode_type(__entry->mode), + __entry->old_dir_ino, __entry->old_dir_index) +); + +TRACE_EVENT(btrfs_log_new_name_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_inode *inode, + const struct btrfs_inode *old_dir, + int ret), + + TP_ARGS(trans, inode, old_dir, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( u64, ino ) + __field( u64, old_dir_ino ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(inode->root); + __entry->transid = trans->transid; + __entry->ino = btrfs_ino(inode); + __entry->old_dir_ino = old_dir ? btrfs_ino(old_dir) : 0; + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ino=%llu old_dir=%llu ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ino, __entry->old_dir_ino, __entry->ret) +); + +/* Ideally call this while under root->log_mutex (but not always possible). 
*/ +TRACE_EVENT(btrfs_sync_log_enter, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct btrfs_log_ctx *ctx), + + TP_ARGS(trans, root, ctx), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( int, ctx_log_transid ) + __field( int, root_log_transid ) + __field( int, log_transid_committed ) + __field( bool, log_committing ) + __field( bool, log_committing_prev ) + __field( int, log_writers ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(root); + __entry->transid = trans->transid; + __entry->ctx_log_transid = ctx->log_transid; + __entry->root_log_transid = btrfs_get_root_log_transid(root); + __entry->log_transid_committed = + data_race(root->log_transid_committed); + __entry->log_committing = + atomic_read(&root->log_commit[ctx->log_transid % 2]); + __entry->log_committing_prev = + atomic_read(&root->log_commit[(ctx->log_transid + 1) % 2]); + __entry->log_writers = atomic_read(&root->log_writers); + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_log_transid=%d" + " root_log_transid=%d log_transid_committed=%d" + " log_committing=%d log_committing_prev=%d log_writers=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_log_transid, __entry->root_log_transid, + __entry->log_transid_committed, __entry->log_committing, + __entry->log_committing_prev, __entry->log_writers) +); + +/* + * Ideally call this while under root->log_mutex and in the same critical + * section that calls the btrfs_sync_log_enter() trace event (though it's not + * always possible). 
+ */ +TRACE_EVENT(btrfs_sync_log_exit, + + TP_PROTO(const struct btrfs_trans_handle *trans, + const struct btrfs_root *root, + const struct btrfs_log_ctx *ctx, + int ret), + + TP_ARGS(trans, root, ctx, ret), + + TP_STRUCT__entry_btrfs( + __field( u64, root_objectid ) + __field( u64, transid ) + __field( int, ctx_log_transid ) + __field( int, root_log_transid ) + __field( int, log_transid_committed ) + __field( int, ret ) + ), + + TP_fast_assign( + TP_fast_assign_fsid(trans->fs_info); + __entry->root_objectid = btrfs_root_id(root); + __entry->transid = trans->transid; + __entry->ctx_log_transid = ctx->log_transid; + __entry->root_log_transid = btrfs_get_root_log_transid(root); + __entry->log_transid_committed = + data_race(root->log_transid_committed); + __entry->ret = ret; + ), + + TP_printk_btrfs("root=%llu(%s) transid=%llu ctx_log_transid=%d" + " root_log_transid=%d log_transid_committed=%d ret=%d", + show_root_type(__entry->root_objectid), __entry->transid, + __entry->ctx_log_transid, __entry->root_log_transid, + __entry->log_transid_committed, __entry->ret) ); TRACE_EVENT(btrfs_sync_fs, diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index b5188d2671d73..270c1a2c24c44 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -2595,6 +2595,34 @@ DEFINE_EVENT(f2fs_priority_update, f2fs_priority_restore, TP_ARGS(sbi, lock_name, is_write, p, orig_prio, new_prio) ); +TRACE_EVENT(f2fs_fault_report, + + TP_PROTO(struct super_block *sb, unsigned int err_code, + const char *func, unsigned int data), + + TP_ARGS(sb, err_code, func, data), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(unsigned int, err_code) + __string(func, func) + __field(unsigned int, data) + ), + + TP_fast_assign( + __entry->dev = sb->s_dev; + __entry->err_code = err_code; + __assign_str(func); + __entry->data = data; + ), + + TP_printk("dev = (%d,%d), err_code = %u, func = %s, data = %u", + show_dev(__entry->dev), + __entry->err_code, + 
__get_str(func), + __entry->data) +); + #endif /* _TRACE_F2FS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/filelock.h b/include/trace/events/filelock.h index 1167748862449..f11284be01eab 100644 --- a/include/trace/events/filelock.h +++ b/include/trace/events/filelock.h @@ -28,7 +28,10 @@ { FL_DOWNGRADE_PENDING, "FL_DOWNGRADE_PENDING" }, \ { FL_UNLOCK_PENDING, "FL_UNLOCK_PENDING" }, \ { FL_OFDLCK, "FL_OFDLCK" }, \ - { FL_RECLAIM, "FL_RECLAIM"}) + { FL_RECLAIM, "FL_RECLAIM" }, \ + { FL_IGN_DIR_CREATE, "FL_IGN_DIR_CREATE" }, \ + { FL_IGN_DIR_DELETE, "FL_IGN_DIR_DELETE" }, \ + { FL_IGN_DIR_RENAME, "FL_IGN_DIR_RENAME" }) #define show_fl_type(val) \ __print_symbolic(val, \ @@ -117,6 +120,39 @@ DEFINE_EVENT(filelock_lock, flock_lock_inode, TP_PROTO(struct inode *inode, struct file_lock *fl, int ret), TP_ARGS(inode, fl, ret)); +#define show_lease_break_flags(val) \ + __print_flags(val, "|", \ + { LEASE_BREAK_LEASE, "LEASE" }, \ + { LEASE_BREAK_DELEG, "DELEG" }, \ + { LEASE_BREAK_LAYOUT, "LAYOUT" }, \ + { LEASE_BREAK_NONBLOCK, "NONBLOCK" }, \ + { LEASE_BREAK_OPEN_RDONLY, "OPEN_RDONLY" }, \ + { LEASE_BREAK_DIR_CREATE, "DIR_CREATE" }, \ + { LEASE_BREAK_DIR_DELETE, "DIR_DELETE" }, \ + { LEASE_BREAK_DIR_RENAME, "DIR_RENAME" }) + +TRACE_EVENT(break_lease, + TP_PROTO(struct inode *inode, unsigned int flags), + + TP_ARGS(inode, flags), + + TP_STRUCT__entry( + __field(unsigned long, i_ino) + __field(dev_t, s_dev) + __field(unsigned int, flags) + ), + + TP_fast_assign( + __entry->s_dev = inode->i_sb->s_dev; + __entry->i_ino = inode->i_ino; + __entry->flags = flags; + ), + + TP_printk("dev=0x%x:0x%x ino=0x%lx flags=%s", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->i_ino, show_lease_break_flags(__entry->flags)) +); + DECLARE_EVENT_CLASS(filelock_lease, TP_PROTO(struct inode *inode, struct file_lease *fl), @@ -190,7 +226,7 @@ TRACE_EVENT(generic_add_lease, __entry->i_ino = inode->i_ino; __entry->wcount = 
atomic_read(&inode->i_writecount); __entry->rcount = atomic_read(&inode->i_readcount); - __entry->icount = icount_read(inode); + __entry->icount = icount_read_once(inode); __entry->owner = fl->c.flc_owner; __entry->flags = fl->c.flc_flags; __entry->type = fl->c.flc_type; diff --git a/include/trace/events/fsnotify.h b/include/trace/events/fsnotify.h new file mode 100644 index 0000000000000..341bbd57a39bc --- /dev/null +++ b/include/trace/events/fsnotify.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM fsnotify + +#if !defined(_TRACE_FSNOTIFY_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_FSNOTIFY_H + +#include <linux/tracepoint.h> + +#include <trace/misc/fsnotify.h> + +TRACE_EVENT(fsnotify, + TP_PROTO(__u32 mask, const void *data, int data_type, + struct inode *dir, const struct qstr *file_name, + struct inode *inode, u32 cookie), + + TP_ARGS(mask, data, data_type, dir, file_name, inode, cookie), + + TP_STRUCT__entry( + __field(__u32, mask) + __field(unsigned long, dir_ino) + __field(unsigned long, ino) + __field(dev_t, s_dev) + __field(int, data_type) + __field(u32, cookie) + __string(file_name, file_name ? (const char *)file_name->name : "") + ), + + TP_fast_assign( + __entry->mask = mask; + __entry->dir_ino = dir ? dir->i_ino : 0; + __entry->ino = inode ? inode->i_ino : 0; + __entry->s_dev = dir ? dir->i_sb->s_dev : + inode ? 
inode->i_sb->s_dev : 0; + __entry->data_type = data_type; + __entry->cookie = cookie; + __assign_str(file_name); + ), + + TP_printk("dev=%d:%d dir=%lu ino=%lu data_type=%d cookie=0x%x mask=0x%x %s name=%s", + MAJOR(__entry->s_dev), MINOR(__entry->s_dev), + __entry->dir_ino, __entry->ino, + __entry->data_type, __entry->cookie, + __entry->mask, show_fsnotify_mask(__entry->mask), + __get_str(file_name)) +); + +#endif /* _TRACE_FSNOTIFY_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/misc/fsnotify.h b/include/trace/misc/fsnotify.h new file mode 100644 index 0000000000000..a201e1bd6d8ca --- /dev/null +++ b/include/trace/misc/fsnotify.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Display helpers for fsnotify events + */ + +#include <linux/fsnotify_backend.h> + +#define show_fsnotify_mask(mask) \ + __print_flags(mask, "|", \ + { FS_ACCESS, "ACCESS" }, \ + { FS_MODIFY, "MODIFY" }, \ + { FS_ATTRIB, "ATTRIB" }, \ + { FS_CLOSE_WRITE, "CLOSE_WRITE" }, \ + { FS_CLOSE_NOWRITE, "CLOSE_NOWRITE" }, \ + { FS_OPEN, "OPEN" }, \ + { FS_MOVED_FROM, "MOVED_FROM" }, \ + { FS_MOVED_TO, "MOVED_TO" }, \ + { FS_CREATE, "CREATE" }, \ + { FS_DELETE, "DELETE" }, \ + { FS_DELETE_SELF, "DELETE_SELF" }, \ + { FS_MOVE_SELF, "MOVE_SELF" }, \ + { FS_OPEN_EXEC, "OPEN_EXEC" }, \ + { FS_UNMOUNT, "UNMOUNT" }, \ + { FS_Q_OVERFLOW, "Q_OVERFLOW" }, \ + { FS_ERROR, "ERROR" }, \ + { FS_OPEN_PERM, "OPEN_PERM" }, \ + { FS_ACCESS_PERM, "ACCESS_PERM" }, \ + { FS_OPEN_EXEC_PERM, "OPEN_EXEC_PERM" }, \ + { FS_PRE_ACCESS, "PRE_ACCESS" }, \ + { FS_MNT_ATTACH, "MNT_ATTACH" }, \ + { FS_MNT_DETACH, "MNT_DETACH" }, \ + { FS_EVENT_ON_CHILD, "EVENT_ON_CHILD" }, \ + { FS_RENAME, "RENAME" }, \ + { FS_DN_MULTISHOT, "DN_MULTISHOT" }, \ + { FS_ISDIR, "ISDIR" }) diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h index 92e7ae493ee31..bd78e69e0a43f 100644 --- a/include/uapi/asm-generic/errno.h +++ 
b/include/uapi/asm-generic/errno.h @@ -122,4 +122,6 @@ #define EHWPOISON 133 /* Memory page has hardware error */ +#define EFTYPE 134 /* Wrong file type for the intended operation */ + #endif diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 613475285643b..bfc68156b45a3 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -88,6 +88,10 @@ #define __O_TMPFILE 020000000 #endif +#ifndef O_EMPTYPATH +#define O_EMPTYPATH (1 << 26) /* allow empty path */ +#endif + /* a horrid kludge trying to make sure that this will fail on old kernels */ #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h index 9165154a274d9..9b576603b3f14 100644 --- a/include/uapi/linux/btrfs.h +++ b/include/uapi/linux/btrfs.h @@ -1100,6 +1100,38 @@ enum btrfs_err_code { BTRFS_ERROR_DEV_RAID1C4_MIN_NOT_MET, }; +/* Flags for struct btrfs_ioctl_get_csums_entry::type. */ +#define BTRFS_GET_CSUMS_HAS_CSUMS (1U << 0) +#define BTRFS_GET_CSUMS_ZEROED (1U << 1) +#define BTRFS_GET_CSUMS_NODATASUM (1U << 2) +#define BTRFS_GET_CSUMS_COMPRESSED (1U << 3) +#define BTRFS_GET_CSUMS_ENCRYPTED (1U << 4) +#define BTRFS_GET_CSUMS_INLINE (1U << 5) + +struct btrfs_ioctl_get_csums_entry { + /* File offset of this range. */ + __u64 offset; + /* Length in bytes. */ + __u64 length; + /* One of BTRFS_GET_CSUMS_* types. */ + __u32 type; + /* Padding, must be 0. */ + __u32 reserved; +}; + +struct btrfs_ioctl_get_csums_args { + /* In/out: file offset in bytes. */ + __u64 offset; + /* In/out: range length in bytes. */ + __u64 length; + /* In/out: buffer capacity / bytes written. */ + __u64 buf_size; + /* In: flags, must be 0 for now. */ + __u64 flags; + /* Out: entries of type btrfs_ioctl_get_csums_entry + csum data */ + __u8 buf[]; +}; + /* Flags for IOC_SHUTDOWN, must match XFS_FSOP_GOING_FLAGS_* flags. 
*/ #define BTRFS_SHUTDOWN_FLAGS_DEFAULT 0x0 #define BTRFS_SHUTDOWN_FLAGS_LOGFLUSH 0x1 @@ -1226,6 +1258,8 @@ enum btrfs_err_code { struct btrfs_ioctl_encoded_io_args) #define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \ struct btrfs_ioctl_subvol_wait) +#define BTRFS_IOC_GET_CSUMS _IOWR(BTRFS_IOCTL_MAGIC, 66, \ + struct btrfs_ioctl_get_csums_args) /* Shutdown ioctl should follow XFS's interfaces, thus not using btrfs magic. */ #define BTRFS_IOC_SHUTDOWN _IOR('X', 125, __u32) diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 13f71202845e8..bd87262f2e349 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -254,6 +254,13 @@ struct file_attr { #define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */ #define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */ #define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */ +/* + * Case handling flags (read-only, cannot be set via ioctl). + * Default (neither set) indicates POSIX semantics: case-sensitive + * lookups and case-preserving storage. + */ +#define FS_XFLAG_CASEFOLD 0x00040000 /* case-insensitive lookups */ +#define FS_XFLAG_CASENONPRESERVING 0x00080000 /* case not preserved */ #define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */ /* the read-only stuff doesn't really belong here, but any other place is @@ -388,7 +395,16 @@ struct file_attr { #define FS_DAX_FL 0x02000000 /* Inode is DAX */ #define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */ #define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */ -#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */ +/* + * FS_CASEFOLD_FL indicates case-insensitive name lookup. The + * bit is most often reported on directories, where it controls + * lookups of entries within. Filesystems that derive + * case-insensitivity from mount or volume state may also report + * it on non-directory inodes; userspace must not assume the bit + * is directory-only. 
FS_XFLAG_CASEFOLD reports the same + * information read-only via FS_IOC_FSGETXATTR. + */ +#define FS_CASEFOLD_FL 0x40000000 #define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ #define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ diff --git a/include/uapi/linux/nfsd_netlink.h b/include/uapi/linux/nfsd_netlink.h index 97c7447f4d14d..f5b75d5caba9f 100644 --- a/include/uapi/linux/nfsd_netlink.h +++ b/include/uapi/linux/nfsd_netlink.h @@ -10,6 +10,53 @@ #define NFSD_FAMILY_NAME "nfsd" #define NFSD_FAMILY_VERSION 1 +enum nfsd_cache_type { + NFSD_CACHE_TYPE_SVC_EXPORT = 1, + NFSD_CACHE_TYPE_EXPKEY = 2, +}; + +/* + * These flags are ordered to match the NFSEXP_* flags in + * include/linux/nfsd/export.h + */ +enum nfsd_export_flags { + NFSD_EXPORT_FLAGS_READONLY = 1, + NFSD_EXPORT_FLAGS_INSECURE_PORT = 2, + NFSD_EXPORT_FLAGS_ROOTSQUASH = 4, + NFSD_EXPORT_FLAGS_ALLSQUASH = 8, + NFSD_EXPORT_FLAGS_ASYNC = 16, + NFSD_EXPORT_FLAGS_GATHERED_WRITES = 32, + NFSD_EXPORT_FLAGS_NOREADDIRPLUS = 64, + NFSD_EXPORT_FLAGS_SECURITY_LABEL = 128, + NFSD_EXPORT_FLAGS_SIGN_FH = 256, + NFSD_EXPORT_FLAGS_NOHIDE = 512, + NFSD_EXPORT_FLAGS_NOSUBTREECHECK = 1024, + NFSD_EXPORT_FLAGS_NOAUTHNLM = 2048, + NFSD_EXPORT_FLAGS_MSNFS = 4096, + NFSD_EXPORT_FLAGS_FSID = 8192, + NFSD_EXPORT_FLAGS_CROSSMOUNT = 16384, + NFSD_EXPORT_FLAGS_NOACL = 32768, + NFSD_EXPORT_FLAGS_V4ROOT = 65536, + NFSD_EXPORT_FLAGS_PNFS = 131072, +}; + +/* + * These flags are ordered to match the NFSEXP_XPRTSEC_* flags in + * include/linux/nfsd/export.h + */ +enum nfsd_xprtsec_mode { + NFSD_XPRTSEC_MODE_NONE = 1, + NFSD_XPRTSEC_MODE_TLS = 2, + NFSD_XPRTSEC_MODE_MTLS = 4, +}; + +enum { + NFSD_A_CACHE_NOTIFY_CACHE_TYPE = 1, + + __NFSD_A_CACHE_NOTIFY_MAX, + NFSD_A_CACHE_NOTIFY_MAX = (__NFSD_A_CACHE_NOTIFY_MAX - 1) +}; + enum { NFSD_A_RPC_STATUS_XID = 1, NFSD_A_RPC_STATUS_FLAGS, @@ -82,6 +129,103 @@ enum { }; enum { + NFSD_A_FSLOCATION_HOST = 1, + NFSD_A_FSLOCATION_PATH, + + __NFSD_A_FSLOCATION_MAX, + 
NFSD_A_FSLOCATION_MAX = (__NFSD_A_FSLOCATION_MAX - 1) +}; + +enum { + NFSD_A_FSLOCATIONS_LOCATION = 1, + + __NFSD_A_FSLOCATIONS_MAX, + NFSD_A_FSLOCATIONS_MAX = (__NFSD_A_FSLOCATIONS_MAX - 1) +}; + +enum { + NFSD_A_AUTH_FLAVOR_PSEUDOFLAVOR = 1, + NFSD_A_AUTH_FLAVOR_FLAGS, + + __NFSD_A_AUTH_FLAVOR_MAX, + NFSD_A_AUTH_FLAVOR_MAX = (__NFSD_A_AUTH_FLAVOR_MAX - 1) +}; + +enum { + NFSD_A_SVC_EXPORT_SEQNO = 1, + NFSD_A_SVC_EXPORT_CLIENT, + NFSD_A_SVC_EXPORT_PATH, + NFSD_A_SVC_EXPORT_NEGATIVE, + NFSD_A_SVC_EXPORT_EXPIRY, + NFSD_A_SVC_EXPORT_ANON_UID, + NFSD_A_SVC_EXPORT_ANON_GID, + NFSD_A_SVC_EXPORT_FSLOCATIONS, + NFSD_A_SVC_EXPORT_UUID, + NFSD_A_SVC_EXPORT_SECINFO, + NFSD_A_SVC_EXPORT_XPRTSEC, + NFSD_A_SVC_EXPORT_FLAGS, + NFSD_A_SVC_EXPORT_FSID, + + __NFSD_A_SVC_EXPORT_MAX, + NFSD_A_SVC_EXPORT_MAX = (__NFSD_A_SVC_EXPORT_MAX - 1) +}; + +enum { + NFSD_A_SVC_EXPORT_REQS_REQUESTS = 1, + + __NFSD_A_SVC_EXPORT_REQS_MAX, + NFSD_A_SVC_EXPORT_REQS_MAX = (__NFSD_A_SVC_EXPORT_REQS_MAX - 1) +}; + +enum { + NFSD_A_EXPKEY_SEQNO = 1, + NFSD_A_EXPKEY_CLIENT, + NFSD_A_EXPKEY_FSIDTYPE, + NFSD_A_EXPKEY_FSID, + NFSD_A_EXPKEY_NEGATIVE, + NFSD_A_EXPKEY_EXPIRY, + NFSD_A_EXPKEY_PATH, + + __NFSD_A_EXPKEY_MAX, + NFSD_A_EXPKEY_MAX = (__NFSD_A_EXPKEY_MAX - 1) +}; + +enum { + NFSD_A_EXPKEY_REQS_REQUESTS = 1, + + __NFSD_A_EXPKEY_REQS_MAX, + NFSD_A_EXPKEY_REQS_MAX = (__NFSD_A_EXPKEY_REQS_MAX - 1) +}; + +enum { + NFSD_A_CACHE_FLUSH_MASK = 1, + + __NFSD_A_CACHE_FLUSH_MAX, + NFSD_A_CACHE_FLUSH_MAX = (__NFSD_A_CACHE_FLUSH_MAX - 1) +}; + +enum { + NFSD_A_UNLOCK_IP_ADDRESS = 1, + + __NFSD_A_UNLOCK_IP_MAX, + NFSD_A_UNLOCK_IP_MAX = (__NFSD_A_UNLOCK_IP_MAX - 1) +}; + +enum { + NFSD_A_UNLOCK_FILESYSTEM_PATH = 1, + + __NFSD_A_UNLOCK_FILESYSTEM_MAX, + NFSD_A_UNLOCK_FILESYSTEM_MAX = (__NFSD_A_UNLOCK_FILESYSTEM_MAX - 1) +}; + +enum { + NFSD_A_UNLOCK_EXPORT_PATH = 1, + + __NFSD_A_UNLOCK_EXPORT_MAX, + NFSD_A_UNLOCK_EXPORT_MAX = (__NFSD_A_UNLOCK_EXPORT_MAX - 1) +}; + +enum { NFSD_CMD_RPC_STATUS_GET = 1, 
NFSD_CMD_THREADS_SET, NFSD_CMD_THREADS_GET, @@ -91,9 +235,21 @@ enum { NFSD_CMD_LISTENER_GET, NFSD_CMD_POOL_MODE_SET, NFSD_CMD_POOL_MODE_GET, + NFSD_CMD_CACHE_NOTIFY, + NFSD_CMD_SVC_EXPORT_GET_REQS, + NFSD_CMD_SVC_EXPORT_SET_REQS, + NFSD_CMD_EXPKEY_GET_REQS, + NFSD_CMD_EXPKEY_SET_REQS, + NFSD_CMD_CACHE_FLUSH, + NFSD_CMD_UNLOCK_IP, + NFSD_CMD_UNLOCK_FILESYSTEM, + NFSD_CMD_UNLOCK_EXPORT, __NFSD_CMD_MAX, NFSD_CMD_MAX = (__NFSD_CMD_MAX - 1) }; +#define NFSD_MCGRP_NONE "none" +#define NFSD_MCGRP_EXPORTD "exportd" + #endif /* _UAPI_LINUX_NFSD_NETLINK_H */ diff --git a/include/uapi/linux/openat2.h b/include/uapi/linux/openat2.h index a5feb76049487..575c2c59d14a9 100644 --- a/include/uapi/linux/openat2.h +++ b/include/uapi/linux/openat2.h @@ -22,6 +22,13 @@ struct open_how { __u64 resolve; }; +/* + * how->flags bits exclusive to openat2(2). These live in the upper 32 bits + * of @flags so that they cannot be expressed by open(2) / openat(2), whose + * @flags argument is a C int. + */ +#define OPENAT2_REGULAR ((__u64)1 << 32) /* Only open regular files. */ + /* how->resolve flags for openat2(2). */ #define RESOLVE_NO_XDEV 0x01 /* Block mount-point crossings (includes bind-mounts). 
*/ diff --git a/include/uapi/linux/sunrpc_netlink.h b/include/uapi/linux/sunrpc_netlink.h new file mode 100644 index 0000000000000..34677f0ec2f95 --- /dev/null +++ b/include/uapi/linux/sunrpc_netlink.h @@ -0,0 +1,84 @@ +/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ +/* Do not edit directly, auto-generated from: */ +/* Documentation/netlink/specs/sunrpc_cache.yaml */ +/* YNL-GEN uapi header */ +/* To regenerate run: tools/net/ynl/ynl-regen.sh */ + +#ifndef _UAPI_LINUX_SUNRPC_NETLINK_H +#define _UAPI_LINUX_SUNRPC_NETLINK_H + +#define SUNRPC_FAMILY_NAME "sunrpc" +#define SUNRPC_FAMILY_VERSION 1 + +enum sunrpc_cache_type { + SUNRPC_CACHE_TYPE_IP_MAP = 1, + SUNRPC_CACHE_TYPE_UNIX_GID = 2, +}; + +enum { + SUNRPC_A_CACHE_NOTIFY_CACHE_TYPE = 1, + + __SUNRPC_A_CACHE_NOTIFY_MAX, + SUNRPC_A_CACHE_NOTIFY_MAX = (__SUNRPC_A_CACHE_NOTIFY_MAX - 1) +}; + +enum { + SUNRPC_A_IP_MAP_SEQNO = 1, + SUNRPC_A_IP_MAP_CLASS, + SUNRPC_A_IP_MAP_ADDR, + SUNRPC_A_IP_MAP_DOMAIN, + SUNRPC_A_IP_MAP_NEGATIVE, + SUNRPC_A_IP_MAP_EXPIRY, + + __SUNRPC_A_IP_MAP_MAX, + SUNRPC_A_IP_MAP_MAX = (__SUNRPC_A_IP_MAP_MAX - 1) +}; + +enum { + SUNRPC_A_IP_MAP_REQS_REQUESTS = 1, + + __SUNRPC_A_IP_MAP_REQS_MAX, + SUNRPC_A_IP_MAP_REQS_MAX = (__SUNRPC_A_IP_MAP_REQS_MAX - 1) +}; + +enum { + SUNRPC_A_UNIX_GID_SEQNO = 1, + SUNRPC_A_UNIX_GID_UID, + SUNRPC_A_UNIX_GID_GIDS, + SUNRPC_A_UNIX_GID_NEGATIVE, + SUNRPC_A_UNIX_GID_EXPIRY, + + __SUNRPC_A_UNIX_GID_MAX, + SUNRPC_A_UNIX_GID_MAX = (__SUNRPC_A_UNIX_GID_MAX - 1) +}; + +enum { + SUNRPC_A_UNIX_GID_REQS_REQUESTS = 1, + + __SUNRPC_A_UNIX_GID_REQS_MAX, + SUNRPC_A_UNIX_GID_REQS_MAX = (__SUNRPC_A_UNIX_GID_REQS_MAX - 1) +}; + +enum { + SUNRPC_A_CACHE_FLUSH_MASK = 1, + + __SUNRPC_A_CACHE_FLUSH_MAX, + SUNRPC_A_CACHE_FLUSH_MAX = (__SUNRPC_A_CACHE_FLUSH_MAX - 1) +}; + +enum { + SUNRPC_CMD_CACHE_NOTIFY = 1, + SUNRPC_CMD_IP_MAP_GET_REQS, + SUNRPC_CMD_IP_MAP_SET_REQS, + SUNRPC_CMD_UNIX_GID_GET_REQS, + SUNRPC_CMD_UNIX_GID_SET_REQS, + 
SUNRPC_CMD_CACHE_FLUSH, + + __SUNRPC_CMD_MAX, + SUNRPC_CMD_MAX = (__SUNRPC_CMD_MAX - 1) +}; + +#define SUNRPC_MCGRP_NONE "none" +#define SUNRPC_MCGRP_EXPORTD "exportd" + +#endif /* _UAPI_LINUX_SUNRPC_NETLINK_H */ |
