diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 12:59:26 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 12:59:26 +0100 |
| commit | c50ad36cd988b822ace4370c2d0431cd3488092b (patch) | |
| tree | dbcc92355a7c85d5b244aa28c887da2db043af54 /fs | |
| parent | 4793e201008e8b72a44e3b81b14d78e1ebc3f42d (diff) | |
| parent | 1f4cc4fcb11c640a3ef5bd1a75136111551fdf9a (diff) | |
| download | linux-next-history-c50ad36cd988b822ace4370c2d0431cd3488092b.tar.gz | |
Merge branch 'nfsd-next' of https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
# Conflicts:
# fs/exfat/file.c
Diffstat (limited to 'fs')
67 files changed, 2141 insertions, 270 deletions
diff --git a/fs/attr.c b/fs/attr.c index ded221defae6a..4f437fabb7f0f 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -547,7 +547,7 @@ int notify_change(struct mnt_idmap *idmap, struct dentry *dentry, * breaking the delegation in this case. */ if (!(ia_valid & ATTR_DELEG)) { - error = try_break_deleg(inode, delegated_inode); + error = try_break_deleg(inode, 0, delegated_inode); if (error) return error; } diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h index 174728904dc10..2607e51804b25 100644 --- a/fs/exfat/exfat_fs.h +++ b/fs/exfat/exfat_fs.h @@ -562,6 +562,8 @@ int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, unsigned int request_mask, unsigned int query_flags); +struct file_kattr; +int exfat_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); long exfat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long exfat_compat_ioctl(struct file *filp, unsigned int cmd, diff --git a/fs/exfat/file.c b/fs/exfat/file.c index e6e58584f567a..1f0328d1db738 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -15,6 +15,7 @@ #include <linux/filelock.h> #include <linux/falloc.h> #include <linux/iomap.h> +#include <linux/fileattr.h> #include "exfat_raw.h" #include "exfat_fs.h" @@ -333,6 +334,18 @@ int exfat_getattr(struct mnt_idmap *idmap, const struct path *path, return 0; } +int exfat_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + /* + * exFAT compares filenames through an upcase table, so lookup + * is always case-insensitive. Long names are stored in UTF-16 + * with case intact; CASENONPRESERVING stays clear. + */ + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + return 0; +} + int exfat_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { @@ -960,6 +973,7 @@ const struct file_operations exfat_file_operations = { }; const struct inode_operations exfat_file_inode_operations = { - .setattr = exfat_setattr, - .getattr = exfat_getattr, + .setattr = exfat_setattr, + .getattr = exfat_getattr, + .fileattr_get = exfat_fileattr_get, }; diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c index e69da0f5a111d..78f861f23246f 100644 --- a/fs/exfat/namei.c +++ b/fs/exfat/namei.c @@ -1283,4 +1283,5 @@ const struct inode_operations exfat_dir_inode_operations = { .rename = exfat_rename, .setattr = exfat_setattr, .getattr = exfat_getattr, + .fileattr_get = exfat_fileattr_get, }; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index 5a58f0bf8ce83..99ed9228a677b 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -10,6 +10,8 @@ #include <linux/fs_context.h> #include <linux/fs_parser.h> +struct file_kattr; + /* * vfat shortname flags */ @@ -408,6 +410,7 @@ extern void fat_truncate_blocks(struct inode *inode, loff_t offset); extern int fat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags); +int fat_fileattr_get(struct dentry *dentry, struct file_kattr *fa); extern int fat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/fat/file.c b/fs/fat/file.c index becccdd2e501a..37e7049b4c8c4 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -17,6 +17,7 @@ #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/falloc.h> +#include <linux/fileattr.h> #include "fat.h" static long fat_fallocate(struct file *file, int mode, @@ -398,6 +399,40 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset) fat_flush_inodes(inode->i_sb, inode, NULL); } +int fat_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb); + bool case_sensitive; + + /* + * FAT filesystems are case-insensitive by default. VFAT + * becomes case-sensitive when mounted with 'check=strict', + * which installs vfat_dentry_ops. MSDOS has no such option; + * its 'nocase' mount option selects case-sensitive matching. + * + * VFAT long filename entries preserve case. Without VFAT, only + * uppercased 8.3 short names are stored. MSDOS with 'nocase' + * also preserves case. + */ + if (sbi->options.isvfat) + case_sensitive = sbi->options.name_check == 's'; + else + case_sensitive = sbi->options.nocase; + + if (!case_sensitive) { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + if (!sbi->options.isvfat) + fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING; + } + if (d_inode(dentry)->i_flags & S_IMMUTABLE) { + fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; + fa->flags |= FS_IMMUTABLE_FL; + } + return 0; +} +EXPORT_SYMBOL_GPL(fat_fileattr_get); + int fat_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int flags) { @@ -575,5 +610,6 @@ EXPORT_SYMBOL_GPL(fat_setattr); const struct inode_operations fat_file_inode_operations = { .setattr = fat_setattr, .getattr = fat_getattr, + .fileattr_get = fat_fileattr_get, .update_time = fat_update_time, }; diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 4cc65f330fb7e..0fd2971ad4b13 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c @@ -644,6 +644,7 @@ static const struct inode_operations msdos_dir_inode_operations = { .rename = msdos_rename, .setattr = fat_setattr, .getattr = fat_getattr, + .fileattr_get = fat_fileattr_get, .update_time = fat_update_time, }; diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 918b3756674c3..e909447873e36 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c @@ -1185,6 +1185,7 @@ static const struct inode_operations vfat_dir_inode_operations = { .rename = vfat_rename2, .setattr = fat_setattr, .getattr = fat_getattr, + .fileattr_get = fat_fileattr_get, .update_time = fat_update_time, }; diff --git a/fs/file_attr.c b/fs/file_attr.c index da983e105d708..bfb00d256dd56 100644 --- a/fs/file_attr.c +++ b/fs/file_attr.c @@ -15,12 +15,10 @@ * @fa: fileattr pointer * @xflags: FS_XFLAG_* flags * - * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags). All - * other fields are zeroed. + * Set ->fsx_xflags, ->fsx_valid and ->flags (translated xflags). */ void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags) { - memset(fa, 0, sizeof(*fa)); fa->fsx_valid = true; fa->fsx_xflags = xflags; if (fa->fsx_xflags & FS_XFLAG_IMMUTABLE) @@ -39,6 +37,8 @@ void fileattr_fill_xflags(struct file_kattr *fa, u32 xflags) fa->flags |= FS_PROJINHERIT_FL; if (fa->fsx_xflags & FS_XFLAG_VERITY) fa->flags |= FS_VERITY_FL; + if (fa->fsx_xflags & FS_XFLAG_CASEFOLD) + fa->flags |= FS_CASEFOLD_FL; } EXPORT_SYMBOL(fileattr_fill_xflags); @@ -48,11 +48,9 @@ EXPORT_SYMBOL(fileattr_fill_xflags); * @flags: FS_*_FL flags * * Set ->flags, ->flags_valid and ->fsx_xflags (translated flags). - * All other fields are zeroed. */ void fileattr_fill_flags(struct file_kattr *fa, u32 flags) { - memset(fa, 0, sizeof(*fa)); fa->flags_valid = true; fa->flags = flags; if (fa->flags & FS_SYNC_FL) @@ -71,6 +69,8 @@ void fileattr_fill_flags(struct file_kattr *fa, u32 flags) fa->fsx_xflags |= FS_XFLAG_PROJINHERIT; if (fa->flags & FS_VERITY_FL) fa->fsx_xflags |= FS_XFLAG_VERITY; + if (fa->flags & FS_CASEFOLD_FL) + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; } EXPORT_SYMBOL(fileattr_fill_flags); @@ -325,7 +325,7 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct dentry *dentry = file->f_path.dentry; - struct file_kattr fa; + struct file_kattr fa = {}; unsigned int flags; int err; @@ -357,7 +357,7 @@ int ioctl_fssetxattr(struct file *file, void __user *argp) { struct mnt_idmap *idmap = file_mnt_idmap(file); struct dentry *dentry = file->f_path.dentry; - struct file_kattr fa; + struct file_kattr fa = {}; int err; err = copy_fsxattr_from_user(&fa, argp); @@ -431,7 +431,7 @@ SYSCALL_DEFINE5(file_setattr, int, dfd, const char __user *, filename, struct path filepath __free(path_put) = {}; unsigned int lookup_flags = 0; struct file_attr fattr; - struct file_kattr fa; + struct file_kattr fa = {}; int error; BUILD_BUG_ON(sizeof(struct file_attr) < FILE_ATTR_SIZE_VER0); diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index f5e7efe924e78..c4c6e1623f55d 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c @@ -328,4 +328,5 @@ const struct inode_operations hfs_dir_inode_operations = { .rmdir = hfs_remove, .rename = hfs_rename, .setattr = hfs_inode_setattr, + .fileattr_get = hfs_fileattr_get, }; diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index ac0e83f77a0f1..1b23448c9a48b 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h @@ -177,6 +177,8 @@ extern int hfs_get_block(struct inode *inode, sector_t block, extern const struct address_space_operations hfs_aops; extern const struct address_space_operations hfs_btree_aops; +struct file_kattr; +int hfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int hfs_write_begin(const struct kiocb *iocb, struct address_space *mapping, loff_t pos, unsigned int len, struct folio **foliop, void **fsdata); diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 89b33a9d46d5c..f41cc261684d9 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -18,6 +18,7 @@ #include <linux/uio.h> #include <linux/xattr.h> #include <linux/blkdev.h> +#include <linux/fileattr.h> #include "hfs_fs.h" #include "btree.h" @@ -699,6 +700,18 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, return ret; } +int hfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + /* + * HFS compares filenames using Mac OS Roman case folding, so + * lookup is always case-insensitive. Names are stored on disk + * with case intact; CASENONPRESERVING stays clear. + */ + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + return 0; +} + static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, @@ -715,4 +728,5 @@ static const struct inode_operations hfs_file_inode_operations = { .lookup = hfs_file_lookup, .setattr = hfs_inode_setattr, .listxattr = generic_listxattr, + .fileattr_get = hfs_fileattr_get, }; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index d05891ec492e3..5565c14b4bf69 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -740,6 +740,7 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct file_kattr *fa) { struct inode *inode = d_inode(dentry); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); unsigned int flags = 0; if (inode->i_flags & S_IMMUTABLE) @@ -748,6 +749,8 @@ int hfsplus_fileattr_get(struct dentry *dentry, struct file_kattr *fa) flags |= FS_APPEND_FL; if (hip->userflags & HFSPLUS_FLG_NODUMP) flags |= FS_NODUMP_FL; + if (test_bit(HFSPLUS_SB_CASEFOLD, &sbi->flags)) + flags |= FS_CASEFOLD_FL; fileattr_fill_flags(fa, flags); @@ -759,13 +762,24 @@ int hfsplus_fileattr_set(struct mnt_idmap *idmap, { struct inode *inode = d_inode(dentry); struct hfsplus_inode_info *hip = HFSPLUS_I(inode); + struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); + unsigned int allowed = FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NODUMP_FL; unsigned int new_fl = 0; if (fileattr_has_fsx(fa)) return -EOPNOTSUPP; + /* + * FS_CASEFOLD_FL reflects HFSPLUS_SB_CASEFOLD, a mount-time + * property. Accept it as a no-op so chattr's RMW round-trip + * succeeds; reject any attempt to enable it on a volume that + * was not formatted case-insensitive. + */ + if (test_bit(HFSPLUS_SB_CASEFOLD, &sbi->flags)) + allowed |= FS_CASEFOLD_FL; + /* don't silently ignore unsupported ext2 flags */ - if (fa->flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) + if (fa->flags & ~allowed) return -EOPNOTSUPP; if (fa->flags & FS_IMMUTABLE_FL) diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index 6d220eab531e5..cc587cd251623 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -15,6 +15,7 @@ #include <linux/filelock.h> #include <linux/slab.h> #include "isofs.h" +#include <linux/fileattr.h> int isofs_name_translate(struct iso_directory_record *de, char *new, struct inode *inode) { @@ -268,6 +269,20 @@ static int isofs_readdir(struct file *file, struct dir_context *ctx) return result; } +int isofs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct isofs_sb_info *sbi = ISOFS_SB(dentry->d_sb); + + if (sbi->s_check == 'r') { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + } + if (!sbi->s_joliet_level && !sbi->s_rock && + (sbi->s_mapping == 'n' || sbi->s_mapping == 'a')) + fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING; + return 0; +} + const struct file_operations isofs_dir_operations = { .llseek = generic_file_llseek, @@ -282,6 +297,7 @@ const struct file_operations isofs_dir_operations = const struct inode_operations isofs_dir_inode_operations = { .lookup = isofs_lookup, + .fileattr_get = isofs_fileattr_get, }; diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 5065558375333..0ec8b24a42edc 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h @@ -197,6 +197,9 @@ isofs_normalize_block_and_offset(struct iso_directory_record* de, } } +struct file_kattr; +int isofs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); + extern const struct inode_operations isofs_dir_inode_operations; extern const struct file_operations isofs_dir_operations; extern const struct address_space_operations isofs_symlink_aops; diff --git a/fs/locks.c b/fs/locks.c index fead53474c300..6e4ff7fcec053 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1582,30 +1582,96 @@ trace: return rc; } -static bool -any_leases_conflict(struct inode *inode, struct file_lease *breaker) +#define IGNORE_MASK (FL_IGN_DIR_CREATE | FL_IGN_DIR_DELETE | FL_IGN_DIR_RENAME) + +/** + * inode_lease_ignore_mask - return union of all ignored inode events for this inode + * @inode: inode of which to get ignore mask + * + * Walk the list of leases, and return the result of all of + * their FL_IGN_DIR_* bits or'ed together. + */ +u32 +inode_lease_ignore_mask(struct inode *inode) { - struct file_lock_context *ctx = inode->i_flctx; + struct file_lock_context *ctx; struct file_lock_core *flc; + u32 mask = 0; - lockdep_assert_held(&ctx->flc_lock); + ctx = locks_inode_context(inode); + if (!ctx) + return 0; + spin_lock(&ctx->flc_lock); list_for_each_entry(flc, &ctx->flc_lease, flc_list) { - if (leases_conflict(flc, &breaker->c)) - return true; + mask |= flc->flc_flags & IGNORE_MASK; + /* If we already have everything, we can stop */ + if (mask == IGNORE_MASK) + break; } + spin_unlock(&ctx->flc_lock); + return mask; +} +EXPORT_SYMBOL_GPL(inode_lease_ignore_mask); + +static bool +ignore_dir_deleg_break(struct file_lease *fl, unsigned int flags) +{ + if ((flags & LEASE_BREAK_DIR_CREATE) && (fl->c.flc_flags & FL_IGN_DIR_CREATE)) + return true; + if ((flags & LEASE_BREAK_DIR_DELETE) && (fl->c.flc_flags & FL_IGN_DIR_DELETE)) + return true; + if ((flags & LEASE_BREAK_DIR_RENAME) && (fl->c.flc_flags & FL_IGN_DIR_RENAME)) + return true; + return false; } +static unsigned int +break_lease_flags_to_type(unsigned int flags) +{ + if (flags & LEASE_BREAK_LEASE) + return FL_LEASE; + else if (flags & LEASE_BREAK_DELEG) + return FL_DELEG; + else if (flags & LEASE_BREAK_LAYOUT) + return FL_LAYOUT; + else + return 0; + +} + +static struct file_lease * +first_visible_lease(struct inode *inode, struct file_lease *new_fl, unsigned int flags) +{ + struct file_lock_context *ctx = locks_inode_context(inode); + struct file_lease *fl; + + lockdep_assert_held(&ctx->flc_lock); + + list_for_each_entry(fl, &ctx->flc_lease, c.flc_list) { + if (!leases_conflict(&fl->c, &new_fl->c)) + continue; + if (S_ISDIR(inode->i_mode) && ignore_dir_deleg_break(fl, flags)) + continue; + return fl; + } + return NULL; +} + + /** - * __break_lease - revoke all outstanding leases on file - * @inode: the inode of the file to return - * @flags: LEASE_BREAK_* flags + * __break_lease - revoke all outstanding leases on file + * @inode: the inode of the file to return + * @flags: LEASE_BREAK_* flags * - * break_lease (inlined for speed) has checked there already is at least - * some kind of lock (maybe a lease) on this file. Leases are broken on - * a call to open() or truncate(). This function can block waiting for the - * lease break unless you specify LEASE_BREAK_NONBLOCK. + * break_lease (inlined for speed) has checked there already is at least + * some kind of lock (maybe a lease) on this file. Leases and Delegations + * are broken on a call to open() or truncate(). Delegations are also + * broken on any event that would change the ctime. Directory delegations + * are broken whenever the directory changes (unless the delegation is set + * up to ignore the event). This function can block waiting for the lease + * break unless you specify LEASE_BREAK_NONBLOCK. */ int __break_lease(struct inode *inode, unsigned int flags) { @@ -1617,13 +1683,10 @@ int __break_lease(struct inode *inode, unsigned int flags) bool want_write = !(flags & LEASE_BREAK_OPEN_RDONLY); int error = 0; - if (flags & LEASE_BREAK_LEASE) - type = FL_LEASE; - else if (flags & LEASE_BREAK_DELEG) - type = FL_DELEG; - else if (flags & LEASE_BREAK_LAYOUT) - type = FL_LAYOUT; - else + trace_break_lease(inode, flags); + + type = break_lease_flags_to_type(flags); + if (!type) return -EINVAL; new_fl = lease_alloc(NULL, type, want_write ? F_WRLCK : F_RDLCK); @@ -1642,7 +1705,7 @@ int __break_lease(struct inode *inode, unsigned int flags) time_out_leases(inode, &dispose); - if (!any_leases_conflict(inode, new_fl)) + if (!first_visible_lease(inode, new_fl, flags)) goto out; break_time = 0; @@ -1655,6 +1718,8 @@ int __break_lease(struct inode *inode, unsigned int flags) list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, c.flc_list) { if (!leases_conflict(&fl->c, &new_fl->c)) continue; + if (S_ISDIR(inode->i_mode) && ignore_dir_deleg_break(fl, flags)) + continue; if (want_write) { if (fl->c.flc_flags & FL_UNLOCK_PENDING) continue; @@ -1670,7 +1735,8 @@ int __break_lease(struct inode *inode, unsigned int flags) locks_delete_lock_ctx(&fl->c, &dispose); } - if (list_empty(&ctx->flc_lease)) + fl = first_visible_lease(inode, new_fl, flags); + if (!fl) goto out; if (flags & LEASE_BREAK_NONBLOCK) { @@ -1680,7 +1746,6 @@ int __break_lease(struct inode *inode, unsigned int flags) } restart: - fl = list_first_entry(&ctx->flc_lease, struct file_lease, c.flc_list); break_time = fl->fl_break_time; if (break_time != 0) { if (time_after(jiffies, break_time)) { @@ -1691,7 +1756,7 @@ restart: } else break_time++; locks_insert_block(&fl->c, &new_fl->c, leases_conflict); - trace_break_lease_block(inode, new_fl); + trace_break_lease_block(inode, fl); spin_unlock(&ctx->flc_lock); percpu_up_read(&file_rwsem); @@ -1702,7 +1767,7 @@ restart: percpu_down_read(&file_rwsem); spin_lock(&ctx->flc_lock); - trace_break_lease_unblock(inode, new_fl); + trace_break_lease_unblock(inode, NULL); __locks_delete_block(&new_fl->c); if (error >= 0) { /* @@ -1711,7 +1776,8 @@ restart: */ if (error == 0) time_out_leases(inode, &dispose); - if (any_leases_conflict(inode, new_fl)) + fl = first_visible_lease(inode, new_fl, flags); + if (fl) goto restart; error = 0; } diff --git a/fs/namei.c b/fs/namei.c index c7fac83c9a85e..3a3a2e5e77a0f 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4198,7 +4198,7 @@ int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = try_break_deleg(dir, di); + error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, di); if (error) return error; error = dir->i_op->create(idmap, dir, dentry, mode, true); @@ -4497,7 +4497,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { /* but break the directory lease first! */ - error = try_break_deleg(dir_inode, delegated_inode); + error = try_break_deleg(dir_inode, LEASE_BREAK_DIR_CREATE, delegated_inode); if (error) goto out_dput; @@ -5113,7 +5113,7 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; - error = try_break_deleg(dir, delegated_inode); + error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode); if (error) return error; @@ -5254,7 +5254,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (max_links && dir->i_nlink >= max_links) goto err; - error = try_break_deleg(dir, delegated_inode); + error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode); if (error) goto err; @@ -5359,7 +5359,7 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, if (error) goto out; - error = try_break_deleg(dir, delegated_inode); + error = try_break_deleg(dir, LEASE_BREAK_DIR_DELETE, delegated_inode); if (error) goto out; @@ -5489,10 +5489,10 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, else { error = security_inode_unlink(dir, dentry); if (!error) { - error = try_break_deleg(dir, delegated_inode); + error = try_break_deleg(dir, LEASE_BREAK_DIR_DELETE, delegated_inode); if (error) goto out; - error = try_break_deleg(target, delegated_inode); + error = try_break_deleg(target, 0, delegated_inode); if (error) goto out; error = dir->i_op->unlink(dir, dentry); @@ -5636,7 +5636,7 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; - error = try_break_deleg(dir, delegated_inode); + error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode); if (error) return error; @@ -5767,9 +5767,9 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else { - error = try_break_deleg(dir, delegated_inode); + error = try_break_deleg(dir, LEASE_BREAK_DIR_CREATE, delegated_inode); if (!error) - error = try_break_deleg(inode, delegated_inode); + error = try_break_deleg(inode, 0, delegated_inode); if (!error) error = dir->i_op->link(old_dentry, dir, new_dentry); } @@ -6033,21 +6033,24 @@ int vfs_rename(struct renamedata *rd) old_dir->i_nlink >= max_links) goto out; } - error = try_break_deleg(old_dir, delegated_inode); + error = try_break_deleg(old_dir, + old_dir == new_dir ? LEASE_BREAK_DIR_RENAME : + LEASE_BREAK_DIR_DELETE, + delegated_inode); if (error) goto out; if (new_dir != old_dir) { - error = try_break_deleg(new_dir, delegated_inode); + error = try_break_deleg(new_dir, LEASE_BREAK_DIR_CREATE, delegated_inode); if (error) goto out; } if (!is_dir) { - error = try_break_deleg(source, delegated_inode); + error = try_break_deleg(source, 0, delegated_inode); if (error) goto out; } if (target && !new_is_dir) { - error = try_break_deleg(target, delegated_inode); + error = try_break_deleg(target, 0, delegated_inode); if (error) goto out; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index be02bb227741d..73b95318ba48b 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -914,6 +914,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, */ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) { + struct nfs_pathconf pathinfo = { }; struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; int error; @@ -933,15 +934,28 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str nfs_server_set_fsinfo(server, &fsinfo); - /* Get some general file system info */ - if (server->namelen == 0) { - struct nfs_pathconf pathinfo; + pathinfo.fattr = fattr; + nfs_fattr_init(fattr); - pathinfo.fattr = fattr; - nfs_fattr_init(fattr); + if (clp->rpc_ops->version < 4 || server->namelen == 0) { + if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) { + if (server->namelen == 0) + server->namelen = pathinfo.max_namelen; + if (clp->rpc_ops->version < 4) { + unsigned int caps = server->caps; - if (clp->rpc_ops->pathconf(server, mntfh, &pathinfo) >= 0) - server->namelen = pathinfo.max_namelen; + caps &= ~(NFS_CAP_CASE_INSENSITIVE | + NFS_CAP_CASE_NONPRESERVING); + if (pathinfo.case_insensitive) + caps |= NFS_CAP_CASE_INSENSITIVE; + if (!pathinfo.case_preserving) + caps |= NFS_CAP_CASE_NONPRESERVING; + server->caps = caps; + } + } else if (clp->rpc_ops->version < 4) { + server->caps &= ~(NFS_CAP_CASE_INSENSITIVE | + NFS_CAP_CASE_NONPRESERVING); + } } if (clp->rpc_ops->discover_trunking != NULL && diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 2ed2126201f41..acd3511c04a6c 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -156,7 +156,7 @@ static int nfs_dns_upcall(struct cache_detail *cd, if (!nfs_cache_upcall(cd, key->hostname)) return 0; clear_bit(CACHE_PENDING, &ch->flags); - return sunrpc_cache_pipe_upcall_timeout(cd, ch); + return sunrpc_cache_upcall_warn(cd, ch); } static int nfs_dns_match(struct cache_head *ca, diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e26030e736966..170d32c217ae4 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -41,6 +41,7 @@ #include <linux/freezer.h> #include <linux/uaccess.h> #include <linux/iversion.h> +#include <linux/fileattr.h> #include "nfs4_fs.h" #include "callback.h" @@ -1095,6 +1096,20 @@ out: } EXPORT_SYMBOL_GPL(nfs_getattr); +int nfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct inode *inode = d_inode(dentry); + + if (nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE)) { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + } + if (nfs_server_capable(inode, NFS_CAP_CASE_NONPRESERVING)) + fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING; + return 0; +} +EXPORT_SYMBOL_GPL(nfs_fileattr_get); + static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) { refcount_set(&l_ctx->count, 1); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 18d46b0e71ddc..ec2b3d9843989 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -451,6 +451,9 @@ extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags); extern bool nfs_check_cache_invalid(struct inode *, unsigned long); extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); +struct file_kattr; +int nfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); + #if IS_ENABLED(CONFIG_NFS_LOCALIO) /* localio.c */ struct nfs_local_dio { diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index af9be0c5f5163..6d0073c24771b 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -246,11 +246,13 @@ nfs_namespace_setattr(struct mnt_idmap *idmap, struct dentry *dentry, const struct inode_operations nfs_mountpoint_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, + .fileattr_get = nfs_fileattr_get, }; const struct inode_operations nfs_referral_inode_operations = { .getattr = nfs_namespace_getattr, .setattr = nfs_namespace_setattr, + .fileattr_get = nfs_fileattr_get, }; static void nfs_expire_automounts(struct work_struct *work) diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 95d7cd564b746..b80d0c5efc279 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -1053,6 +1053,7 @@ static const struct inode_operations nfs3_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .fileattr_get = nfs_fileattr_get, #ifdef CONFIG_NFS_V3_ACL .listxattr = nfs3_listxattr, .get_inode_acl = nfs3_get_acl, @@ -1064,6 +1065,7 @@ static const struct inode_operations nfs3_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .fileattr_get = nfs_fileattr_get, #ifdef CONFIG_NFS_V3_ACL .listxattr = nfs3_listxattr, .get_inode_acl = nfs3_get_acl, diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index e17d729084125..e745e78faab0a 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -2276,8 +2276,11 @@ static int decode_pathconf3resok(struct xdr_stream *xdr, if (unlikely(!p)) return -EIO; result->max_link = be32_to_cpup(p++); - result->max_namelen = be32_to_cpup(p); - /* ignore remaining fields */ + result->max_namelen = be32_to_cpup(p++); + p++; /* ignore no_trunc */ + p++; /* ignore chown_restricted */ + result->case_insensitive = be32_to_cpup(p++) != 0; + result->case_preserving = be32_to_cpup(p) != 0; return 0; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a9b8d482d2894..0715a6745d1fa 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3933,7 +3933,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS | NFS_CAP_SECURITY_LABEL | NFS_CAP_FS_LOCATIONS | - NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME); + NFS_CAP_OPEN_XOR | NFS_CAP_DELEGTIME | + NFS_CAP_CASE_INSENSITIVE | NFS_CAP_CASE_NONPRESERVING); server->fattr_valid = NFS_ATTR_FATTR_V4; if (res.attr_bitmask[0] & FATTR4_WORD0_ACL && res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL) @@ -3944,8 +3945,9 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->caps |= NFS_CAP_SYMLINKS; if (res.case_insensitive) server->caps |= NFS_CAP_CASE_INSENSITIVE; - if (res.case_preserving) - server->caps |= NFS_CAP_CASE_PRESERVING; + if ((res.attr_bitmask[0] & FATTR4_WORD0_CASE_PRESERVING) && + !res.case_preserving) + server->caps |= NFS_CAP_CASE_NONPRESERVING; #ifdef CONFIG_NFS_V4_SECURITY_LABEL if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL) server->caps |= NFS_CAP_SECURITY_LABEL; @@ -10617,6 +10619,7 @@ static const struct inode_operations nfs4_dir_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, .listxattr = nfs4_listxattr, + .fileattr_get = nfs_fileattr_get, }; static const struct inode_operations nfs4_file_inode_operations = { @@ -10624,6 +10627,7 @@ static const struct inode_operations nfs4_file_inode_operations = { .getattr = nfs_getattr, .setattr = nfs_setattr, .listxattr = nfs4_listxattr, + .fileattr_get = nfs_fileattr_get, }; static struct nfs_server *nfs4_clone_server(struct nfs_server *source, diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 70795684b8e84..03c2c1f31be9a 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -598,6 +598,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, { info->max_link = 0; info->max_namelen = NFS2_MAXNAMLEN; + info->case_preserving = true; return 0; } @@ -718,12 +719,14 @@ static const struct inode_operations nfs_dir_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .fileattr_get = nfs_fileattr_get, }; static const struct inode_operations nfs_file_inode_operations = { .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .fileattr_get = nfs_fileattr_get, }; const struct nfs_rpc_ops nfs_v2_clientops = { diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 58146e9354020..74a072896f8d9 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -22,6 +22,8 @@ #include <linux/mm.h> #include <linux/string.h> +#include "internal.h" + /* Symlink caching in the page cache is even more simplistic * and straight-forward than readdir caching. */ @@ -74,4 +76,5 @@ const struct inode_operations nfs_symlink_inode_operations = { .get_link = nfs_get_link, .getattr = nfs_getattr, .setattr = nfs_setattr, + .fileattr_get = nfs_fileattr_get, }; diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 9d829c84f374e..5be7721c22c23 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2014-2016 Christoph Hellwig. */ -#include <linux/exportfs.h> +#include <linux/exportfs_block.h> #include <linux/iomap.h> #include <linux/slab.h> #include <linux/pr.h> @@ -32,8 +32,8 @@ nfsd4_block_map_extent(struct inode *inode, const struct svc_fh *fhp, u32 device_generation = 0; int error; - error = sb->s_export_op->map_blocks(inode, offset, length, &iomap, - iomode != IOMODE_READ, &device_generation); + error = sb->s_export_op->block_ops->map_blocks(inode, offset, length, + &iomap, iomode != IOMODE_READ, &device_generation); if (error) { if (error == -ENXIO) return nfserr_layoutunavailable; @@ -179,23 +179,20 @@ static __be32 nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, struct iomap *iomaps, int nr_iomaps) { - struct timespec64 mtime = inode_get_mtime(inode); - struct iattr iattr = { .ia_valid = 0 }; int error; - if (lcp->lc_mtime.tv_nsec == UTIME_NOW || - timespec64_compare(&lcp->lc_mtime, &mtime) < 0) - lcp->lc_mtime = current_time(inode); - iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; - iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; - - if (lcp->lc_size_chg) { - iattr.ia_valid |= ATTR_SIZE; - iattr.ia_size = lcp->lc_newsize; - } - - error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, - nr_iomaps, &iattr); + /* + * This ignores the client provided mtime in loca_time_modify, as a + * fully client specified mtime doesn't really fit into the Linux + * multi-grain timestamp architecture. + * + * RFC 8881 Section 18.42 makes it clear that the client provided + * timestamp is a "may" condition, and clients that want to force a + * specific timestamp should send a separate SETATTR in the compound. + */ + error = inode->i_sb->s_export_op->block_ops->commit_blocks(inode, + iomaps, nr_iomaps, + lcp->lc_size_chg ? lcp->lc_newsize : 0); kfree(iomaps); return nfserrno(error); } @@ -218,8 +215,8 @@ nfsd4_block_get_device_info_simple(struct super_block *sb, b->type = PNFS_BLOCK_VOLUME_SIMPLE; b->simple.sig_len = PNFS_BLOCK_UUID_LEN; - return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, - &b->simple.offset); + return sb->s_export_op->block_ops->get_uuid(sb, b->simple.sig, + &b->simple.sig_len, &b->simple.offset); } static __be32 diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 665153f1720e0..eb020054f9a3f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -17,6 +17,8 @@ #include <linux/module.h> #include <linux/exportfs.h> #include <linux/sunrpc/svc_xprt.h> +#include <net/genetlink.h> +#include <uapi/linux/nfsd_netlink.h> #include "nfsd.h" #include "nfsfh.h" @@ -24,6 +26,7 @@ #include "pnfs.h" #include "filecache.h" #include "trace.h" +#include "netlink.h" #define NFSDDBG_FACILITY NFSDDBG_EXPORT @@ -64,7 +67,7 @@ static void expkey_put(struct kref *ref) static int expkey_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h); + return sunrpc_cache_upcall(cd, h); } static void expkey_request(struct cache_detail *cd, @@ -263,12 +266,18 @@ static void expkey_flush(void) mutex_unlock(&nfsd_mutex); } +static int expkey_notify(struct cache_detail *cd, struct cache_head *h) +{ + return nfsd_cache_notify(cd, h, NFSD_CACHE_TYPE_EXPKEY); +} + static const struct cache_detail svc_expkey_cache_template = { .owner = THIS_MODULE, .hash_size = EXPKEY_HASHMAX, .name = "nfsd.fh", .cache_put = expkey_put, .cache_upcall = expkey_upcall, + .cache_notify = expkey_notify, .cache_request = expkey_request, .cache_parse = expkey_parse, .cache_show = expkey_show, @@ -319,6 +328,266 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new, return NULL; } +/** + * nfsd_nl_expkey_get_reqs_dumpit - dump pending expkey requests + * @skb: reply buffer + * @cb: netlink metadata and command arguments + * + * Walk the expkey cache's pending request list and create a netlink + * message with a nested entry for each cache_request, containing the + * seqno, client string, fsidtype and fsid. + * + * Uses cb->args[0] as a seqno cursor for dump continuation across + * multiple netlink messages. + * + * Returns the size of the reply or a negative errno. + */ +int nfsd_nl_expkey_get_reqs_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nfsd_net *nn; + struct cache_detail *cd; + struct cache_head **items; + u64 *seqnos; + int cnt, i, emitted; + void *hdr; + int ret; + + nn = net_generic(sock_net(skb->sk), nfsd_net_id); + + mutex_lock(&nfsd_mutex); + + cd = nn->svc_expkey_cache; + if (!cd) { + ret = -ENODEV; + goto out_unlock; + } + + cnt = sunrpc_cache_requests_count(cd); + if (!cnt) { + ret = 0; + goto out_unlock; + } + + items = kcalloc(cnt, sizeof(*items), GFP_KERNEL); + seqnos = kcalloc(cnt, sizeof(*seqnos), GFP_KERNEL); + if (!items || !seqnos) { + ret = -ENOMEM; + goto out_alloc; + } + + cnt = sunrpc_cache_requests_snapshot(cd, items, seqnos, cnt, + cb->args[0]); + if (!cnt) { + ret = 0; + goto out_alloc; + } + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &nfsd_nl_family, + NLM_F_MULTI, NFSD_CMD_EXPKEY_GET_REQS); + if (!hdr) { + ret = -ENOBUFS; + goto out_put; + } + + emitted = 0; + for (i = 0; i < cnt; i++) { + struct svc_expkey *ek; + struct nlattr *nest; + + ek = container_of(items[i], struct svc_expkey, h); + + nest = nla_nest_start(skb, NFSD_A_EXPKEY_REQS_REQUESTS); + if (!nest) + break; + + if (nla_put_u64_64bit(skb, NFSD_A_EXPKEY_SEQNO, + seqnos[i], 0) || + nla_put_string(skb, NFSD_A_EXPKEY_CLIENT, + ek->ek_client->name) || + nla_put_u8(skb, NFSD_A_EXPKEY_FSIDTYPE, + ek->ek_fsidtype) || + nla_put(skb, NFSD_A_EXPKEY_FSID, + key_len(ek->ek_fsidtype), ek->ek_fsid)) { + nla_nest_cancel(skb, nest); + break; + } + + nla_nest_end(skb, nest); + cb->args[0] = seqnos[i]; + emitted++; + } + + if (!emitted) { + genlmsg_cancel(skb, hdr); + ret = -EMSGSIZE; + goto out_put; + } + + genlmsg_end(skb, hdr); + ret = skb->len; +out_put: + for (i = 0; i < cnt; i++) + cache_put(items[i], cd); +out_alloc: + kfree(seqnos); + kfree(items); +out_unlock: + mutex_unlock(&nfsd_mutex); + return ret; +} + +/** + * nfsd_nl_parse_one_expkey - parse one expkey entry from netlink + * @cd: cache_detail for the expkey cache + * @attr: nested attribute containing expkey fields + * + * Parses one expkey entry from a netlink message and updates the + * cache. Mirrors the logic in expkey_parse(). + * + * Returns 0 on success or a negative errno. + */ +static int nfsd_nl_parse_one_expkey(struct cache_detail *cd, + struct nlattr *attr) +{ + struct nlattr *tb[NFSD_A_EXPKEY_PATH + 1]; + struct auth_domain *dom = NULL; + struct svc_expkey key; + struct svc_expkey *ek = NULL; + struct timespec64 boot; + int err; + u8 fsidtype; + int fsid_len; + + err = nla_parse_nested(tb, NFSD_A_EXPKEY_PATH, attr, + nfsd_expkey_nl_policy, NULL); + if (err) + return err; + + /* client (required) */ + if (!tb[NFSD_A_EXPKEY_CLIENT]) + return -EINVAL; + + dom = auth_domain_find(nla_data(tb[NFSD_A_EXPKEY_CLIENT])); + if (!dom) + return -ENOENT; + + /* fsidtype (required) */ + if (!tb[NFSD_A_EXPKEY_FSIDTYPE]) { + err = -EINVAL; + goto out_dom; + } + fsidtype = nla_get_u8(tb[NFSD_A_EXPKEY_FSIDTYPE]); + if (key_len(fsidtype) == 0) { + err = -EINVAL; + goto out_dom; + } + + /* fsid (required) */ + if (!tb[NFSD_A_EXPKEY_FSID]) { + err = -EINVAL; + goto out_dom; + } + fsid_len = nla_len(tb[NFSD_A_EXPKEY_FSID]); + if (fsid_len != key_len(fsidtype)) { + err = -EINVAL; + goto out_dom; + } + + /* expiry (required, wallclock seconds) */ + if (!tb[NFSD_A_EXPKEY_EXPIRY]) { + err = -EINVAL; + goto out_dom; + } + + key.h.flags = 0; + getboottime64(&boot); + key.h.expiry_time = nla_get_u64(tb[NFSD_A_EXPKEY_EXPIRY]) - + boot.tv_sec; + key.ek_client = dom; + key.ek_fsidtype = fsidtype; + memcpy(key.ek_fsid, nla_data(tb[NFSD_A_EXPKEY_FSID]), fsid_len); + + ek = svc_expkey_lookup(cd, &key); + if (!ek) { + err = -ENOMEM; + goto out_dom; + } + + if (tb[NFSD_A_EXPKEY_NEGATIVE]) { + set_bit(CACHE_NEGATIVE, &key.h.flags); + ek = svc_expkey_update(cd, &key, ek); + if (ek) + trace_nfsd_expkey_update(ek, NULL); + else + err = -ENOMEM; + } else if (tb[NFSD_A_EXPKEY_PATH]) { + err = kern_path(nla_data(tb[NFSD_A_EXPKEY_PATH]), 0, + &key.ek_path); + if (err) + goto out_ek; + ek = svc_expkey_update(cd, &key, ek); + if (ek) + trace_nfsd_expkey_update(ek, + nla_data(tb[NFSD_A_EXPKEY_PATH])); + else + err = -ENOMEM; + path_put(&key.ek_path); + } else { + err = -EINVAL; + goto out_ek; + } + + cache_flush(); + +out_ek: + if (ek) + cache_put(&ek->h, cd); +out_dom: + auth_domain_put(dom); + return err; +} + +/** + * nfsd_nl_expkey_set_reqs_doit - respond to expkey requests + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Parse one or more expkey cache responses from userspace and + * update the expkey cache accordingly. + * + * Returns 0 on success or a negative errno. + */ +int nfsd_nl_expkey_set_reqs_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct nfsd_net *nn; + struct cache_detail *cd; + const struct nlattr *attr; + int rem, ret = 0; + + nn = net_generic(genl_info_net(info), nfsd_net_id); + + mutex_lock(&nfsd_mutex); + + cd = nn->svc_expkey_cache; + if (!cd) { + ret = -ENODEV; + goto out_unlock; + } + + nlmsg_for_each_attr_type(attr, NFSD_A_EXPKEY_REQS_REQUESTS, + info->nlhdr, GENL_HDRLEN, rem) { + ret = nfsd_nl_parse_one_expkey(cd, (struct nlattr *)attr); + if (ret) + break; + } + +out_unlock: + mutex_unlock(&nfsd_mutex); + return ret; +} #define EXPORT_HASHBITS 8 #define EXPORT_HASHMAX (1<< EXPORT_HASHBITS) @@ -386,9 +655,445 @@ static void svc_export_put(struct kref *ref) queue_rcu_work(nfsd_export_wq, &exp->ex_rwork); } +/** + * nfsd_nl_svc_export_get_reqs_dumpit - dump pending svc_export requests + * @skb: reply buffer + * @cb: netlink metadata and command arguments + * + * Walk the svc_export cache's pending request list and create a netlink + * message with a nested entry for each cache_request, containing the + * seqno, client string, and path. + * + * Uses cb->args[0] as a seqno cursor for dump continuation across + * multiple netlink messages. + * + * Returns the size of the reply or a negative errno. + */ +int nfsd_nl_svc_export_get_reqs_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nfsd_net *nn; + struct cache_detail *cd; + struct cache_head **items; + u64 *seqnos; + int cnt, i, emitted; + char *pathbuf; + void *hdr; + int ret; + + nn = net_generic(sock_net(skb->sk), nfsd_net_id); + + mutex_lock(&nfsd_mutex); + + cd = nn->svc_export_cache; + if (!cd) { + ret = -ENODEV; + goto out_unlock; + } + + cnt = sunrpc_cache_requests_count(cd); + if (!cnt) { + ret = 0; + goto out_unlock; + } + + items = kcalloc(cnt, sizeof(*items), GFP_KERNEL); + seqnos = kcalloc(cnt, sizeof(*seqnos), GFP_KERNEL); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); + if (!items || !seqnos || !pathbuf) { + ret = -ENOMEM; + goto out_alloc; + } + + cnt = sunrpc_cache_requests_snapshot(cd, items, seqnos, cnt, + cb->args[0]); + if (!cnt) { + ret = 0; + goto out_alloc; + } + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, &nfsd_nl_family, + NLM_F_MULTI, NFSD_CMD_SVC_EXPORT_GET_REQS); + if (!hdr) { + ret = -ENOBUFS; + goto out_put; + } + + emitted = 0; + for (i = 0; i < cnt; i++) { + struct svc_export *exp; + struct nlattr *nest; + char *pth; + + exp = container_of(items[i], struct svc_export, h); + + pth = d_path(&exp->ex_path, pathbuf, PATH_MAX); + if (IS_ERR(pth)) + continue; + + nest = nla_nest_start(skb, + NFSD_A_SVC_EXPORT_REQS_REQUESTS); + if (!nest) + break; + + if (nla_put_u64_64bit(skb, NFSD_A_SVC_EXPORT_SEQNO, + seqnos[i], 0) || + nla_put_string(skb, NFSD_A_SVC_EXPORT_CLIENT, + exp->ex_client->name) || + nla_put_string(skb, NFSD_A_SVC_EXPORT_PATH, pth)) { + nla_nest_cancel(skb, nest); + break; + } + + nla_nest_end(skb, nest); + cb->args[0] = seqnos[i]; + emitted++; + } + + if (!emitted) { + genlmsg_cancel(skb, hdr); + ret = -EMSGSIZE; + goto out_put; + } + + genlmsg_end(skb, hdr); + ret = skb->len; +out_put: + for (i = 0; i < cnt; i++) + cache_put(items[i], cd); +out_alloc: + kfree(pathbuf); + kfree(seqnos); + kfree(items); +out_unlock: + mutex_unlock(&nfsd_mutex); + return ret; +} + +/** + * nfsd_nl_parse_fslocations - parse fslocations from netlink + * @attr: NFSD_A_SVC_EXPORT_FSLOCATIONS nested attribute + * @fsloc: fslocations struct to fill in + * + * Returns 0 on success or a negative errno. + */ +static int nfsd_nl_parse_fslocations(struct nlattr *attr, + struct nfsd4_fs_locations *fsloc) +{ + struct nlattr *loc_attr; + int rem, count = 0; + int err; + + if (fsloc->locations) + return -EINVAL; + + /* Count locations first */ + nla_for_each_nested_type(loc_attr, NFSD_A_FSLOCATIONS_LOCATION, + attr, rem) + count++; + + if (count > MAX_FS_LOCATIONS) + return -EINVAL; + if (!count) + return 0; + + fsloc->locations = kcalloc(count, sizeof(struct nfsd4_fs_location), + GFP_KERNEL); + if (!fsloc->locations) + return -ENOMEM; + + nla_for_each_nested_type(loc_attr, NFSD_A_FSLOCATIONS_LOCATION, + attr, rem) { + struct nlattr *tb[NFSD_A_FSLOCATION_PATH + 1]; + struct nfsd4_fs_location *loc; + + err = nla_parse_nested(tb, NFSD_A_FSLOCATION_PATH, loc_attr, + nfsd_fslocation_nl_policy, NULL); + if (err) + goto out_free; + + if (!tb[NFSD_A_FSLOCATION_HOST] || + !tb[NFSD_A_FSLOCATION_PATH]) { + err = -EINVAL; + goto out_free; + } + + loc = &fsloc->locations[fsloc->locations_count++]; + loc->hosts = kstrdup(nla_data(tb[NFSD_A_FSLOCATION_HOST]), + GFP_KERNEL); + loc->path = kstrdup(nla_data(tb[NFSD_A_FSLOCATION_PATH]), + GFP_KERNEL); + if (!loc->hosts || !loc->path) { + err = -ENOMEM; + goto out_free; + } + } + + return 0; +out_free: + nfsd4_fslocs_free(fsloc); + return err; +} + +static struct svc_export *svc_export_update(struct svc_export *new, + struct svc_export *old); +static struct svc_export *svc_export_lookup(struct svc_export *); +static int check_export(const struct path *path, int *flags, + unsigned char *uuid); + +/** + * nfsd_nl_parse_one_export - parse one svc_export entry from a netlink message + * @cd: cache_detail for the svc_export cache + * @attr: nested attribute containing svc-export fields + * + * Parses one svc-export entry from a netlink message and updates the + * cache. Mirrors the logic in svc_export_parse(). + * + * Returns 0 on success or a negative errno. + */ +static int nfsd_nl_parse_one_export(struct cache_detail *cd, + struct nlattr *attr) +{ + struct nlattr *tb[NFSD_A_SVC_EXPORT_FSID + 1]; + struct auth_domain *dom = NULL; + struct svc_export exp = {}, *expp; + struct nlattr *secinfo_attr; + struct timespec64 boot; + int err, rem; + + err = nla_parse_nested(tb, NFSD_A_SVC_EXPORT_FSID, attr, + nfsd_svc_export_nl_policy, NULL); + if (err) + return err; + + /* client (required) */ + if (!tb[NFSD_A_SVC_EXPORT_CLIENT]) + return -EINVAL; + + dom = auth_domain_find(nla_data(tb[NFSD_A_SVC_EXPORT_CLIENT])); + if (!dom) + return -ENOENT; + + /* path (required) */ + if (!tb[NFSD_A_SVC_EXPORT_PATH]) { + err = -EINVAL; + goto out_dom; + } + + err = kern_path(nla_data(tb[NFSD_A_SVC_EXPORT_PATH]), 0, + &exp.ex_path); + if (err) + goto out_dom; + + exp.ex_client = dom; + exp.cd = cd; + exp.ex_devid_map = NULL; + exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL; + + /* expiry (required, wallclock seconds) */ + if (!tb[NFSD_A_SVC_EXPORT_EXPIRY]) { + err = -EINVAL; + goto out_path; + } + getboottime64(&boot); + exp.h.expiry_time = nla_get_u64(tb[NFSD_A_SVC_EXPORT_EXPIRY]) - + boot.tv_sec; + + if (tb[NFSD_A_SVC_EXPORT_NEGATIVE]) { + set_bit(CACHE_NEGATIVE, &exp.h.flags); + } else { + /* flags */ + if (tb[NFSD_A_SVC_EXPORT_FLAGS]) + exp.ex_flags = nla_get_u32(tb[NFSD_A_SVC_EXPORT_FLAGS]); + + /* anon uid */ + if (tb[NFSD_A_SVC_EXPORT_ANON_UID]) { + u32 uid = nla_get_u32(tb[NFSD_A_SVC_EXPORT_ANON_UID]); + + exp.ex_anon_uid = make_kuid(current_user_ns(), uid); + } + + /* anon gid */ + if (tb[NFSD_A_SVC_EXPORT_ANON_GID]) { + u32 gid = nla_get_u32(tb[NFSD_A_SVC_EXPORT_ANON_GID]); + + exp.ex_anon_gid = make_kgid(current_user_ns(), gid); + } + + /* fsid */ + if (tb[NFSD_A_SVC_EXPORT_FSID]) + exp.ex_fsid = nla_get_s32(tb[NFSD_A_SVC_EXPORT_FSID]); + + /* fslocations */ + if (tb[NFSD_A_SVC_EXPORT_FSLOCATIONS]) { + struct nlattr *fsl = tb[NFSD_A_SVC_EXPORT_FSLOCATIONS]; + + err = nfsd_nl_parse_fslocations(fsl, + &exp.ex_fslocs); + if (err) + goto out_path; + } + + /* uuid */ + if (tb[NFSD_A_SVC_EXPORT_UUID]) { + if (nla_len(tb[NFSD_A_SVC_EXPORT_UUID]) != + EX_UUID_LEN) { + err = -EINVAL; + goto out_fslocs; + } + exp.ex_uuid = kmemdup(nla_data(tb[NFSD_A_SVC_EXPORT_UUID]), + EX_UUID_LEN, GFP_KERNEL); + if (!exp.ex_uuid) { + err = -ENOMEM; + goto out_fslocs; + } + } + + /* secinfo (multi-attr) */ + nla_for_each_nested_type(secinfo_attr, + NFSD_A_SVC_EXPORT_SECINFO, + attr, rem) { + struct nlattr *ftb[NFSD_A_AUTH_FLAVOR_FLAGS + 1]; + struct exp_flavor_info *f; + + if (exp.ex_nflavors >= MAX_SECINFO_LIST) { + err = -EINVAL; + goto out_uuid; + } + + err = nla_parse_nested(ftb, + NFSD_A_AUTH_FLAVOR_FLAGS, + secinfo_attr, + nfsd_auth_flavor_nl_policy, + NULL); + if (err) + goto out_uuid; + + f = &exp.ex_flavors[exp.ex_nflavors++]; + + if (ftb[NFSD_A_AUTH_FLAVOR_PSEUDOFLAVOR]) + f->pseudoflavor = nla_get_u32(ftb[NFSD_A_AUTH_FLAVOR_PSEUDOFLAVOR]); + + if (ftb[NFSD_A_AUTH_FLAVOR_FLAGS]) + f->flags = nla_get_u32(ftb[NFSD_A_AUTH_FLAVOR_FLAGS]); + + /* Only some flags are allowed to differ between flavors: */ + if (~NFSEXP_SECINFO_FLAGS & (f->flags ^ exp.ex_flags)) { + err = -EINVAL; + goto out_uuid; + } + } + + /* xprtsec (multi-attr u32) */ + if (tb[NFSD_A_SVC_EXPORT_XPRTSEC]) { + struct nlattr *xp_attr; + + exp.ex_xprtsec_modes = 0; + nla_for_each_nested_type(xp_attr, + NFSD_A_SVC_EXPORT_XPRTSEC, + attr, rem) { + u32 mode = nla_get_u32(xp_attr); + + if (mode > NFSEXP_XPRTSEC_MTLS) { + err = -EINVAL; + goto out_uuid; + } + exp.ex_xprtsec_modes |= mode; + } + } + + err = check_export(&exp.ex_path, &exp.ex_flags, + exp.ex_uuid); + if (err) + goto out_uuid; + + if (exp.h.expiry_time < seconds_since_boot()) + goto out_uuid; + + err = -EINVAL; + if (!uid_valid(exp.ex_anon_uid)) + goto out_uuid; + if (!gid_valid(exp.ex_anon_gid)) + goto out_uuid; + err = 0; + + nfsd4_setup_layout_type(&exp); + } + + expp = svc_export_lookup(&exp); + if (!expp) { + err = -ENOMEM; + goto out_uuid; + } + expp = svc_export_update(&exp, expp); + if (expp) { + trace_nfsd_export_update(expp); + cache_flush(); + exp_put(expp); + } else { + err = -ENOMEM; + } + +out_uuid: + kfree(exp.ex_uuid); +out_fslocs: + nfsd4_fslocs_free(&exp.ex_fslocs); +out_path: + path_put(&exp.ex_path); +out_dom: + auth_domain_put(dom); + return err; +} + +/** + * nfsd_nl_svc_export_set_reqs_doit - respond to svc_export requests + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Parse one or more svc_export cache responses from userspace and + * update the export cache accordingly. + * + * Returns 0 on success or a negative errno. + */ +int nfsd_nl_svc_export_set_reqs_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct nfsd_net *nn; + struct cache_detail *cd; + const struct nlattr *attr; + int rem, ret = 0; + + nn = net_generic(genl_info_net(info), nfsd_net_id); + + mutex_lock(&nfsd_mutex); + + cd = nn->svc_export_cache; + if (!cd) { + ret = -ENODEV; + goto out_unlock; + } + + nlmsg_for_each_attr_type(attr, NFSD_A_SVC_EXPORT_REQS_REQUESTS, + info->nlhdr, GENL_HDRLEN, rem) { + ret = nfsd_nl_parse_one_export(cd, (struct nlattr *)attr); + if (ret) + break; + } + +out_unlock: + mutex_unlock(&nfsd_mutex); + return ret; +} + static int svc_export_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall(cd, h); + return sunrpc_cache_upcall(cd, h); +} + +static int svc_export_notify(struct cache_detail *cd, struct cache_head *h) +{ + return nfsd_cache_notify(cd, h, NFSD_CACHE_TYPE_SVC_EXPORT); } static void svc_export_request(struct cache_detail *cd, @@ -410,10 +1115,6 @@ static void svc_export_request(struct cache_detail *cd, (*bpp)[-1] = '\n'; } -static struct svc_export *svc_export_update(struct svc_export *new, - struct svc_export *old); -static struct svc_export *svc_export_lookup(struct svc_export *); - static int check_export(const struct path *path, int *flags, unsigned char *uuid) { struct inode *inode = d_inode(path->dentry); @@ -735,7 +1436,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) goto out4; err = 0; - nfsd4_setup_layout_type(&exp); + if (exp.ex_flags & NFSEXP_PNFS) + nfsd4_setup_layout_type(&exp); } expp = svc_export_lookup(&exp); @@ -907,6 +1609,7 @@ static const struct cache_detail svc_export_cache_template = { .name = "nfsd.export", .cache_put = svc_export_put, .cache_upcall = svc_export_upcall, + .cache_notify = svc_export_notify, .cache_request = svc_export_request, .cache_parse = svc_export_parse, .cache_show = svc_export_show, diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index 1e2b38ed1d35d..24511c3208db9 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -724,6 +724,52 @@ nfsd_file_close_inode_sync(struct inode *inode) nfsd_file_dispose_list(&dispose); } +/** + * nfsd_file_close_export - close cached file handles for an export + * @net: net namespace in which to operate + * @path: export path whose cached files should be closed + * + * Close out GC-managed nfsd_file entries whose underlying file is on + * the same filesystem as, and a descendant of, @path. nfsd_file + * entries do not carry an export reference, so the check uses the + * file's dentry ancestry. False positives (closing a cached handle + * that did not originate from the target export) are harmless -- the + * handle is simply reopened on the next access. + * + * Called from the NFSD_CMD_UNLOCK_EXPORT handler before revoking + * NFSv4 state, to ensure that cached file handles do not hold the + * filesystem busy. + */ +void nfsd_file_close_export(struct net *net, const struct path *path) +{ + struct rhashtable_iter iter; + struct nfsd_file *nf; + LIST_HEAD(dispose); + + rhltable_walk_enter(&nfsd_file_rhltable, &iter); + do { + rhashtable_walk_start(&iter); + + nf = rhashtable_walk_next(&iter); + while (!IS_ERR_OR_NULL(nf)) { + if (nf->nf_net == net && + test_bit(NFSD_FILE_GC, &nf->nf_flags) && + nf->nf_file && + file_inode(nf->nf_file)->i_sb == + path->dentry->d_sb && + is_subdir(nf->nf_file->f_path.dentry, + path->dentry)) + nfsd_file_cond_queue(nf, &dispose); + nf = rhashtable_walk_next(&iter); + } + + rhashtable_walk_stop(&iter); + } while (nf == ERR_PTR(-EAGAIN)); + rhashtable_walk_exit(&iter); + + nfsd_file_dispose_list(&dispose); +} + static int nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg, void *data) diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h index b383dbc5b9218..683b6437cacc2 100644 --- a/fs/nfsd/filecache.h +++ b/fs/nfsd/filecache.h @@ -70,6 +70,7 @@ struct net *nfsd_file_put_local(struct nfsd_file __rcu **nf); struct nfsd_file *nfsd_file_get(struct nfsd_file *nf); struct file *nfsd_file_file(struct nfsd_file *nf); void nfsd_file_close_inode_sync(struct inode *inode); +void nfsd_file_close_export(struct net *net, const struct path *path); void nfsd_file_net_dispose(struct nfsd_net *nn); bool nfsd_file_is_cached(struct inode *inode); __be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp, diff --git a/fs/nfsd/netlink.c b/fs/nfsd/netlink.c index 81c943345d13d..fbee3676d2539 100644 --- a/fs/nfsd/netlink.c +++ b/fs/nfsd/netlink.c @@ -12,11 +12,51 @@ #include <uapi/linux/nfsd_netlink.h> /* Common nested types */ +const struct nla_policy nfsd_auth_flavor_nl_policy[NFSD_A_AUTH_FLAVOR_FLAGS + 1] = { + [NFSD_A_AUTH_FLAVOR_PSEUDOFLAVOR] = { .type = NLA_U32, }, + [NFSD_A_AUTH_FLAVOR_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0x3ffff), +}; + +const struct nla_policy nfsd_expkey_nl_policy[NFSD_A_EXPKEY_PATH + 1] = { + [NFSD_A_EXPKEY_SEQNO] = { .type = NLA_U64, }, + [NFSD_A_EXPKEY_CLIENT] = { .type = NLA_NUL_STRING, }, + [NFSD_A_EXPKEY_FSIDTYPE] = { .type = NLA_U8, }, + [NFSD_A_EXPKEY_FSID] = { .type = NLA_BINARY, }, + [NFSD_A_EXPKEY_NEGATIVE] = { .type = NLA_FLAG, }, + [NFSD_A_EXPKEY_EXPIRY] = { .type = NLA_U64, }, + [NFSD_A_EXPKEY_PATH] = { .type = NLA_NUL_STRING, }, +}; + +const struct nla_policy nfsd_fslocation_nl_policy[NFSD_A_FSLOCATION_PATH + 1] = { + [NFSD_A_FSLOCATION_HOST] = { .type = NLA_NUL_STRING, }, + [NFSD_A_FSLOCATION_PATH] = { .type = NLA_NUL_STRING, }, +}; + +const struct nla_policy nfsd_fslocations_nl_policy[NFSD_A_FSLOCATIONS_LOCATION + 1] = { + [NFSD_A_FSLOCATIONS_LOCATION] = NLA_POLICY_NESTED(nfsd_fslocation_nl_policy), +}; + const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1] = { [NFSD_A_SOCK_ADDR] = { .type = NLA_BINARY, }, [NFSD_A_SOCK_TRANSPORT_NAME] = { .type = NLA_NUL_STRING, }, }; +const struct nla_policy nfsd_svc_export_nl_policy[NFSD_A_SVC_EXPORT_FSID + 1] = { + [NFSD_A_SVC_EXPORT_SEQNO] = { .type = NLA_U64, }, + [NFSD_A_SVC_EXPORT_CLIENT] = { .type = NLA_NUL_STRING, }, + [NFSD_A_SVC_EXPORT_PATH] = { .type = NLA_NUL_STRING, }, + [NFSD_A_SVC_EXPORT_NEGATIVE] = { .type = NLA_FLAG, }, + [NFSD_A_SVC_EXPORT_EXPIRY] = { .type = NLA_U64, }, + [NFSD_A_SVC_EXPORT_ANON_UID] = { .type = NLA_U32, }, + [NFSD_A_SVC_EXPORT_ANON_GID] = { .type = NLA_U32, }, + [NFSD_A_SVC_EXPORT_FSLOCATIONS] = NLA_POLICY_NESTED(nfsd_fslocations_nl_policy), + [NFSD_A_SVC_EXPORT_UUID] = { .type = NLA_BINARY, }, + [NFSD_A_SVC_EXPORT_SECINFO] = NLA_POLICY_NESTED(nfsd_auth_flavor_nl_policy), + [NFSD_A_SVC_EXPORT_XPRTSEC] = NLA_POLICY_MASK(NLA_U32, 0x7), + [NFSD_A_SVC_EXPORT_FLAGS] = NLA_POLICY_MASK(NLA_U32, 0x3ffff), + [NFSD_A_SVC_EXPORT_FSID] = { .type = NLA_S32, }, +}; + const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1] = { [NFSD_A_VERSION_MAJOR] = { .type = NLA_U32, }, [NFSD_A_VERSION_MINOR] = { .type = NLA_U32, }, @@ -48,6 +88,36 @@ static const struct nla_policy nfsd_pool_mode_set_nl_policy[NFSD_A_POOL_MODE_MOD [NFSD_A_POOL_MODE_MODE] = { .type = NLA_NUL_STRING, }, }; +/* NFSD_CMD_SVC_EXPORT_SET_REQS - do */ +static const struct nla_policy nfsd_svc_export_set_reqs_nl_policy[NFSD_A_SVC_EXPORT_REQS_REQUESTS + 1] = { + [NFSD_A_SVC_EXPORT_REQS_REQUESTS] = NLA_POLICY_NESTED(nfsd_svc_export_nl_policy), +}; + +/* NFSD_CMD_EXPKEY_SET_REQS - do */ +static const struct nla_policy nfsd_expkey_set_reqs_nl_policy[NFSD_A_EXPKEY_REQS_REQUESTS + 1] = { + [NFSD_A_EXPKEY_REQS_REQUESTS] = NLA_POLICY_NESTED(nfsd_expkey_nl_policy), +}; + +/* NFSD_CMD_CACHE_FLUSH - do */ +static const struct nla_policy nfsd_cache_flush_nl_policy[NFSD_A_CACHE_FLUSH_MASK + 1] = { + [NFSD_A_CACHE_FLUSH_MASK] = NLA_POLICY_MASK(NLA_U32, 0x3), +}; + +/* NFSD_CMD_UNLOCK_IP - do */ +static const struct nla_policy nfsd_unlock_ip_nl_policy[NFSD_A_UNLOCK_IP_ADDRESS + 1] = { + [NFSD_A_UNLOCK_IP_ADDRESS] = NLA_POLICY_MIN_LEN(16), +}; + +/* NFSD_CMD_UNLOCK_FILESYSTEM - do */ +static const struct nla_policy nfsd_unlock_filesystem_nl_policy[NFSD_A_UNLOCK_FILESYSTEM_PATH + 1] = { + [NFSD_A_UNLOCK_FILESYSTEM_PATH] = { .type = NLA_NUL_STRING, }, +}; + +/* NFSD_CMD_UNLOCK_EXPORT - do */ +static const struct nla_policy nfsd_unlock_export_nl_policy[NFSD_A_UNLOCK_EXPORT_PATH + 1] = { + [NFSD_A_UNLOCK_EXPORT_PATH] = { .type = NLA_NUL_STRING, }, +}; + /* Ops table for nfsd */ static const struct genl_split_ops nfsd_nl_ops[] = { { @@ -103,6 +173,63 @@ static const struct genl_split_ops nfsd_nl_ops[] = { .doit = nfsd_nl_pool_mode_get_doit, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = NFSD_CMD_SVC_EXPORT_GET_REQS, + .dumpit = nfsd_nl_svc_export_get_reqs_dumpit, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = NFSD_CMD_SVC_EXPORT_SET_REQS, + .doit = nfsd_nl_svc_export_set_reqs_doit, + .policy = nfsd_svc_export_set_reqs_nl_policy, + .maxattr = NFSD_A_SVC_EXPORT_REQS_REQUESTS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_EXPKEY_GET_REQS, + .dumpit = nfsd_nl_expkey_get_reqs_dumpit, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, + }, + { + .cmd = NFSD_CMD_EXPKEY_SET_REQS, + .doit = nfsd_nl_expkey_set_reqs_doit, + .policy = nfsd_expkey_set_reqs_nl_policy, + .maxattr = NFSD_A_EXPKEY_REQS_REQUESTS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_CACHE_FLUSH, + .doit = nfsd_nl_cache_flush_doit, + .policy = nfsd_cache_flush_nl_policy, + .maxattr = NFSD_A_CACHE_FLUSH_MASK, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_UNLOCK_IP, + .doit = nfsd_nl_unlock_ip_doit, + .policy = nfsd_unlock_ip_nl_policy, + .maxattr = NFSD_A_UNLOCK_IP_ADDRESS, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_UNLOCK_FILESYSTEM, + .doit = nfsd_nl_unlock_filesystem_doit, + .policy = nfsd_unlock_filesystem_nl_policy, + .maxattr = NFSD_A_UNLOCK_FILESYSTEM_PATH, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, + { + .cmd = NFSD_CMD_UNLOCK_EXPORT, + .doit = nfsd_nl_unlock_export_doit, + .policy = nfsd_unlock_export_nl_policy, + .maxattr = NFSD_A_UNLOCK_EXPORT_PATH, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, +}; + +static const struct genl_multicast_group nfsd_nl_mcgrps[] = { + [NFSD_NLGRP_NONE] = { "none", }, + [NFSD_NLGRP_EXPORTD] = { "exportd", }, }; struct genl_family nfsd_nl_family __ro_after_init = { @@ -113,4 +240,6 @@ struct genl_family nfsd_nl_family __ro_after_init = { .module = THIS_MODULE, .split_ops = nfsd_nl_ops, .n_split_ops = ARRAY_SIZE(nfsd_nl_ops), + .mcgrps = nfsd_nl_mcgrps, + .n_mcgrps = ARRAY_SIZE(nfsd_nl_mcgrps), }; diff --git a/fs/nfsd/netlink.h b/fs/nfsd/netlink.h index 478117ff6b8c0..af41aa0d4a65d 100644 --- a/fs/nfsd/netlink.h +++ b/fs/nfsd/netlink.h @@ -13,7 +13,12 @@ #include <uapi/linux/nfsd_netlink.h> /* Common nested types */ +extern const struct nla_policy nfsd_auth_flavor_nl_policy[NFSD_A_AUTH_FLAVOR_FLAGS + 1]; +extern const struct nla_policy nfsd_expkey_nl_policy[NFSD_A_EXPKEY_PATH + 1]; +extern const struct nla_policy nfsd_fslocation_nl_policy[NFSD_A_FSLOCATION_PATH + 1]; +extern const struct nla_policy nfsd_fslocations_nl_policy[NFSD_A_FSLOCATIONS_LOCATION + 1]; extern const struct nla_policy nfsd_sock_nl_policy[NFSD_A_SOCK_TRANSPORT_NAME + 1]; +extern const struct nla_policy nfsd_svc_export_nl_policy[NFSD_A_SVC_EXPORT_FSID + 1]; extern const struct nla_policy nfsd_version_nl_policy[NFSD_A_VERSION_ENABLED + 1]; int nfsd_nl_rpc_status_get_dumpit(struct sk_buff *skb, @@ -26,6 +31,22 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info); int nfsd_nl_listener_get_doit(struct sk_buff *skb, struct genl_info *info); int nfsd_nl_pool_mode_set_doit(struct sk_buff *skb, struct genl_info *info); int nfsd_nl_pool_mode_get_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_svc_export_get_reqs_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int nfsd_nl_svc_export_set_reqs_doit(struct sk_buff *skb, + struct genl_info *info); +int nfsd_nl_expkey_get_reqs_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); +int nfsd_nl_expkey_set_reqs_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_cache_flush_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_unlock_ip_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_unlock_filesystem_doit(struct sk_buff *skb, struct genl_info *info); +int nfsd_nl_unlock_export_doit(struct sk_buff *skb, struct genl_info *info); + +enum { + NFSD_NLGRP_NONE, + NFSD_NLGRP_EXPORTD, +}; extern struct genl_family nfsd_nl_family; diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 42adc5461db03..aeda7a802bdf7 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -710,23 +710,46 @@ nfsd3_proc_pathconf(struct svc_rqst *rqstp) resp->p_name_max = 255; /* at least */ resp->p_no_trunc = 0; resp->p_chown_restricted = 1; - resp->p_case_insensitive = 0; - resp->p_case_preserving = 1; + resp->p_case_insensitive = false; + resp->p_case_preserving = true; resp->status = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); if (resp->status == nfs_ok) { struct super_block *sb = argp->fh.fh_dentry->d_sb; + int err; - /* Note that we don't care for remote fs's here */ - switch (sb->s_magic) { - case EXT2_SUPER_MAGIC: + if (sb->s_magic == EXT2_SUPER_MAGIC) { resp->p_link_max = EXT2_LINK_MAX; resp->p_name_max = EXT2_NAME_LEN; + } + + err = nfsd_get_case_info(argp->fh.fh_dentry, + &resp->p_case_insensitive, + &resp->p_case_preserving); + /* + * RFC 1813 lists NFS3ERR_STALE, NFS3ERR_BADHANDLE, and + * NFS3ERR_SERVERFAULT as the only PATHCONF errors. + */ + switch (err) { + case 0: + case -EOPNOTSUPP: + /* Both arms leave the output booleans valid. */ + break; + case -EACCES: + case -EPERM: + /* + * Policy denied the query. Report STALE so the + * handle is unusable without implying a server + * malfunction. + */ + resp->status = nfserr_stale; + break; + case -ESTALE: + resp->status = nfserr_stale; break; - case MSDOS_SUPER_MAGIC: - resp->p_case_insensitive = 1; - resp->p_case_preserving = 0; + default: + resp->status = nfserr_serverfault; break; } } diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index ba06d3d3e6dd9..71ba61b5d0a3a 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -126,7 +126,7 @@ idtoname_hash(struct ent *ent) static int idtoname_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall_timeout(cd, h); + return sunrpc_cache_upcall_warn(cd, h); } static void @@ -306,7 +306,7 @@ nametoid_hash(struct ent *ent) static int nametoid_upcall(struct cache_detail *cd, struct cache_head *h) { - return sunrpc_cache_pipe_upcall_timeout(cd, h); + return sunrpc_cache_upcall_warn(cd, h); } static void diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 69e41105efdd5..c550b83f44323 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -2,7 +2,7 @@ /* * Copyright (c) 2014 Christoph Hellwig. */ -#include <linux/blkdev.h> +#include <linux/exportfs_block.h> #include <linux/kmod.h> #include <linux/file.h> #include <linux/jhash.h> @@ -127,30 +127,17 @@ nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, void nfsd4_setup_layout_type(struct svc_export *exp) { -#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT) struct super_block *sb = exp->ex_path.mnt->mnt_sb; -#endif - - if (!(exp->ex_flags & NFSEXP_PNFS)) - return; + expfs_block_layouts_t block_supported = exportfs_layouts_supported(sb); -#ifdef CONFIG_NFSD_FLEXFILELAYOUT - exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES; -#endif -#ifdef CONFIG_NFSD_BLOCKLAYOUT - if (sb->s_export_op->get_uuid && - sb->s_export_op->map_blocks && - sb->s_export_op->commit_blocks) + if (IS_ENABLED(CONFIG_NFSD_FLEXFILELAYOUT)) + exp->ex_layout_types |= 1 << LAYOUT_FLEX_FILES; + if (IS_ENABLED(CONFIG_NFSD_BLOCKLAYOUT) && + (block_supported & EXPFS_BLOCK_IN_BAND_ID)) exp->ex_layout_types |= 1 << LAYOUT_BLOCK_VOLUME; -#endif -#ifdef CONFIG_NFSD_SCSILAYOUT - if (sb->s_export_op->map_blocks && - sb->s_export_op->commit_blocks && - sb->s_bdev && - sb->s_bdev->bd_disk->fops->pr_ops && - sb->s_bdev->bd_disk->fops->get_unique_id) + if (IS_ENABLED(CONFIG_NFSD_SCSILAYOUT) && + (block_supported & EXPFS_BLOCK_OUT_OF_BAND_ID)) exp->ex_layout_types |= 1 << LAYOUT_SCSI; -#endif } void nfsd4_close_layout(struct nfs4_layout_stateid *ls) @@ -247,6 +234,8 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate, get_nfs4_file(fp); stp->sc_file = fp; + if (parent->sc_export) + stp->sc_export = exp_get(parent->sc_export); ls = layoutstateid(stp); INIT_LIST_HEAD(&ls->ls_perclnt); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 6837b63d98645..2cf021b202a64 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1168,6 +1168,7 @@ out_dec: void nfs4_put_stid(struct nfs4_stid *s) { + struct svc_export *exp = s->sc_export; struct nfs4_file *fp = s->sc_file; struct nfs4_client *clp = s->sc_client; @@ -1183,6 +1184,8 @@ nfs4_put_stid(struct nfs4_stid *s) nfs4_free_cpntf_statelist(clp->net, s); spin_unlock(&clp->cl_lock); s->sc_free(s); + if (exp) + exp_put(exp); if (fp) put_nfs4_file(fp); } @@ -1376,7 +1379,8 @@ static void destroy_delegation(struct nfs4_delegation *dp) * stateid or it's called from a laundromat thread (nfsd4_landromat()) that * determined that this specific state has expired and needs to be revoked * (both mark state with the appropriate stid sc_status mode). It is also - * assumed that a reference was taken on the @dp state. + * assumed that a reference was taken on the @dp state. This function + * consumes that reference. * * If this function finds that the @dp state is SC_STATUS_FREED it means * that a FREE_STATEID operation for this stateid has been processed and @@ -1745,21 +1749,119 @@ static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp, struct super_block *sb, unsigned int sc_types) { - unsigned long id, tmp; + unsigned long id = 0; struct nfs4_stid *stid; spin_lock(&clp->cl_lock); - idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id) + while ((stid = idr_get_next_ul(&clp->cl_stateids, &id)) != NULL) { if ((stid->sc_type & sc_types) && stid->sc_status == 0 && stid->sc_file->fi_inode->i_sb == sb) { refcount_inc(&stid->sc_count); break; } + id++; + } spin_unlock(&clp->cl_lock); return stid; } +/* + * Release the export reference an admin-revoked stateid holds, + * so the svc_export (and its vfsmount) is not pinned until the + * client issues FREE_STATEID. sc_export is no longer consulted + * once SC_STATUS_ADMIN_REVOKED is set. + */ +static void drop_stid_export(struct nfs4_client *clp, + struct nfs4_stid *stid) +{ + struct svc_export *exp; + + spin_lock(&clp->cl_lock); + exp = stid->sc_export; + stid->sc_export = NULL; + spin_unlock(&clp->cl_lock); + if (exp) + exp_put(exp); +} + +static void revoke_ol_stid(struct nfs4_client *clp, + struct nfs4_ol_stateid *stp) +{ + struct nfs4_stid *stid = &stp->st_stid; + + lockdep_assert_held(&stp->st_mutex); + spin_lock(&clp->cl_lock); + if (stid->sc_status == 0) { + stid->sc_status |= SC_STATUS_ADMIN_REVOKED; + atomic_inc(&clp->cl_admin_revoked); + spin_unlock(&clp->cl_lock); + if (stid->sc_type == SC_TYPE_LOCK) { + struct nfs4_lockowner *lo = + lockowner(stp->st_stateowner); + struct nfsd_file *nf; + + nf = find_any_file(stp->st_stid.sc_file); + if (nf) { + get_file(nf->nf_file); + filp_close(nf->nf_file, (fl_owner_t)lo); + nfsd_file_put(nf); + } + } + release_all_access(stp); + drop_stid_export(clp, stid); + } else + spin_unlock(&clp->cl_lock); +} + +static void revoke_one_stid(struct nfsd_net *nn, struct nfs4_client *clp, + struct nfs4_stid *stid) +{ + struct nfs4_ol_stateid *stp; + struct nfs4_delegation *dp; + + switch (stid->sc_type) { + case SC_TYPE_OPEN: + stp = openlockstateid(stid); + mutex_lock_nested(&stp->st_mutex, OPEN_STATEID_MUTEX); + revoke_ol_stid(clp, stp); + mutex_unlock(&stp->st_mutex); + break; + case SC_TYPE_LOCK: + stp = openlockstateid(stid); + mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX); + revoke_ol_stid(clp, stp); + mutex_unlock(&stp->st_mutex); + break; + case SC_TYPE_DELEG: + /* + * Extra reference guards against concurrent FREE_STATEID. + */ + refcount_inc(&stid->sc_count); + dp = delegstateid(stid); + spin_lock(&nn->deleg_lock); + if (!unhash_delegation_locked(dp, SC_STATUS_ADMIN_REVOKED)) + dp = NULL; + spin_unlock(&nn->deleg_lock); + if (dp) { + revoke_delegation(dp); + drop_stid_export(clp, stid); + } else + nfs4_put_stid(stid); + break; + case SC_TYPE_LAYOUT: + spin_lock(&clp->cl_lock); + if (stid->sc_status == 0) { + stid->sc_status |= SC_STATUS_ADMIN_REVOKED; + atomic_inc(&clp->cl_admin_revoked); + } + spin_unlock(&clp->cl_lock); + nfsd4_close_layout(layoutstateid(stid)); + drop_stid_export(clp, stid); + break; + } +} + /** * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem * @nn: used to identify instance of nfsd (there is one per net namespace) @@ -1790,77 +1892,8 @@ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb) struct nfs4_stid *stid = find_one_sb_stid(clp, sb, sc_types); if (stid) { - struct nfs4_ol_stateid *stp; - struct nfs4_delegation *dp; - struct nfs4_layout_stateid *ls; - spin_unlock(&nn->client_lock); - switch (stid->sc_type) { - case SC_TYPE_OPEN: - stp = openlockstateid(stid); - mutex_lock_nested(&stp->st_mutex, - OPEN_STATEID_MUTEX); - - spin_lock(&clp->cl_lock); - if (stid->sc_status == 0) { - stid->sc_status |= - SC_STATUS_ADMIN_REVOKED; - atomic_inc(&clp->cl_admin_revoked); - spin_unlock(&clp->cl_lock); - release_all_access(stp); - } else - spin_unlock(&clp->cl_lock); - mutex_unlock(&stp->st_mutex); - break; - case SC_TYPE_LOCK: - stp = openlockstateid(stid); - mutex_lock_nested(&stp->st_mutex, - LOCK_STATEID_MUTEX); - spin_lock(&clp->cl_lock); - if (stid->sc_status == 0) { - struct nfs4_lockowner *lo = - lockowner(stp->st_stateowner); - struct nfsd_file *nf; - - stid->sc_status |= - SC_STATUS_ADMIN_REVOKED; - atomic_inc(&clp->cl_admin_revoked); - spin_unlock(&clp->cl_lock); - nf = find_any_file(stp->st_stid.sc_file); - if (nf) { - get_file(nf->nf_file); - filp_close(nf->nf_file, - (fl_owner_t)lo); - nfsd_file_put(nf); - } - release_all_access(stp); - } else - spin_unlock(&clp->cl_lock); - mutex_unlock(&stp->st_mutex); - break; - case SC_TYPE_DELEG: - refcount_inc(&stid->sc_count); - dp = delegstateid(stid); - spin_lock(&nn->deleg_lock); - if (!unhash_delegation_locked( - dp, SC_STATUS_ADMIN_REVOKED)) - dp = NULL; - spin_unlock(&nn->deleg_lock); - if (dp) - revoke_delegation(dp); - break; - case SC_TYPE_LAYOUT: - ls = layoutstateid(stid); - spin_lock(&clp->cl_lock); - if (stid->sc_status == 0) { - stid->sc_status |= - SC_STATUS_ADMIN_REVOKED; - atomic_inc(&clp->cl_admin_revoked); - } - spin_unlock(&clp->cl_lock); - nfsd4_close_layout(ls); - break; - } + revoke_one_stid(nn, clp, stid); nfs4_put_stid(stid); spin_lock(&nn->client_lock); if (clp->cl_minorversion == 0) @@ -1878,6 +1911,73 @@ void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb) spin_unlock(&nn->client_lock); } +static struct nfs4_stid *find_one_export_stid(struct nfs4_client *clp, + const struct path *path, + unsigned int sc_types) +{ + unsigned long id = 0; + struct nfs4_stid *stid; + + spin_lock(&clp->cl_lock); + while ((stid = idr_get_next_ul(&clp->cl_stateids, &id)) != NULL) { + if ((stid->sc_type & sc_types) && + stid->sc_status == 0 && + stid->sc_export && + path_equal(&stid->sc_export->ex_path, path)) { + refcount_inc(&stid->sc_count); + break; + } + id++; + } + spin_unlock(&clp->cl_lock); + return stid; +} + +/** + * nfsd4_revoke_export_states - revoke nfsv4 states acquired through an export + * @nn: used to identify instance of nfsd (there is one per net namespace) + * @path: export path whose states should be revoked + * + * All nfs4 states (open, lock, delegation, layout) acquired through any + * export matching @path are revoked, regardless of which client holds + * them. Matching is by path identity (dentry + vfsmount), so multiple + * svc_export objects for the same path -- one per auth_domain -- are + * handled correctly. + * + * Userspace (exportfs -u) sends this after removing the last client + * for a path, enabling the underlying filesystem to be unmounted. + */ +void nfsd4_revoke_export_states(struct nfsd_net *nn, const struct path *path) +{ + unsigned int idhashval; + unsigned int sc_types; + + sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT; + + spin_lock(&nn->client_lock); + for (idhashval = 0; idhashval < CLIENT_HASH_SIZE; idhashval++) { + struct list_head *head = &nn->conf_id_hashtbl[idhashval]; + struct nfs4_client *clp; + retry: + list_for_each_entry(clp, head, cl_idhash) { + struct nfs4_stid *stid = find_one_export_stid( + clp, path, + sc_types); + if (stid) { + spin_unlock(&nn->client_lock); + revoke_one_stid(nn, clp, stid); + nfs4_put_stid(stid); + spin_lock(&nn->client_lock); + if (clp->cl_minorversion == 0) + nn->nfs40_last_revoke = + ktime_get_boottime_seconds(); + goto retry; + } + } + } + spin_unlock(&nn->client_lock); +} + static inline int hash_sessionid(struct nfs4_sessionid *sessionid) { @@ -5049,6 +5149,7 @@ static void nfsd4_drop_revoked_stid(struct nfs4_stid *s) { struct nfs4_client *cl = s->sc_client; LIST_HEAD(reaplist); + struct nfs4_layout_stateid *ls; struct nfs4_ol_stateid *stp; struct nfs4_delegation *dp; bool unhashed; @@ -5074,6 +5175,12 @@ static void nfsd4_drop_revoked_stid(struct nfs4_stid *s) spin_unlock(&cl->cl_lock); nfs4_put_stid(s); break; + case SC_TYPE_LAYOUT: + ls = layoutstateid(s); + list_del_init(&ls->ls_perclnt); + spin_unlock(&cl->cl_lock); + nfs4_put_stid(s); + break; default: spin_unlock(&cl->cl_lock); } @@ -6157,6 +6264,8 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp, dp = alloc_init_deleg(clp, fp, odstate, dl_type); if (!dp) goto out_delegees; + if (stp->st_stid.sc_export) + dp->dl_stid.sc_export = exp_get(stp->st_stid.sc_export); fl = nfs4_alloc_init_lease(dp); if (!fl) @@ -6490,8 +6599,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf goto out; } - if (!open->op_stp) + if (!open->op_stp) { new_stp = true; + stp->st_stid.sc_export = + exp_get(current_fh->fh_export); + } } /* @@ -8187,6 +8299,9 @@ retry: stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner); get_nfs4_file(fp); stp->st_stid.sc_file = fp; + if (open_stp->st_stid.sc_export) + stp->st_stid.sc_export = + exp_get(open_stp->st_stid.sc_export); stp->st_access_bmap = 0; stp->st_deny_bmap = open_stp->st_deny_bmap; stp->st_openstp = open_stp; @@ -9517,6 +9632,9 @@ nfsd_get_dir_deleg(struct nfsd4_compound_state *cstate, dp = alloc_init_deleg(clp, fp, NULL, NFS4_OPEN_DELEGATE_READ); if (!dp) goto out_delegees; + if (cstate->current_fh.fh_export) + dp->dl_stid.sc_export = + exp_get(cstate->current_fh.fh_export); fl = nfs4_alloc_init_lease(dp); if (!fl) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 2a0946c630e1d..20355dc3f1d16 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3158,6 +3158,8 @@ struct nfsd4_fattr_args { u32 rdattr_err; bool contextsupport; bool ignore_crossmnt; + bool case_insensitive; + bool case_preserving; }; typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr, @@ -3356,6 +3358,33 @@ static __be32 nfsd4_encode_fattr4_acl(struct xdr_stream *xdr, return nfs_ok; } +static __be32 nfsd4_encode_fattr4_case_insensitive(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_bool(xdr, args->case_insensitive); +} + +static __be32 nfsd4_encode_fattr4_case_preserving(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + return nfsd4_encode_bool(xdr, args->case_preserving); +} + +static __be32 nfsd4_encode_fattr4_homogeneous(struct xdr_stream *xdr, + const struct nfsd4_fattr_args *args) +{ + /* + * Casefold-capable filesystems (e.g. ext4 or f2fs with the + * casefold feature) attach a Unicode encoding at mount time + * but apply case folding per directory. The per-file-system + * case_insensitive and case_preserving values can therefore + * legitimately differ across objects that share the same fsid. + * Report FATTR4_HOMOGENEOUS = FALSE on such filesystems to + * keep that variation consistent with RFC 8881 Section 5.8.2.16. + */ + return nfsd4_encode_bool(xdr, !sb_has_encoding(args->dentry->d_sb)); +} + static __be32 nfsd4_encode_fattr4_filehandle(struct xdr_stream *xdr, const struct nfsd4_fattr_args *args) { @@ -3748,8 +3777,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_ACLSUPPORT] = nfsd4_encode_fattr4_aclsupport, [FATTR4_ARCHIVE] = nfsd4_encode_fattr4__noop, [FATTR4_CANSETTIME] = nfsd4_encode_fattr4__true, - [FATTR4_CASE_INSENSITIVE] = nfsd4_encode_fattr4__false, - [FATTR4_CASE_PRESERVING] = nfsd4_encode_fattr4__true, + [FATTR4_CASE_INSENSITIVE] = nfsd4_encode_fattr4_case_insensitive, + [FATTR4_CASE_PRESERVING] = nfsd4_encode_fattr4_case_preserving, [FATTR4_CHOWN_RESTRICTED] = nfsd4_encode_fattr4__true, [FATTR4_FILEHANDLE] = nfsd4_encode_fattr4_filehandle, [FATTR4_FILEID] = nfsd4_encode_fattr4_fileid, @@ -3758,7 +3787,7 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { [FATTR4_FILES_TOTAL] = nfsd4_encode_fattr4_files_total, [FATTR4_FS_LOCATIONS] = nfsd4_encode_fattr4_fs_locations, [FATTR4_HIDDEN] = nfsd4_encode_fattr4__noop, - [FATTR4_HOMOGENEOUS] = nfsd4_encode_fattr4__true, + [FATTR4_HOMOGENEOUS] = nfsd4_encode_fattr4_homogeneous, [FATTR4_MAXFILESIZE] = nfsd4_encode_fattr4_maxfilesize, [FATTR4_MAXLINK] = nfsd4_encode_fattr4_maxlink, [FATTR4_MAXNAME] = nfsd4_encode_fattr4_maxname, @@ -3854,13 +3883,16 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = { /* * Note: @fhp can be NULL; in this case, we might have to compose the filehandle - * ourselves. + * ourselves. @case_cache is NULL for callers that encode a single dentry + * (GETATTR, the buffer wrapper); READDIR passes a per-request cache so + * non-directory children share the parent's case-folding probe result. */ static __be32 nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, const u32 *bmval, - int ignore_crossmnt) + int ignore_crossmnt, + struct nfsd_case_attrs_cache *case_cache) { DECLARE_BITMAP(attr_bitmap, ARRAY_SIZE(nfsd4_enc_fattr4_encode_ops)); struct nfs4_delegation *dp = NULL; @@ -3968,6 +4000,47 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.fhp = tempfh; } else args.fhp = fhp; + if (attrmask[0] & (FATTR4_WORD0_CASE_INSENSITIVE | + FATTR4_WORD0_CASE_PRESERVING)) { + /* + * In a batched encoder (READDIR) every non-directory + * child shares the same case-folding answer, so the + * directory being read is probed once and the result is + * cached. The probe targets case_cache->dir, the held + * readdir filehandle's dentry, instead of the child's + * locklessly-acquired dentry, which a concurrent rename + * could move under an unrelated parent. Directory + * entries are queried directly because casefold-capable + * filesystems answer per directory. + * + * Per RFC 8881 Section 18.7.3, an attribute advertised + * in SUPPORTED_ATTRS must come back with a value or the + * GETATTR must fail. nfsd_get_case_info() fills POSIX + * defaults and returns -EOPNOTSUPP when the underlying + * filesystem does not expose case state; encode those + * defaults so the reply agrees with what SUPPORTED_ATTRS + * advertises. Other errors fail the operation as the + * spec requires. + */ + if (case_cache && !d_is_dir(dentry)) { + if (!case_cache->valid) { + err = nfsd_get_case_info(case_cache->dir, + &case_cache->insensitive, + &case_cache->preserving); + if (err && err != -EOPNOTSUPP) + goto out_nfserr; + case_cache->valid = true; + } + args.case_insensitive = case_cache->insensitive; + args.case_preserving = case_cache->preserving; + } else { + err = nfsd_get_case_info(dentry, + &args.case_insensitive, + &args.case_preserving); + if (err && err != -EOPNOTSUPP) + goto out_nfserr; + } + } if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); @@ -4124,7 +4197,7 @@ __be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words, svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2); ret = nfsd4_encode_fattr4(rqstp, &xdr, fhp, exp, dentry, bmval, - ignore_crossmnt); + ignore_crossmnt, NULL); *p = xdr.p; return ret; } @@ -4162,6 +4235,7 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, struct dentry *dentry; __be32 nfserr; int ignore_crossmnt = 0; + bool crossed = false; dentry = lookup_one_positive_unlocked(&nop_mnt_idmap, &QSTR_LEN(name, namlen), @@ -4198,11 +4272,18 @@ nfsd4_encode_entry4_fattr(struct nfsd4_readdir *cd, const char *name, nfserr = check_nfsd_access(exp, cd->rd_rqstp, false); if (nfserr) goto out_put; + crossed = true; } out_encode: + /* + * A crossed entry no longer shares a parent with the directory + * being read, so it must neither consume nor populate the + * per-readdir case-folding cache. + */ nfserr = nfsd4_encode_fattr4(cd->rd_rqstp, cd->xdr, NULL, exp, dentry, - cd->rd_bmval, ignore_crossmnt); + cd->rd_bmval, ignore_crossmnt, + crossed ? NULL : &cd->rd_case_cache); out_put: dput(dentry); exp_put(exp); @@ -4449,7 +4530,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, /* obj_attributes */ return nfsd4_encode_fattr4(resp->rqstp, xdr, fhp, fhp->fh_export, - fhp->fh_dentry, getattr->ga_bmval, 0); + fhp->fh_dentry, getattr->ga_bmval, 0, NULL); } static __be32 @@ -4976,6 +5057,8 @@ static __be32 nfsd4_encode_dirlist4(struct xdr_stream *xdr, readdir->rd_maxcount = maxcount; readdir->common.err = 0; readdir->cookie_offset = 0; + readdir->rd_case_cache.dir = readdir->rd_fhp->fh_dentry; + readdir->rd_case_cache.valid = false; offset = readdir->rd_cookie; status = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp, &offset, &readdir->common, nfsd4_encode_entry4); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 04e3954d54bd9..92f4c333f0ff5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -10,6 +10,7 @@ #include <linux/ctype.h> #include <linux/fs_context.h> +#include <linux/sunrpc/cache.h> #include <linux/sunrpc/svcsock.h> #include <linux/lockd/bind.h> #include <linux/sunrpc/addr.h> @@ -246,7 +247,7 @@ static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size) if (rpc_pton(net, fo_path, size, sap, salen) == 0) return -EINVAL; - trace_nfsd_ctl_unlock_ip(net, buf); + trace_nfsd_ctl_unlock_ip(net, sap, svc_addr_len(sap)); return nlmsvc_unlock_all_by_ip(sap); } @@ -1412,6 +1413,21 @@ static int create_proc_exports_entry(void) unsigned int nfsd_net_id; +struct nfsd_genl_rqstp { + struct sockaddr rq_daddr; + struct sockaddr rq_saddr; + unsigned long rq_flags; + ktime_t rq_stime; + __be32 rq_xid; + u32 rq_vers; + u32 rq_prog; + u32 rq_proc; + + /* NFSv4 compound */ + u32 rq_opcnt; + u32 rq_opnum[16]; +}; + static int nfsd_genl_rpc_status_compose_msg(struct sk_buff *skb, struct netlink_callback *cb, struct nfsd_genl_rqstp *genl_rqstp) @@ -2198,6 +2214,193 @@ err_free_msg: } /** + * nfsd_nl_cache_flush_doit - flush nfsd caches via netlink + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Flush the svc_export and/or expkey caches. If NFSD_A_CACHE_FLUSH_MASK + * is provided, only flush the caches indicated by the bitmask (bit 0 = + * svc_export, bit 1 = expkey). If omitted, flush both. + * + * Return 0 on success or a negative errno. + */ +int nfsd_nl_cache_flush_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + u32 mask = ~0U; + + if (info->attrs[NFSD_A_CACHE_FLUSH_MASK]) + mask = nla_get_u32(info->attrs[NFSD_A_CACHE_FLUSH_MASK]); + + mutex_lock(&nfsd_mutex); + + if ((mask & NFSD_CACHE_TYPE_SVC_EXPORT) && + nn->svc_export_cache) + cache_purge(nn->svc_export_cache); + + if ((mask & NFSD_CACHE_TYPE_EXPKEY) && + nn->svc_expkey_cache) + cache_purge(nn->svc_expkey_cache); + + mutex_unlock(&nfsd_mutex); + + return 0; +} + +int nfsd_cache_notify(struct cache_detail *cd, struct cache_head *h, u32 cache_type) +{ + struct genlmsghdr *hdr; + struct sk_buff *msg; + + if (!genl_has_listeners(&nfsd_nl_family, cd->net, NFSD_NLGRP_EXPORTD)) + return -ENOLINK; + + msg = genlmsg_new(nla_total_size(sizeof(u32)), GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfsd_nl_family, 0, NFSD_CMD_CACHE_NOTIFY); + if (!hdr) { + nlmsg_free(msg); + return -ENOMEM; + } + + if (nla_put_u32(msg, NFSD_A_CACHE_NOTIFY_CACHE_TYPE, cache_type)) { + nlmsg_free(msg); + return -ENOMEM; + } + + genlmsg_end(msg, hdr); + return genlmsg_multicast_netns(&nfsd_nl_family, cd->net, msg, 0, + NFSD_NLGRP_EXPORTD, GFP_KERNEL); +} + +/** + * nfsd_nl_unlock_ip_doit - release NLM locks held by an IP address + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return: 0 on success or a negative errno. + */ +int nfsd_nl_unlock_ip_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct sockaddr *sap; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_UNLOCK_IP_ADDRESS)) + return -EINVAL; + sap = nla_data(info->attrs[NFSD_A_UNLOCK_IP_ADDRESS]); + switch (sap->sa_family) { + case AF_INET: + if (nla_len(info->attrs[NFSD_A_UNLOCK_IP_ADDRESS]) < + sizeof(struct sockaddr_in)) + return -EINVAL; + break; + case AF_INET6: + if (nla_len(info->attrs[NFSD_A_UNLOCK_IP_ADDRESS]) < + sizeof(struct sockaddr_in6)) + return -EINVAL; + break; + default: + return -EAFNOSUPPORT; + } + /* + * nlmsvc_unlock_all_by_ip() releases matching locks + * across all network namespaces because lockd operates + * a single global instance. + */ + trace_nfsd_ctl_unlock_ip(genl_info_net(info), sap, + svc_addr_len(sap)); + return nlmsvc_unlock_all_by_ip(sap); +} + +/** + * nfsd_nl_unlock_filesystem_doit - revoke NFS state under a filesystem path + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Return: 0 on success or a negative errno. + */ +int nfsd_nl_unlock_filesystem_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct path path; + int error; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_UNLOCK_FILESYSTEM_PATH)) + return -EINVAL; + + trace_nfsd_ctl_unlock_fs(net, + nla_data(info->attrs[NFSD_A_UNLOCK_FILESYSTEM_PATH])); + error = kern_path( + nla_data(info->attrs[NFSD_A_UNLOCK_FILESYSTEM_PATH]), + 0, &path); + if (error) + return error; + + nfsd4_cancel_copy_by_sb(net, path.dentry->d_sb); + error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb); + + mutex_lock(&nfsd_mutex); + if (nn->nfsd_serv) + nfsd4_revoke_states(nn, path.dentry->d_sb); + else + error = -EINVAL; + mutex_unlock(&nfsd_mutex); + + path_put(&path); + return error; +} + +/** + * nfsd_nl_unlock_export_doit - revoke NFSv4 state for an export path + * @skb: reply buffer + * @info: netlink metadata and command arguments + * + * Revokes all NFSv4 state (opens, locks, delegations, layouts) acquired + * through any export of the given path, regardless of which client holds + * the state. Userspace (exportfs -u) sends this after removing the last + * client for a path so the underlying filesystem can be unmounted. + * + * Unlike NFSD_CMD_UNLOCK_FILESYSTEM, which operates at superblock + * granularity, this command revokes only the state associated with + * exports of a specific path. + * + * Return: 0 on success or a negative errno. + */ +int nfsd_nl_unlock_export_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + struct path path; + int error; + + if (GENL_REQ_ATTR_CHECK(info, NFSD_A_UNLOCK_EXPORT_PATH)) + return -EINVAL; + + trace_nfsd_ctl_unlock_export(net, + nla_data(info->attrs[NFSD_A_UNLOCK_EXPORT_PATH])); + error = kern_path( + nla_data(info->attrs[NFSD_A_UNLOCK_EXPORT_PATH]), + 0, &path); + if (error) + return error; + + mutex_lock(&nfsd_mutex); + if (nn->nfsd_serv) { + nfsd_file_close_export(net, &path); + nfsd4_revoke_export_states(nn, &path); + } else + error = -EINVAL; + mutex_unlock(&nfsd_mutex); + + path_put(&path); + return error; +} + +/** * nfsd_net_init - Prepare the nfsd_net portion of a new net namespace * @net: a freshly-created network namespace * diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 7c009f07c90b5..11bce03b90316 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -45,11 +45,10 @@ bool nfsd_support_version(int vers); /* * Default and maximum payload size (NFS READ or WRITE), in bytes. - * The default is historical, and the maximum is an implementation - * limit. + * The maximum is an implementation limit. */ enum { - NFSSVC_DEFBLKSIZE = 1 * 1024 * 1024, + NFSSVC_DEFBLKSIZE = 4 * 1024 * 1024, NFSSVC_MAXBLKSIZE = RPCSVC_MAXPAYLOAD, }; @@ -60,21 +59,6 @@ struct readdir_cd { /* Maximum number of operations per session compound */ #define NFSD_MAX_OPS_PER_COMPOUND 200 -struct nfsd_genl_rqstp { - struct sockaddr rq_daddr; - struct sockaddr rq_saddr; - unsigned long rq_flags; - ktime_t rq_stime; - __be32 rq_xid; - u32 rq_vers; - u32 rq_prog; - u32 rq_proc; - - /* NFSv4 compound */ - u32 rq_opcnt; - u32 rq_opnum[16]; -}; - extern struct svc_program nfsd_programs[]; extern const struct svc_version nfsd_version2, nfsd_version3, nfsd_version4; extern struct mutex nfsd_mutex; @@ -123,7 +107,7 @@ struct dentry *nfsd_client_mkdir(struct nfsd_net *nn, const struct tree_descr *, struct dentry **fdentries); void nfsd_client_rmdir(struct dentry *dentry); - +int nfsd_cache_notify(struct cache_detail *cd, struct cache_head *h, u32 cache_type); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) #ifdef CONFIG_NFSD_V2_ACL diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index c5ccea64c2817..dec83e92650d1 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -145,6 +145,7 @@ struct nfs4_stid { spinlock_t sc_lock; struct nfs4_client *sc_client; struct nfs4_file *sc_file; + struct svc_export *sc_export; void (*sc_free)(struct nfs4_stid *); }; @@ -862,6 +863,7 @@ struct nfsd_file *find_any_file(struct nfs4_file *f); #ifdef CONFIG_NFSD_V4 void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb); +void nfsd4_revoke_export_states(struct nfsd_net *nn, const struct path *path); void nfsd4_cancel_copy_by_sb(struct net *net, struct super_block *sb); int nfsd_net_cb_init(struct nfsd_net *nn); void nfsd_net_cb_shutdown(struct nfsd_net *nn); @@ -869,6 +871,10 @@ void nfsd_net_cb_shutdown(struct nfsd_net *nn); static inline void nfsd4_revoke_states(struct nfsd_net *nn, struct super_block *sb) { } +static inline void nfsd4_revoke_export_states(struct nfsd_net *nn, + const struct path *path) +{ +} static inline void nfsd4_cancel_copy_by_sb(struct net *net, struct super_block *sb) { } diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h index b631a472222be..1c5a1e50f9465 100644 --- a/fs/nfsd/trace.h +++ b/fs/nfsd/trace.h @@ -1985,19 +1985,20 @@ TRACE_EVENT(nfsd_cb_recall_any_done, TRACE_EVENT(nfsd_ctl_unlock_ip, TP_PROTO( const struct net *net, - const char *address + const struct sockaddr *addr, + const unsigned int addrlen ), - TP_ARGS(net, address), + TP_ARGS(net, addr, addrlen), TP_STRUCT__entry( __field(unsigned int, netns_ino) - __string(address, address) + __sockaddr(addr, addrlen) ), TP_fast_assign( __entry->netns_ino = net->ns.inum; - __assign_str(address); + __assign_sockaddr(addr, addr, addrlen); ), - TP_printk("address=%s", - __get_str(address) + TP_printk("addr=%pISpc", + __get_sockaddr(addr) ) ); @@ -2020,6 +2021,25 @@ TRACE_EVENT(nfsd_ctl_unlock_fs, ) ); +TRACE_EVENT(nfsd_ctl_unlock_export, + TP_PROTO( + const struct net *net, + const char *path + ), + TP_ARGS(net, path), + TP_STRUCT__entry( + __field(unsigned int, netns_ino) + __string(path, path) + ), + TP_fast_assign( + __entry->netns_ino = net->ns.inum; + __assign_str(path); + ), + TP_printk("path=%s", + __get_str(path) + ) +); + TRACE_EVENT(nfsd_ctl_filehandle, TP_PROTO( const struct net *net, diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index eafdf7b7890fd..ba97e287c0072 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -32,6 +32,7 @@ #include <linux/writeback.h> #include <linux/security.h> #include <linux/sunrpc/xdr.h> +#include <linux/fileattr.h> #include "xdr3.h" @@ -2891,3 +2892,88 @@ nfsd_permission(struct svc_cred *cred, struct svc_export *exp, return err? nfserrno(err) : 0; } + +/** + * nfsd_get_case_info - get case sensitivity info for a dentry + * @dentry: dentry to query + * @case_insensitive: set to true if name comparison ignores case + * @case_preserving: set to true if case is preserved on disk + * + * On casefold-capable filesystems the flag lives on the directory, + * not on its entries, so for a non-directory @dentry the parent is + * queried instead. A directory (including an export root, whose + * parent lies outside the export) is queried as-is so its own + * contents' lookup behavior is reported. NFSD advertises + * fattr4_homogeneous as FALSE, so per-directory answers may differ + * within an export. + * + * The probe runs with kernel credentials. case_insensitive and + * case_preserving describe the directory's structural lookup + * behavior, not the caller's identity; running under the calling + * client's mapped credentials would let per-client MAC policy on + * the parent directory turn this query into NFS4ERR_ACCESS even + * though the underlying property is the same for every client. + * + * When the filesystem does not expose case-folding state (no + * ->fileattr_get, or the callback returns -EOPNOTSUPP / + * -ENOIOCTLCMD / -ENOTTY / -EINVAL), the outputs are filled with + * POSIX defaults (case-sensitive, case-preserving) on the premise + * that a filesystem with case-folding support wires up + * fileattr_get. + * + * Return: 0 with outputs filled, -EOPNOTSUPP with outputs filled + * to POSIX defaults, or a negative errno (e.g., -EIO, + * -ESTALE, -ENOMEM) with outputs unmodified. + */ +int +nfsd_get_case_info(struct dentry *dentry, bool *case_insensitive, + bool *case_preserving) +{ + struct file_kattr fa = {}; + const struct cred *saved; + struct cred *probe; + struct dentry *cd; + bool put = false; + int err; + + if (d_is_dir(dentry)) { + cd = dentry; + } else { + cd = dget_parent(dentry); + put = true; + } + + probe = prepare_kernel_cred(&init_task); + if (!probe) { + err = -ENOMEM; + goto out; + } + saved = override_creds(probe); + + err = vfs_fileattr_get(cd, &fa); + + put_cred(revert_creds(saved)); +out: + if (put) + dput(cd); + switch (err) { + case 0: + *case_insensitive = fa.fsx_xflags & FS_XFLAG_CASEFOLD; + *case_preserving = + !(fa.fsx_xflags & FS_XFLAG_CASENONPRESERVING); + return 0; + case -EINVAL: + case -ENOTTY: + case -ENOIOCTLCMD: + case -EOPNOTSUPP: + /* + * Filesystem does not expose case state. + * Report POSIX defaults. + */ + *case_insensitive = false; + *case_preserving = true; + return -EOPNOTSUPP; + default: + return err; + } +} diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index 702a844f2106b..e09ea04a51b99 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -156,6 +156,9 @@ __be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, nfsd_filldir_t); __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *, int access); +int nfsd_get_case_info(struct dentry *dentry, + bool *case_insensitive, + bool *case_preserving); __be32 nfsd_permission(struct svc_cred *cred, struct svc_export *exp, struct dentry *dentry, int acc); diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h index 522067b7fd755..a7c9714b0b0e1 100644 --- a/fs/nfsd/xdr3.h +++ b/fs/nfsd/xdr3.h @@ -209,8 +209,8 @@ struct nfsd3_pathconfres { __u32 p_name_max; __u32 p_no_trunc; __u32 p_chown_restricted; - __u32 p_case_insensitive; - __u32 p_case_preserving; + bool p_case_insensitive; + bool p_case_preserving; }; struct nfsd3_commitres { diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 9a4124c77e049..85574b2a139af 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -432,6 +432,19 @@ struct nfsd4_read { u32 rd_eof; /* response */ }; +/* + * Cache the case-folding properties of @dir so a batched encoder + * (e.g., READDIR) does not re-probe per child. @dir is the + * directory being read, held by the request, so it is stable + * against rename for the duration of the cache's lifetime. + */ +struct nfsd_case_attrs_cache { + struct dentry *dir; + bool valid; + bool insensitive; + bool preserving; +}; + struct nfsd4_readdir { u64 rd_cookie; /* request */ nfs4_verifier rd_verf; /* request */ @@ -444,6 +457,7 @@ struct nfsd4_readdir { struct readdir_cd common; struct xdr_stream *xdr; int cookie_offset; + struct nfsd_case_attrs_cache rd_case_cache; }; struct nfsd4_release_lockowner { diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 2dac70b99b0d7..7e2f330fd2837 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -14,6 +14,9 @@ #include <linux/fsnotify_backend.h> #include "fsnotify.h" +#define CREATE_TRACE_POINTS +#include <trace/events/fsnotify.h> + /* * Clear all of the marks on an inode when it is being evicted from core */ @@ -504,6 +507,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, int ret = 0; __u32 test_mask, marks_mask = 0; + trace_fsnotify(mask, data, data_type, dir, file_name, inode, cookie); + if (path) mnt = real_mount(path->mnt); diff --git a/fs/notify/mark.c b/fs/notify/mark.c index e256b420100dc..b2640d836a712 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -343,6 +343,35 @@ void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn) fsnotify_conn_set_children_dentry_flags(conn); } +/** + * fsnotify_modify_mark_mask - set and/or clear flags in a mark's mask + * @mark: mark to be modified + * @set: bits to be set in mask + * @clear: bits to be cleared in mask + * + * Modify a fsnotify_mark mask as directed, and update its associated conn. + * The caller is expected to hold a reference to the mark. + */ +void fsnotify_modify_mark_mask(struct fsnotify_mark *mark, u32 set, u32 clear) +{ + bool recalc = false; + u32 mask; + + WARN_ON_ONCE(clear & set); + + spin_lock(&mark->lock); + mask = mark->mask; + mark->mask |= set; + mark->mask &= ~clear; + if (mark->mask != mask) + recalc = true; + spin_unlock(&mark->lock); + + if (recalc) + fsnotify_recalc_mask(mark->connector); +} +EXPORT_SYMBOL_GPL(fsnotify_modify_mark_mask); + /* Free all connectors queued for freeing once SRCU period ends */ static void fsnotify_connector_destroy_workfn(struct work_struct *work) { diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index b041639ab406f..ad9350d7fc3fd 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -181,6 +181,34 @@ long ntfs_compat_ioctl(struct file *filp, u32 cmd, unsigned long arg) #endif /* + * ntfs_fileattr_get - inode_operations::fileattr_get + */ +int ntfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct inode *inode = d_inode(dentry); + struct ntfs_sb_info *sbi = inode->i_sb->s_fs_info; + + /* Avoid any operation if inode is bad. */ + if (unlikely(is_bad_ni(ntfs_i(inode)))) + return -EINVAL; + + /* + * NTFS preserves case (the default). Case sensitivity depends on + * mount options: with "nocase", NTFS is case-insensitive; + * otherwise it is case-sensitive. + */ + if (sbi->options->nocase) { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + } + if (inode->i_flags & S_IMMUTABLE) { + fa->fsx_xflags |= FS_XFLAG_IMMUTABLE; + fa->flags |= FS_IMMUTABLE_FL; + } + return 0; +} + +/* * ntfs_getattr - inode_operations::getattr */ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, @@ -1547,6 +1575,7 @@ const struct inode_operations ntfs_file_inode_operations = { .get_acl = ntfs_get_acl, .set_acl = ntfs_set_acl, .fiemap = ntfs_fiemap, + .fileattr_get = ntfs_fileattr_get, }; const struct file_operations ntfs_file_operations = { diff --git a/fs/ntfs3/namei.c b/fs/ntfs3/namei.c index b2af8f695e60f..e159ba66a34a4 100644 --- a/fs/ntfs3/namei.c +++ b/fs/ntfs3/namei.c @@ -518,6 +518,7 @@ const struct inode_operations ntfs_dir_inode_operations = { .getattr = ntfs_getattr, .listxattr = ntfs_listxattr, .fiemap = ntfs_fiemap, + .fileattr_get = ntfs_fileattr_get, }; const struct inode_operations ntfs_special_inode_operations = { diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h index bbf3b6a1dcbee..41db22d652c47 100644 --- a/fs/ntfs3/ntfs_fs.h +++ b/fs/ntfs3/ntfs_fs.h @@ -529,6 +529,7 @@ bool dir_is_empty(struct inode *dir); extern const struct file_operations ntfs_dir_operations; /* Globals from file.c */ +int ntfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, u32 flags); int ntfs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 12591c95c9256..b4bfe4ddf64ea 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -1126,7 +1126,7 @@ retry_deleg: if (error) goto out_inode_unlock; - error = try_break_deleg(inode, &delegated_inode); + error = try_break_deleg(inode, 0, &delegated_inode); if (error) goto out_inode_unlock; @@ -1234,7 +1234,7 @@ retry_deleg: if (error) goto out_inode_unlock; - error = try_break_deleg(inode, &delegated_inode); + error = try_break_deleg(inode, 0, &delegated_inode); if (error) goto out_inode_unlock; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index ce23924f01b3a..b14ae32835530 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -30,6 +30,7 @@ #include <linux/xattr.h> #include <linux/mm.h> #include <linux/key-type.h> +#include <linux/fileattr.h> #include <uapi/linux/magic.h> #include <net/ipv6.h> #include "cifsfs.h" @@ -1165,6 +1166,56 @@ struct file_system_type smb3_fs_type = { MODULE_ALIAS_FS("smb3"); MODULE_ALIAS("smb3"); +int cifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct inode *inode = d_inode(dentry); + u32 attrs; + + /* Preserve FS_COMPR_FL previously reported by cifs_ioctl(). */ + if (CIFS_I(inode)->cifsAttrs & ATTR_COMPRESSED) + fa->flags |= FS_COMPR_FL; + + /* + * FS_CASEFOLD_FL is defined by UAPI as a folder attribute, + * and userspace tools (e.g., lsattr) display it only on + * directories. Confine the case-handling bits to directories + * to match that convention; for non-directories the share's + * case semantics are still discoverable through the parent. + */ + if (!S_ISDIR(inode->i_mode)) + return 0; + + /* + * The server's FS_ATTRIBUTE_INFORMATION response, cached on + * the tcon at mount, reflects the share's case-handling + * semantics after any POSIX extensions negotiation. Prefer + * it over the client-local nocase mount option, which only + * governs dentry comparison on this superblock. + * + * QueryFSInfo is best-effort at mount; when it did not + * populate fsAttrInfo, MaxPathNameComponentLength remains + * zero. In that case fall back to nocase so the reporting + * matches the comparison behavior installed on the sb. + */ + if (le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength) == 0) { + if (tcon->nocase) { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + } + return 0; + } + attrs = le32_to_cpu(tcon->fsAttrInfo.Attributes); + if (!(attrs & FILE_CASE_SENSITIVE_SEARCH)) { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + } + if (!(attrs & FILE_CASE_PRESERVED_NAMES)) + fa->fsx_xflags |= FS_XFLAG_CASENONPRESERVING; + return 0; +} + const struct inode_operations cifs_dir_inode_ops = { .create = cifs_create, .atomic_open = cifs_atomic_open, @@ -1183,6 +1234,7 @@ const struct inode_operations cifs_dir_inode_ops = { .listxattr = cifs_listxattr, .get_acl = cifs_get_acl, .set_acl = cifs_set_acl, + .fileattr_get = cifs_fileattr_get, }; const struct inode_operations cifs_file_inode_ops = { @@ -1193,6 +1245,7 @@ const struct inode_operations cifs_file_inode_ops = { .fiemap = cifs_fiemap, .get_acl = cifs_get_acl, .set_acl = cifs_set_acl, + .fileattr_get = cifs_fileattr_get, }; const char *cifs_get_link(struct dentry *dentry, struct inode *inode, diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index c455b15f27782..9d85224fafab5 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -89,6 +89,9 @@ extern const struct inode_operations cifs_file_inode_ops; extern const struct inode_operations cifs_symlink_inode_ops; extern const struct inode_operations cifs_namespace_inode_operations; +struct file_kattr; +int cifs_fileattr_get(struct dentry *dentry, struct file_kattr *fa); + /* Functions related to files and directories */ extern const struct netfs_request_ops cifs_req_ops; diff --git a/fs/smb/client/namespace.c b/fs/smb/client/namespace.c index 52a520349cb76..52a51b032fae3 100644 --- a/fs/smb/client/namespace.c +++ b/fs/smb/client/namespace.c @@ -294,4 +294,5 @@ struct vfsmount *cifs_d_automount(struct path *path) } const struct inode_operations cifs_namespace_inode_operations = { + .fileattr_get = cifs_fileattr_get, }; diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 620bcfbbfd92e..5473aea5d7f56 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -14,6 +14,7 @@ #include <linux/falloc.h> #include <linux/mount.h> #include <linux/filelock.h> +#include <linux/fileattr.h> #include "glob.h" #include "smbfsctl.h" @@ -5561,16 +5562,33 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, case FS_ATTRIBUTE_INFORMATION: { FILE_SYSTEM_ATTRIBUTE_INFO *info; + struct file_kattr fa = {}; size_t sz; + u32 attrs; + int err; info = (FILE_SYSTEM_ATTRIBUTE_INFO *)rsp->Buffer; - info->Attributes = cpu_to_le32(FILE_SUPPORTS_OBJECT_IDS | - FILE_PERSISTENT_ACLS | - FILE_UNICODE_ON_DISK | - FILE_CASE_PRESERVED_NAMES | - FILE_CASE_SENSITIVE_SEARCH | - FILE_SUPPORTS_BLOCK_REFCOUNTING); + attrs = FILE_SUPPORTS_OBJECT_IDS | + FILE_PERSISTENT_ACLS | + FILE_UNICODE_ON_DISK | + FILE_SUPPORTS_BLOCK_REFCOUNTING; + + err = vfs_fileattr_get(path.dentry, &fa); + /* + * -EINVAL, -EOPNOTSUPP: ntfs-3g and other FUSE + * filesystems that lack FS_IOC_FSGETXATTR support. + */ + if (err && err != -ENOIOCTLCMD && err != -ENOTTY && + err != -EINVAL && err != -EOPNOTSUPP) { + path_put(&path); + return err; + } + if (!(fa.fsx_xflags & FS_XFLAG_CASEFOLD)) + attrs |= FILE_CASE_SENSITIVE_SEARCH; + if (!(fa.fsx_xflags & FS_XFLAG_CASENONPRESERVING)) + attrs |= FILE_CASE_PRESERVED_NAMES; + info->Attributes = cpu_to_le32(attrs); info->Attributes |= cpu_to_le32(server_conf.share_fake_fscaps); if (test_share_config_flag(work->tcon->share_conf, diff --git a/fs/vboxsf/dir.c b/fs/vboxsf/dir.c index 42bedc4ec7af7..c5bd3271aa961 100644 --- a/fs/vboxsf/dir.c +++ b/fs/vboxsf/dir.c @@ -477,4 +477,5 @@ const struct inode_operations vboxsf_dir_iops = { .symlink = vboxsf_dir_symlink, .getattr = vboxsf_getattr, .setattr = vboxsf_setattr, + .fileattr_get = vboxsf_fileattr_get, }; diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index 7a7a3fbb26514..943953867e18e 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -222,7 +222,8 @@ const struct file_operations vboxsf_reg_fops = { const struct inode_operations vboxsf_reg_iops = { .getattr = vboxsf_getattr, - .setattr = vboxsf_setattr + .setattr = vboxsf_setattr, + .fileattr_get = vboxsf_fileattr_get, }; static int vboxsf_read_folio(struct file *file, struct folio *folio) @@ -389,5 +390,6 @@ static const char *vboxsf_get_link(struct dentry *dentry, struct inode *inode, } const struct inode_operations vboxsf_lnk_iops = { - .get_link = vboxsf_get_link + .get_link = vboxsf_get_link, + .fileattr_get = vboxsf_fileattr_get, }; diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index a618cb093e007..a61fbab51d370 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -185,6 +185,13 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) if (err) goto fail_unmap; + /* + * A failed query leaves sbi->case_insensitive false, so the + * mount defaults to reporting case-sensitive behavior. Do not + * fail the mount over an advisory attribute. + */ + vboxsf_query_case_sensitive(sbi); + sb->s_magic = VBOXSF_SUPER_MAGIC; sb->s_blocksize = 1024; sb->s_maxbytes = MAX_LFS_FILESIZE; diff --git a/fs/vboxsf/utils.c b/fs/vboxsf/utils.c index 440e8c50629d3..298bfc93255c0 100644 --- a/fs/vboxsf/utils.c +++ b/fs/vboxsf/utils.c @@ -11,6 +11,7 @@ #include <linux/sizes.h> #include <linux/pagemap.h> #include <linux/vfs.h> +#include <linux/fileattr.h> #include "vfsmod.h" struct inode *vboxsf_new_inode(struct super_block *sb) @@ -567,3 +568,32 @@ int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, return err; } + +int vboxsf_query_case_sensitive(struct vboxsf_sbi *sbi) +{ + struct shfl_volinfo volinfo = {}; + u32 buf_len; + int err; + + buf_len = sizeof(volinfo); + err = vboxsf_fsinfo(sbi->root, 0, SHFL_INFO_GET | SHFL_INFO_VOLUME, + &buf_len, &volinfo); + if (err) + return err; + if (buf_len < sizeof(volinfo)) + return 0; + + sbi->case_insensitive = !volinfo.properties.case_sensitive; + return 0; +} + +int vboxsf_fileattr_get(struct dentry *dentry, struct file_kattr *fa) +{ + struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb); + + if (sbi->case_insensitive) { + fa->fsx_xflags |= FS_XFLAG_CASEFOLD; + fa->flags |= FS_CASEFOLD_FL; + } + return 0; +} diff --git a/fs/vboxsf/vfsmod.h b/fs/vboxsf/vfsmod.h index 05973eb89d528..b61afd0ce842d 100644 --- a/fs/vboxsf/vfsmod.h +++ b/fs/vboxsf/vfsmod.h @@ -47,6 +47,7 @@ struct vboxsf_sbi { u32 next_generation; u32 root; int bdi_id; + bool case_insensitive; }; /* per-inode information */ @@ -111,6 +112,11 @@ void vboxsf_dir_info_free(struct vboxsf_dir_info *p); int vboxsf_dir_read_all(struct vboxsf_sbi *sbi, struct vboxsf_dir_info *sf_d, u64 handle); +int vboxsf_query_case_sensitive(struct vboxsf_sbi *sbi); + +struct file_kattr; +int vboxsf_fileattr_get(struct dentry *dentry, struct file_kattr *fa); + /* from vboxsf_wrappers.c */ int vboxsf_connect(void); void vboxsf_disconnect(void); diff --git a/fs/xattr.c b/fs/xattr.c index 09ecbaaa16608..efdcf2a485857 100644 --- a/fs/xattr.c +++ b/fs/xattr.c @@ -306,7 +306,7 @@ __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry, if (error) goto out; - error = try_break_deleg(inode, delegated_inode); + error = try_break_deleg(inode, 0, delegated_inode); if (error) goto out; @@ -564,7 +564,7 @@ __vfs_removexattr_locked(struct mnt_idmap *idmap, if (error) goto out; - error = try_break_deleg(inode, delegated_inode); + error = try_break_deleg(inode, 0, delegated_inode); if (error) goto out; diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index 551fa51befb65..82be54b6f8d3a 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -130,6 +130,8 @@ xfs_ip2xflags( if (xfs_inode_has_attr_fork(ip)) flags |= FS_XFLAG_HASATTR; + if (xfs_has_asciici(ip->i_mount)) + flags |= FS_XFLAG_CASEFOLD; return flags; } diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index e3e3c3c898400..9b2ad3786b190 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -244,8 +244,6 @@ const struct export_operations xfs_export_operations = { .get_parent = xfs_fs_get_parent, .commit_metadata = xfs_fs_nfs_commit_metadata, #ifdef CONFIG_EXPORTFS_BLOCK_OPS - .get_uuid = xfs_fs_get_uuid, - .map_blocks = xfs_fs_map_blocks, - .commit_blocks = xfs_fs_commit_blocks, + .block_ops = &xfs_export_block_ops, #endif }; diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 46e234863644f..f8216f74679fd 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -517,7 +517,7 @@ xfs_ioc_fsgetxattra( xfs_inode_t *ip, void __user *arg) { - struct file_kattr fa; + struct file_kattr fa = {}; xfs_ilock(ip, XFS_ILOCK_SHARED); xfs_fill_fsxattr(ip, XFS_ATTR_FORK, &fa); @@ -755,9 +755,23 @@ xfs_fileattr_set( trace_xfs_ioctl_setattr(ip); if (!fa->fsx_valid) { - if (fa->flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | - FS_NOATIME_FL | FS_NODUMP_FL | - FS_SYNC_FL | FS_DAX_FL | FS_PROJINHERIT_FL)) + unsigned int allowed = FS_IMMUTABLE_FL | FS_APPEND_FL | + FS_NOATIME_FL | FS_NODUMP_FL | + FS_SYNC_FL | FS_DAX_FL | + FS_PROJINHERIT_FL; + + /* + * FS_CASEFOLD_FL reflects the ASCIICI superblock feature, + * a read-only property. Accept it as a no-op so chattr's + * RMW round-trip succeeds; reject any attempt to enable + * it on a non-ASCIICI filesystem. xfs_flags2diflags() + * has no clause for CASEFOLD, so the bit is dropped from + * the on-disk diflags regardless. + */ + if (xfs_has_asciici(mp)) + allowed |= FS_CASEFOLD_FL; + + if (fa->flags & ~allowed) return -EOPNOTSUPP; } diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 221e55887a2a4..266a07601e8d0 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -13,6 +13,7 @@ #include "xfs_bmap.h" #include "xfs_iomap.h" #include "xfs_pnfs.h" +#include <linux/exportfs_block.h> /* * Ensure that we do not have any outstanding pNFS layouts that can be used by @@ -45,11 +46,22 @@ xfs_break_leased_layouts( return error; } +static expfs_block_layouts_t +xfs_fs_layouts_supported( + struct super_block *sb) +{ + expfs_block_layouts_t supported = EXPFS_BLOCK_IN_BAND_ID; + + if (exportfs_bdev_supports_out_of_band_id(sb->s_bdev)) + supported |= EXPFS_BLOCK_OUT_OF_BAND_ID; + return supported; +} + /* * Get a unique ID including its location so that the client can identify * the exported device. */ -int +static int xfs_fs_get_uuid( struct super_block *sb, u8 *buf, @@ -104,7 +116,7 @@ xfs_fs_map_update_inode( /* * Get a layout for the pNFS client. */ -int +static int xfs_fs_map_blocks( struct inode *inode, loff_t offset, @@ -252,28 +264,27 @@ xfs_pnfs_validate_isize( * to manually flush the cache here similar to what the fsync code path does * for datasyncs on files that have no dirty metadata. */ -int +static int xfs_fs_commit_blocks( struct inode *inode, struct iomap *maps, int nr_maps, - struct iattr *iattr) + loff_t new_size) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; + struct timespec64 now; bool update_isize = false; int error, i; loff_t size; - ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)); - xfs_ilock(ip, XFS_IOLOCK_EXCL); size = i_size_read(inode); - if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size) { + if (new_size > size) { update_isize = true; - size = iattr->ia_size; + size = new_size; } for (i = 0; i < nr_maps; i++) { @@ -318,11 +329,13 @@ xfs_fs_commit_blocks( xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); - setattr_copy(&nop_mnt_idmap, inode, iattr); + now = inode_set_ctime_current(inode); + inode_set_atime_to_ts(inode, now); + inode_set_mtime_to_ts(inode, now); + if (update_isize) { - i_size_write(inode, iattr->ia_size); - ip->i_disk_size = iattr->ia_size; + i_size_write(inode, new_size); + ip->i_disk_size = new_size; } xfs_trans_set_sync(tp); @@ -332,3 +345,10 @@ out_drop_iolock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } + +const struct exportfs_block_ops xfs_export_block_ops = { + .layouts_supported = xfs_fs_layouts_supported, + .get_uuid = xfs_fs_get_uuid, + .map_blocks = xfs_fs_map_blocks, + .commit_blocks = xfs_fs_commit_blocks, +}; diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h index 940c6c2ad88c5..bf43b2009e4cc 100644 --- a/fs/xfs/xfs_pnfs.h +++ b/fs/xfs/xfs_pnfs.h @@ -2,13 +2,9 @@ #ifndef _XFS_PNFS_H #define _XFS_PNFS_H 1 -#ifdef CONFIG_EXPORTFS_BLOCK_OPS -int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset); -int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length, - struct iomap *iomap, bool write, u32 *device_generation); -int xfs_fs_commit_blocks(struct inode *inode, struct iomap *maps, int nr_maps, - struct iattr *iattr); +#include <linux/exportfs_block.h> +#ifdef CONFIG_EXPORTFS_BLOCK_OPS int xfs_break_leased_layouts(struct inode *inode, uint *iolock, bool *did_unlock); #else @@ -18,4 +14,7 @@ xfs_break_leased_layouts(struct inode *inode, uint *iolock, bool *did_unlock) return 0; } #endif /* CONFIG_EXPORTFS_BLOCK_OPS */ + +extern const struct exportfs_block_ops xfs_export_block_ops; + #endif /* _XFS_PNFS_H */ |
