diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 12:59:22 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 12:59:22 +0100 |
| commit | 6390df1c8d5c5f75c845a87a202b4aa7392742a2 (patch) | |
| tree | 43cd60728412c7bddcffa93db8193ba5918ba4c9 /fs | |
| parent | 26da87fd412077a278884c397ef65fd33dabaf58 (diff) | |
| parent | 6d9446c014f7fb88b328cafd0907900b9c2efc79 (diff) | |
| download | linux-next-history-6390df1c8d5c5f75c845a87a202b4aa7392742a2.tar.gz | |
Merge branch 'for-next' of https://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse.git
# Conflicts:
# fs/fuse/dev.c
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/fuse/Makefile | 3 | ||||
| -rw-r--r-- | fs/fuse/args.h | 65 | ||||
| -rw-r--r-- | fs/fuse/backing.c | 3 | ||||
| -rw-r--r-- | fs/fuse/control.c | 16 | ||||
| -rw-r--r-- | fs/fuse/cuse.c | 28 | ||||
| -rw-r--r-- | fs/fuse/dev.c | 1301 | ||||
| -rw-r--r-- | fs/fuse/dev.h | 110 | ||||
| -rw-r--r-- | fs/fuse/dev_uring.c | 143 | ||||
| -rw-r--r-- | fs/fuse/dev_uring_i.h | 39 | ||||
| -rw-r--r-- | fs/fuse/dir.c | 12 | ||||
| -rw-r--r-- | fs/fuse/file.c | 154 | ||||
| -rw-r--r-- | fs/fuse/fuse_dev_i.h | 350 | ||||
| -rw-r--r-- | fs/fuse/fuse_i.h | 862 | ||||
| -rw-r--r-- | fs/fuse/fuse_trace.h | 4 | ||||
| -rw-r--r-- | fs/fuse/inode.c | 263 | ||||
| -rw-r--r-- | fs/fuse/notify.c | 434 | ||||
| -rw-r--r-- | fs/fuse/poll.c | 141 | ||||
| -rw-r--r-- | fs/fuse/req.c | 99 | ||||
| -rw-r--r-- | fs/fuse/req_timeout.c | 148 | ||||
| -rw-r--r-- | fs/fuse/sysctl.c | 1 | ||||
| -rw-r--r-- | fs/fuse/sysctl.h | 9 | ||||
| -rw-r--r-- | fs/fuse/virtio_fs.c | 21 |
22 files changed, 2309 insertions, 1897 deletions
diff --git a/fs/fuse/Makefile b/fs/fuse/Makefile index 22ad9538dfc4b..245e67852b03e 100644 --- a/fs/fuse/Makefile +++ b/fs/fuse/Makefile @@ -11,7 +11,8 @@ obj-$(CONFIG_CUSE) += cuse.o obj-$(CONFIG_VIRTIO_FS) += virtiofs.o fuse-y := trace.o # put trace.o first so we see ftrace errors sooner -fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o +fuse-y += dev.o dir.o file.o inode.o control.o xattr.o acl.o readdir.o ioctl.o req_timeout.o req.o +fuse-y += poll.o notify.o fuse-y += iomode.o fuse-$(CONFIG_FUSE_DAX) += dax.o fuse-$(CONFIG_FUSE_PASSTHROUGH) += passthrough.o backing.o diff --git a/fs/fuse/args.h b/fs/fuse/args.h new file mode 100644 index 0000000000000..ecfe51a192af0 --- /dev/null +++ b/fs/fuse/args.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FS_FUSE_ARGS_H +#define _FS_FUSE_ARGS_H + +#include <linux/types.h> + +struct fuse_mount; + +/** One input argument of a request */ +struct fuse_in_arg { + unsigned size; + const void *value; +}; + +/** One output argument of a request */ +struct fuse_arg { + unsigned size; + void *value; +}; + +struct fuse_args { + u64 nodeid; + u32 opcode; + u32 uid; + u32 gid; + u32 pid; + u8 in_numargs; + u8 out_numargs; + u8 ext_idx; + bool force:1; + bool noreply:1; + bool nocreds:1; + bool in_pages:1; + bool out_pages:1; + bool user_pages:1; + bool out_argvar:1; + bool page_zeroing:1; + bool page_replace:1; + bool may_block:1; + bool is_ext:1; + bool is_pinned:1; + bool invalidate_vmap:1; + bool abort_on_kill:1; + struct fuse_in_arg in_args[4]; + struct fuse_arg out_args[2]; + void (*end)(struct fuse_args *args, int error); + /* Used for kvec iter backed by vmalloc address */ + void *vmap_base; +}; + +/** FUSE folio descriptor */ +struct fuse_folio_desc { + unsigned int length; + unsigned int offset; +}; + +struct fuse_args_pages { + struct fuse_args args; + struct folio **folios; + struct fuse_folio_desc *descs; + unsigned int num_folios; +}; + +#endif /* _FS_FUSE_ARGS_H */ 
diff --git a/fs/fuse/backing.c b/fs/fuse/backing.c index d95dfa48483f0..472b6afa7dfff 100644 --- a/fs/fuse/backing.c +++ b/fs/fuse/backing.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 CTERA Networks. */ +#include "dev.h" #include "fuse_i.h" #include <linux/file.h> @@ -118,7 +119,7 @@ int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map) goto out_fput; fb->file = file; - fb->cred = prepare_creds(); + fb->cred = get_current_cred(); refcount_set(&fb->count, 1); res = fuse_backing_id_alloc(fc, fb); diff --git a/fs/fuse/control.c b/fs/fuse/control.c index f902a7fb4630c..925a154884996 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "dev.h" #include <linux/init.h> #include <linux/module.h> @@ -37,9 +38,7 @@ static ssize_t fuse_conn_abort_write(struct file *file, const char __user *buf, { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { - if (fc->abort_err) - fc->aborted = true; - fuse_abort_conn(fc); + fuse_chan_abort(fc->chan, fc->abort_err); fuse_conn_put(fc); } return count; @@ -57,7 +56,7 @@ static ssize_t fuse_conn_waiting_read(struct file *file, char __user *buf, if (!fc) return 0; - value = atomic_read(&fc->num_waiting); + value = fuse_chan_num_waiting(fc->chan); file->private_data = (void *)value; fuse_conn_put(fc); } @@ -111,7 +110,7 @@ static ssize_t fuse_conn_max_background_read(struct file *file, if (!fc) return 0; - val = READ_ONCE(fc->max_background); + val = fuse_chan_max_background(fc->chan); fuse_conn_put(fc); return fuse_conn_limit_read(file, buf, len, ppos, val); @@ -129,12 +128,7 @@ static ssize_t fuse_conn_max_background_write(struct file *file, if (ret > 0) { struct fuse_conn *fc = fuse_ctl_file_conn_get(file); if (fc) { - spin_lock(&fc->bg_lock); - fc->max_background = val; - fc->blocked = fc->num_background >= fc->max_background; - if (!fc->blocked) - wake_up(&fc->blocked_waitq); - spin_unlock(&fc->bg_lock); + fuse_chan_max_background_set(fc->chan, val); 
fuse_conn_put(fc); } } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 174333633471b..bac327cfc7f14 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -51,6 +51,7 @@ #include <linux/uio.h> #include <linux/user_namespace.h> +#include "dev.h" #include "fuse_i.h" #include "fuse_dev_i.h" @@ -306,6 +307,7 @@ struct cuse_init_args { struct cuse_init_out out; struct folio *folio; struct fuse_folio_desc desc; + struct fuse_conn *fc; }; /** @@ -319,11 +321,10 @@ struct cuse_init_args { * required data structures for it. Please read the comment at the * top of this file for high level overview. */ -static void cuse_process_init_reply(struct fuse_mount *fm, - struct fuse_args *args, int error) +static void cuse_process_init_reply(struct fuse_args *args, int error) { - struct fuse_conn *fc = fm->fc; struct cuse_init_args *ia = container_of(args, typeof(*ia), ap.args); + struct fuse_conn *fc = ia->fc; struct fuse_args_pages *ap = &ia->ap; struct cuse_conn *cc = fc_to_cc(fc), *pos; struct cuse_init_out *arg = &ia->out; @@ -423,7 +424,7 @@ err_unlock: err_region: unregister_chrdev_region(devt, 1); err: - fuse_abort_conn(fc); + fuse_chan_abort(fc->chan, false); goto out; } @@ -466,6 +467,7 @@ static int cuse_send_init(struct cuse_conn *cc) ap->descs = &ia->desc; ia->folio = folio; ia->desc.length = ap->args.out_args[1].size; + ia->fc = &cc->fc; ap->args.end = cuse_process_init_reply; rc = fuse_simple_background(fm, &ap->args, GFP_KERNEL); @@ -502,8 +504,12 @@ static int cuse_channel_open(struct inode *inode, struct file *file) { struct fuse_dev *fud; struct cuse_conn *cc; + struct fuse_chan *fch __free(fuse_chan_free) = fuse_dev_chan_new(); int rc; + if (!fch) + return -ENOMEM; + /* set up cuse_conn */ cc = kzalloc_obj(*cc); if (!cc) @@ -513,18 +519,16 @@ static int cuse_channel_open(struct inode *inode, struct file *file) * Limit the cuse channel to requests that can * be represented in file->f_cred->user_ns. 
*/ - fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, - &fuse_dev_fiq_ops, NULL); - + fuse_conn_init(&cc->fc, &cc->fm, file->f_cred->user_ns, no_free_ptr(fch)); cc->fc.release = cuse_fc_release; - fud = fuse_dev_alloc_install(&cc->fc); + fud = fuse_dev_alloc_install(cc->fc.chan); fuse_conn_put(&cc->fc); if (!fud) return -ENOMEM; INIT_LIST_HEAD(&cc->list); - cc->fc.initialized = 1; + cc->fc.chan->initialized = 1; rc = cuse_send_init(cc); if (rc) { fuse_dev_put(fud); @@ -549,7 +553,7 @@ static int cuse_channel_open(struct inode *inode, struct file *file) static int cuse_channel_release(struct inode *inode, struct file *file) { struct fuse_dev *fud = __fuse_get_dev(file); - struct cuse_conn *cc = fc_to_cc(fud->fc); + struct cuse_conn *cc = fc_to_cc(fud->chan->conn); /* remove from the conntbl, no more access from this point on */ mutex_lock(&cuse_lock); @@ -581,7 +585,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev, { struct cuse_conn *cc = dev_get_drvdata(dev); - return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); + return sprintf(buf, "%d\n", atomic_read(&cc->fc.chan->num_waiting)); } static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL); @@ -591,7 +595,7 @@ static ssize_t cuse_class_abort_store(struct device *dev, { struct cuse_conn *cc = dev_get_drvdata(dev); - fuse_abort_conn(&cc->fc); + fuse_chan_abort(cc->fc.chan, false); return count; } static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c105aaf9ff5d7..a9385d3597ccf 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -6,9 +6,9 @@ See the file COPYING. 
*/ +#include "dev.h" +#include "args.h" #include "dev_uring_i.h" -#include "fuse_i.h" -#include "fuse_dev_i.h" #include <linux/init.h> #include <linux/module.h> @@ -30,118 +30,26 @@ MODULE_ALIAS_MISCDEV(FUSE_MINOR); MODULE_ALIAS("devname:fuse"); -static struct kmem_cache *fuse_req_cachep; - -const unsigned long fuse_timeout_timer_freq = - secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); - -bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list) -{ - struct fuse_req *req; - - req = list_first_entry_or_null(list, struct fuse_req, list); - if (!req) - return false; - return time_is_before_jiffies(req->create_time + fc->timeout.req_timeout); -} - -static bool fuse_fpq_processing_expired(struct fuse_conn *fc, struct list_head *processing) -{ - int i; - - for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) - if (fuse_request_expired(fc, &processing[i])) - return true; - - return false; -} - -/* - * Check if any requests aren't being completed by the time the request timeout - * elapses. To do so, we: - * - check the fiq pending list - * - check the bg queue - * - check the fpq io and processing lists - * - * To make this fast, we only check against the head request on each list since - * these are generally queued in order of creation time (eg newer requests get - * queued to the tail). We might miss a few edge cases (eg requests transitioning - * between lists, re-sent requests at the head of the pending list having a - * later creation time than other requests on that list, etc.) but that is fine - * since if the request never gets fulfilled, it will eventually be caught. 
- */ -void fuse_check_timeout(struct work_struct *work) -{ - struct delayed_work *dwork = to_delayed_work(work); - struct fuse_conn *fc = container_of(dwork, struct fuse_conn, - timeout.work); - struct fuse_iqueue *fiq = &fc->iq; - struct fuse_dev *fud; - struct fuse_pqueue *fpq; - bool expired = false; - - if (!atomic_read(&fc->num_waiting)) - goto out; - - spin_lock(&fiq->lock); - expired = fuse_request_expired(fc, &fiq->pending); - spin_unlock(&fiq->lock); - if (expired) - goto abort_conn; - - spin_lock(&fc->bg_lock); - expired = fuse_request_expired(fc, &fc->bg_queue); - spin_unlock(&fc->bg_lock); - if (expired) - goto abort_conn; - - spin_lock(&fc->lock); - if (!fc->connected) { - spin_unlock(&fc->lock); - return; - } - list_for_each_entry(fud, &fc->devices, entry) { - fpq = &fud->pq; - spin_lock(&fpq->lock); - if (fuse_request_expired(fc, &fpq->io) || - fuse_fpq_processing_expired(fc, fpq->processing)) { - spin_unlock(&fpq->lock); - spin_unlock(&fc->lock); - goto abort_conn; - } - - spin_unlock(&fpq->lock); - } - spin_unlock(&fc->lock); +static DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq); - if (fuse_uring_request_expired(fc)) - goto abort_conn; - -out: - queue_delayed_work(system_percpu_wq, &fc->timeout.work, - fuse_timeout_timer_freq); - return; - -abort_conn: - fuse_abort_conn(fc); -} +static struct kmem_cache *fuse_req_cachep; -static void fuse_request_init(struct fuse_mount *fm, struct fuse_req *req) +static void fuse_request_init(struct fuse_chan *fch, struct fuse_req *req) { INIT_LIST_HEAD(&req->list); INIT_LIST_HEAD(&req->intr_entry); init_waitqueue_head(&req->waitq); refcount_set(&req->count, 1); __set_bit(FR_PENDING, &req->flags); - req->fm = fm; + req->chan = fch; req->create_time = jiffies; } -static struct fuse_req *fuse_request_alloc(struct fuse_mount *fm, gfp_t flags) +static struct fuse_req *fuse_request_alloc(struct fuse_chan *fch, gfp_t flags) { struct fuse_req *req = kmem_cache_zalloc(fuse_req_cachep, flags); if (req) - fuse_request_init(fm, 
req); + fuse_request_init(fch, req); return req; } @@ -162,110 +70,86 @@ static void __fuse_put_request(struct fuse_req *req) refcount_dec(&req->count); } -void fuse_set_initialized(struct fuse_conn *fc) +void fuse_chan_set_initialized(struct fuse_chan *fch, struct fuse_chan_param *param) { + if (param) { + fch->minor = param->minor; + fch->max_write = param->max_write; + fch->max_pages = param->max_pages; + } + /* Make sure stores before this are seen on another CPU */ smp_wmb(); - fc->initialized = 1; + fch->initialized = 1; + wake_up_all(&fch->blocked_waitq); } -static bool fuse_block_alloc(struct fuse_conn *fc, bool for_background) +static bool fuse_block_alloc(struct fuse_chan *fch, bool for_background) { - return !fc->initialized || (for_background && fc->blocked) || - (fc->io_uring && fc->connected && !fuse_uring_ready(fc)); + return !fch->initialized || (for_background && fch->blocked) || + (fch->io_uring && fch->connected && !fuse_uring_ready(fch)); } -static void fuse_drop_waiting(struct fuse_conn *fc) +static void fuse_drop_waiting(struct fuse_chan *fch) { /* - * lockess check of fc->connected is okay, because atomic_dec_and_test() - * provides a memory barrier matched with the one in fuse_wait_aborted() + * lockess check of fch->connected is okay, because atomic_dec_and_test() + * provides a memory barrier matched with the one in fuse_chan_wait_aborted() * to ensure no wake-up is missed. 
*/ - if (atomic_dec_and_test(&fc->num_waiting) && - !READ_ONCE(fc->connected)) { + if (atomic_dec_and_test(&fch->num_waiting) && + !READ_ONCE(fch->connected)) { /* wake up aborters */ - wake_up_all(&fc->blocked_waitq); + wake_up_all(&fch->blocked_waitq); } } static void fuse_put_request(struct fuse_req *req); -static struct fuse_req *fuse_get_req(struct mnt_idmap *idmap, - struct fuse_mount *fm, - bool for_background) +static struct fuse_req *fuse_get_req(struct fuse_chan *fch, bool for_background) { - struct fuse_conn *fc = fm->fc; struct fuse_req *req; - bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP); - kuid_t fsuid; - kgid_t fsgid; int err; - atomic_inc(&fc->num_waiting); + atomic_inc(&fch->num_waiting); - if (fuse_block_alloc(fc, for_background)) { + if (fuse_block_alloc(fch, for_background)) { err = -EINTR; - if (wait_event_state_exclusive(fc->blocked_waitq, - !fuse_block_alloc(fc, for_background), + if (wait_event_state_exclusive(fch->blocked_waitq, + !fuse_block_alloc(fch, for_background), (TASK_KILLABLE | TASK_FREEZABLE))) goto out; } - /* Matches smp_wmb() in fuse_set_initialized() */ + + /* Matches smp_wmb() in fuse_chan_set_initialized() */ smp_rmb(); err = -ENOTCONN; - if (!fc->connected) - goto out; - - err = -ECONNREFUSED; - if (fc->conn_error) + if (!fch->connected) goto out; - req = fuse_request_alloc(fm, GFP_KERNEL); + req = fuse_request_alloc(fch, GFP_KERNEL); err = -ENOMEM; if (!req) { if (for_background) - wake_up(&fc->blocked_waitq); + wake_up(&fch->blocked_waitq); goto out; } - req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); - __set_bit(FR_WAITING, &req->flags); if (for_background) __set_bit(FR_BACKGROUND, &req->flags); - /* - * Keep the old behavior when idmappings support was not - * declared by a FUSE server. 
- * - * For those FUSE servers who support idmapped mounts, - * we send UID/GID only along with "inode creation" - * fuse requests, otherwise idmap == &invalid_mnt_idmap and - * req->in.h.{u,g}id will be equal to FUSE_INVALID_UIDGID. - */ - fsuid = no_idmap ? current_fsuid() : mapped_fsuid(idmap, fc->user_ns); - fsgid = no_idmap ? current_fsgid() : mapped_fsgid(idmap, fc->user_ns); - req->in.h.uid = from_kuid(fc->user_ns, fsuid); - req->in.h.gid = from_kgid(fc->user_ns, fsgid); - - if (no_idmap && unlikely(req->in.h.uid == ((uid_t)-1) || - req->in.h.gid == ((gid_t)-1))) { - fuse_put_request(req); - return ERR_PTR(-EOVERFLOW); - } - return req; out: - fuse_drop_waiting(fc); + fuse_drop_waiting(fch); return ERR_PTR(err); } static void fuse_put_request(struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; + struct fuse_chan *fch = req->chan; if (refcount_dec_and_test(&req->count)) { if (test_bit(FR_BACKGROUND, &req->flags)) { @@ -273,15 +157,15 @@ static void fuse_put_request(struct fuse_req *req) * We get here in the unlikely case that a background * request was allocated but not sent */ - spin_lock(&fc->bg_lock); - if (!fc->blocked) - wake_up(&fc->blocked_waitq); - spin_unlock(&fc->bg_lock); + spin_lock(&fch->bg_lock); + if (!fch->blocked) + wake_up(&fch->blocked_waitq); + spin_unlock(&fch->bg_lock); } if (test_bit(FR_WAITING, &req->flags)) { __clear_bit(FR_WAITING, &req->flags); - fuse_drop_waiting(fc); + fuse_drop_waiting(fch); } fuse_request_free(req); @@ -335,6 +219,11 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); } +struct fuse_forget_link *fuse_alloc_forget(void) +{ + return kzalloc_obj(struct fuse_forget_link, GFP_KERNEL_ACCOUNT); +} + void fuse_dev_queue_forget(struct fuse_iqueue *fiq, struct fuse_forget_link *forget) { @@ -406,12 +295,271 @@ static void fuse_dev_queue_req(struct fuse_iqueue *fiq, struct fuse_req *req) } } -const struct fuse_iqueue_ops fuse_dev_fiq_ops = { +static const struct fuse_iqueue_ops fuse_dev_fiq_ops = { .send_forget = 
fuse_dev_queue_forget, .send_interrupt = fuse_dev_queue_interrupt, .send_req = fuse_dev_queue_req, }; -EXPORT_SYMBOL_GPL(fuse_dev_fiq_ops); + +void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv) +{ + spin_lock_init(&fiq->lock); + init_waitqueue_head(&fiq->waitq); + INIT_LIST_HEAD(&fiq->pending); + INIT_LIST_HEAD(&fiq->interrupts); + fiq->forget_list_tail = &fiq->forget_list_head; + fiq->connected = 1; + fiq->ops = ops; + fiq->priv = priv; +} +EXPORT_SYMBOL_GPL(fuse_iqueue_init); + +void fuse_chan_release(struct fuse_chan *fch) +{ + struct fuse_iqueue *fiq = &fch->iq; + + if (fiq->ops->release) + fiq->ops->release(fiq); + + if (fch->timeout.req_timeout) + cancel_delayed_work_sync(&fch->timeout.work); +} + +void fuse_chan_free(struct fuse_chan *fch) +{ + WARN_ON(!list_empty(&fch->devices)); + kfree(fch->pq_prealloc); + kfree(fch); +} +EXPORT_SYMBOL_GPL(fuse_chan_free); + +struct fuse_chan *fuse_chan_new(void) +{ + struct fuse_chan *fch = kzalloc_obj(struct fuse_chan); + if (!fch) + return NULL; + + spin_lock_init(&fch->lock); + INIT_LIST_HEAD(&fch->devices); + spin_lock_init(&fch->bg_lock); + INIT_LIST_HEAD(&fch->bg_queue); + init_waitqueue_head(&fch->blocked_waitq); + atomic_set(&fch->num_waiting, 0); + fch->max_background = FUSE_DEFAULT_MAX_BACKGROUND; + fch->initialized = 0; + fch->blocked = 0; + fch->connected = 1; + fch->timeout.req_timeout = 0; + + return fch; +} +EXPORT_SYMBOL_GPL(fuse_chan_new); + +struct list_head *fuse_pqueue_alloc(void) +{ + struct list_head *pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); + + if (pq) { + for (int i = 0; i < FUSE_PQ_HASH_SIZE; i++) + INIT_LIST_HEAD(&pq[i]); + } + return pq; +} + +struct fuse_chan *fuse_dev_chan_new(void) +{ + struct fuse_chan *fch __free(kfree) = fuse_chan_new(); + if (!fch) + return NULL; + + fch->pq_prealloc = fuse_pqueue_alloc(); + if (!fch->pq_prealloc) + return NULL; + + fuse_iqueue_init(&fch->iq, &fuse_dev_fiq_ops, NULL); + + return no_free_ptr(fch); 
+} +EXPORT_SYMBOL_GPL(fuse_dev_chan_new); + +unsigned int fuse_chan_num_background(struct fuse_chan *fch) +{ + return fch->num_background; +} + +unsigned int fuse_chan_max_background(struct fuse_chan *fch) +{ + return READ_ONCE(fch->max_background); +} + +void fuse_chan_max_background_set(struct fuse_chan *fch, unsigned int val) +{ + spin_lock(&fch->bg_lock); + fch->max_background = val; + fch->blocked = fch->num_background >= fch->max_background; + if (!fch->blocked) + wake_up(&fch->blocked_waitq); + spin_unlock(&fch->bg_lock); +} + +unsigned int fuse_chan_num_waiting(struct fuse_chan *fch) +{ + return atomic_read(&fch->num_waiting); +} + +void fuse_chan_set_fc(struct fuse_chan *fch, struct fuse_conn *fc) +{ + fch->conn = fc; +} + +void fuse_chan_io_uring_enable(struct fuse_chan *fch) +{ + fch->io_uring = 1; +} + +void fuse_pqueue_init(struct fuse_pqueue *fpq) +{ + spin_lock_init(&fpq->lock); + INIT_LIST_HEAD(&fpq->io); + fpq->connected = 1; + fpq->processing = NULL; +} + +static struct fuse_dev *fuse_dev_alloc_no_pq(void) +{ + struct fuse_dev *fud; + + fud = kzalloc_obj(struct fuse_dev); + if (!fud) + return NULL; + + refcount_set(&fud->ref, 1); + fuse_pqueue_init(&fud->pq); + + return fud; +} + +struct fuse_dev *fuse_dev_alloc(void) +{ + struct fuse_dev *fud __free(kfree) = fuse_dev_alloc_no_pq(); + if (!fud) + return NULL; + + fud->pq.processing = fuse_pqueue_alloc(); + if (!fud->pq.processing) + return NULL; + + return no_free_ptr(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc); + +/* + * Installs @fch into @fud, return true on success. "Consumes" @pq in either case. 
+ */ +static bool fuse_dev_install_with_pq(struct fuse_dev *fud, struct fuse_chan *fch, + struct list_head *pq) +{ + struct fuse_chan *old_fch; + + guard(spinlock)(&fch->lock); + /* + * Pairs with: + * - xchg() in fuse_dev_release() + * - smp_load_acquire() in fuse_dev_fc_get() + */ + old_fch = cmpxchg(&fud->chan, NULL, fch); + if (old_fch) { + /* + * failed to set fud->chan because + * - it was already set to a different fc + * - it was set to disconneted + */ + kfree(pq); + return false; + } + if (pq) { + WARN_ON(fud->pq.processing); + fud->pq.processing = pq; + } + list_add_tail(&fud->entry, &fch->devices); + fuse_conn_get(fch->conn); + wake_up_all(&fuse_dev_waitq); + return true; +} + +void fuse_dev_install(struct fuse_dev *fud, struct fuse_chan *fch) +{ + struct list_head *pq = fch->pq_prealloc; + + fch->pq_prealloc = NULL; + if (!fuse_dev_install_with_pq(fud, fch, pq)) { + /* Channel is not usable without a dev */ + fuse_chan_abort(fch, false); + } +} +EXPORT_SYMBOL_GPL(fuse_dev_install); + +struct fuse_dev *fuse_dev_alloc_install(struct fuse_chan *fch) +{ + struct fuse_dev *fud; + + fud = fuse_dev_alloc_no_pq(); + if (!fud) + return NULL; + + fuse_dev_install(fud, fch); + return fud; +} +EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); + +void fuse_dev_put(struct fuse_dev *fud) +{ + struct fuse_chan *fch; + + if (!refcount_dec_and_test(&fud->ref)) + return; + + fch = fuse_dev_chan_get(fud); + if (fch && fch != FUSE_DEV_CHAN_DISCONNECTED) { + /* This is the virtiofs case (fuse_dev_release() not called) */ + spin_lock(&fch->lock); + list_del(&fud->entry); + spin_unlock(&fch->lock); + + fuse_conn_put(fch->conn); + } + kfree(fud->pq.processing); + kfree(fud); +} +EXPORT_SYMBOL_GPL(fuse_dev_put); + +bool fuse_dev_is_installed(struct fuse_dev *fud) +{ + struct fuse_chan *fch = fuse_dev_chan_get(fud); + + return fch != NULL && fch != FUSE_DEV_CHAN_DISCONNECTED; +} + +/* + * Checks if @fc matches the one installed in @fud + */ +bool fuse_dev_verify(struct fuse_dev *fud, 
struct fuse_chan *fch) +{ + return fuse_dev_chan_get(fud) == fch; +} + +bool fuse_dev_is_sync_init(struct fuse_dev *fud) +{ + return fud->sync_init; +} + +struct fuse_dev *fuse_dev_grab(struct file *file) +{ + struct fuse_dev *fud = fuse_file_to_fud(file); + + refcount_inc(&fud->ref); + return fud; +} static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) { @@ -421,10 +569,10 @@ static void fuse_send_one(struct fuse_iqueue *fiq, struct fuse_req *req) fiq->ops->send_req(fiq, req); } -void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, - u64 nodeid, u64 nlookup) +void fuse_chan_queue_forget(struct fuse_chan *fch, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; forget->forget_one.nodeid = nodeid; forget->forget_one.nlookup = nlookup; @@ -432,17 +580,17 @@ void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, fiq->ops->send_forget(fiq, forget); } -static void flush_bg_queue(struct fuse_conn *fc) +static void flush_bg_queue(struct fuse_chan *fch) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; - while (fc->active_background < fc->max_background && - !list_empty(&fc->bg_queue)) { + while (fch->active_background < fch->max_background && + !list_empty(&fch->bg_queue)) { struct fuse_req *req; - req = list_first_entry(&fc->bg_queue, struct fuse_req, list); + req = list_first_entry(&fch->bg_queue, struct fuse_req, list); list_del(&req->list); - fc->active_background++; + fch->active_background++; fuse_send_one(fiq, req); } } @@ -457,9 +605,8 @@ static void flush_bg_queue(struct fuse_conn *fc) */ void fuse_request_end(struct fuse_req *req) { - struct fuse_mount *fm = req->fm; - struct fuse_conn *fc = fm->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = req->chan; + struct fuse_iqueue *fiq = &fch->iq; if (test_and_set_bit(FR_FINISHED, &req->flags)) goto put_request; @@ -478,33 
+625,33 @@ void fuse_request_end(struct fuse_req *req) WARN_ON(test_bit(FR_PENDING, &req->flags)); WARN_ON(test_bit(FR_SENT, &req->flags)); if (test_bit(FR_BACKGROUND, &req->flags)) { - spin_lock(&fc->bg_lock); + spin_lock(&fch->bg_lock); clear_bit(FR_BACKGROUND, &req->flags); - if (fc->num_background == fc->max_background) { - fc->blocked = 0; - wake_up(&fc->blocked_waitq); - } else if (!fc->blocked) { + if (fch->num_background == fch->max_background) { + fch->blocked = 0; + wake_up(&fch->blocked_waitq); + } else if (!fch->blocked) { /* * Wake up next waiter, if any. It's okay to use * waitqueue_active(), as we've already synced up - * fc->blocked with waiters with the wake_up() call + * fch->blocked with waiters with the wake_up() call * above. */ - if (waitqueue_active(&fc->blocked_waitq)) - wake_up(&fc->blocked_waitq); + if (waitqueue_active(&fch->blocked_waitq)) + wake_up(&fch->blocked_waitq); } - fc->num_background--; - fc->active_background--; - flush_bg_queue(fc); - spin_unlock(&fc->bg_lock); + fch->num_background--; + fch->active_background--; + flush_bg_queue(fch); + spin_unlock(&fch->bg_lock); } else { /* Wake up waiter sleeping in request_wait_answer() */ wake_up(&req->waitq); } if (test_bit(FR_ASYNC, &req->flags)) - req->args->end(fm, req->args, req->out.h.error); + req->args->end(req->args, req->out.h.error); put_request: fuse_put_request(req); } @@ -512,7 +659,7 @@ EXPORT_SYMBOL_GPL(fuse_request_end); static int queue_interrupt(struct fuse_req *req) { - struct fuse_iqueue *fiq = &req->fm->fc->iq; + struct fuse_iqueue *fiq = &req->chan->iq; /* Check for we've sent request to interrupt this req */ if (unlikely(!test_bit(FR_INTERRUPTED, &req->flags))) @@ -543,11 +690,11 @@ bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock) static void request_wait_answer(struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = req->chan; + struct fuse_iqueue *fiq = &fch->iq; int err; - 
if (!fc->no_interrupt) { + if (!fch->no_interrupt) { /* Any signal may interrupt this */ err = wait_event_interruptible(req->waitq, test_bit(FR_FINISHED, &req->flags)); @@ -571,7 +718,7 @@ static void request_wait_answer(struct fuse_req *req) return; if (req->args->abort_on_kill) { - fuse_abort_conn(fc); + fuse_chan_abort(fch, false); return; } @@ -592,7 +739,7 @@ static void request_wait_answer(struct fuse_req *req) static void __fuse_request_send(struct fuse_req *req) { - struct fuse_iqueue *fiq = &req->fm->fc->iq; + struct fuse_iqueue *fiq = &req->chan->iq; BUG_ON(test_bit(FR_BACKGROUND, &req->flags)); @@ -606,12 +753,12 @@ static void __fuse_request_send(struct fuse_req *req) smp_rmb(); } -static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) +static void fuse_adjust_compat(struct fuse_chan *fch, struct fuse_args *args) { - if (fc->minor < 4 && args->opcode == FUSE_STATFS) + if (fch->minor < 4 && args->opcode == FUSE_STATFS) args->out_args[0].size = FUSE_COMPAT_STATFS_SIZE; - if (fc->minor < 9) { + if (fch->minor < 9) { switch (args->opcode) { case FUSE_LOOKUP: case FUSE_CREATE: @@ -627,7 +774,7 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) break; } } - if (fc->minor < 12) { + if (fch->minor < 12) { switch (args->opcode) { case FUSE_CREATE: args->in_args[0].size = sizeof(struct fuse_open_in); @@ -639,25 +786,13 @@ static void fuse_adjust_compat(struct fuse_conn *fc, struct fuse_args *args) } } -static void fuse_force_creds(struct fuse_req *req) -{ - struct fuse_conn *fc = req->fm->fc; - - if (!req->fm->sb || req->fm->sb->s_iflags & SB_I_NOIDMAP) { - req->in.h.uid = from_kuid_munged(fc->user_ns, current_fsuid()); - req->in.h.gid = from_kgid_munged(fc->user_ns, current_fsgid()); - } else { - req->in.h.uid = FUSE_INVALID_UIDGID; - req->in.h.gid = FUSE_INVALID_UIDGID; - } - - req->in.h.pid = pid_nr_ns(task_pid(current), fc->pid_ns); -} - static void fuse_args_to_req(struct fuse_req *req, struct fuse_args 
*args) { req->in.h.opcode = args->opcode; req->in.h.nodeid = args->nodeid; + req->in.h.uid = args->uid; + req->in.h.gid = args->gid; + req->in.h.pid = args->pid; req->args = args; if (args->is_ext) req->in.h.total_extlen = args->in_args[args->ext_idx].size / 8; @@ -665,33 +800,26 @@ static void fuse_args_to_req(struct fuse_req *req, struct fuse_args *args) __set_bit(FR_ASYNC, &req->flags); } -ssize_t __fuse_simple_request(struct mnt_idmap *idmap, - struct fuse_mount *fm, - struct fuse_args *args) +ssize_t fuse_chan_send(struct fuse_chan *fch, struct fuse_args *args) { - struct fuse_conn *fc = fm->fc; struct fuse_req *req; ssize_t ret; if (args->force) { - atomic_inc(&fc->num_waiting); - req = fuse_request_alloc(fm, GFP_KERNEL | __GFP_NOFAIL); - - if (!args->nocreds) - fuse_force_creds(req); + atomic_inc(&fch->num_waiting); + req = fuse_request_alloc(fch, GFP_KERNEL | __GFP_NOFAIL); __set_bit(FR_WAITING, &req->flags); if (!args->abort_on_kill) __set_bit(FR_FORCE, &req->flags); } else { - WARN_ON(args->nocreds); - req = fuse_get_req(idmap, fm, false); + req = fuse_get_req(fch, false); if (IS_ERR(req)) return PTR_ERR(req); } - /* Needs to be done after fuse_get_req() so that fc->minor is valid */ - fuse_adjust_compat(fc, args); + /* Needs to be done after fuse_get_req() so that fch->minor is valid */ + fuse_adjust_compat(fch, args); fuse_args_to_req(req, args); if (!args->noreply) @@ -708,10 +836,9 @@ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, } #ifdef CONFIG_FUSE_IO_URING -static bool fuse_request_queue_background_uring(struct fuse_conn *fc, - struct fuse_req *req) +static bool fuse_request_queue_background_uring(struct fuse_req *req) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &req->chan->iq; req->in.h.len = sizeof(struct fuse_in_header) + fuse_len_args(req->args->in_numargs, @@ -727,50 +854,46 @@ static bool fuse_request_queue_background_uring(struct fuse_conn *fc, */ static int fuse_request_queue_background(struct fuse_req *req) 
{ - struct fuse_mount *fm = req->fm; - struct fuse_conn *fc = fm->fc; + struct fuse_chan *fch = req->chan; bool queued = false; WARN_ON(!test_bit(FR_BACKGROUND, &req->flags)); if (!test_bit(FR_WAITING, &req->flags)) { __set_bit(FR_WAITING, &req->flags); - atomic_inc(&fc->num_waiting); + atomic_inc(&fch->num_waiting); } __set_bit(FR_ISREPLY, &req->flags); #ifdef CONFIG_FUSE_IO_URING - if (fuse_uring_ready(fc)) - return fuse_request_queue_background_uring(fc, req); + if (fuse_uring_ready(fch)) + return fuse_request_queue_background_uring(req); #endif - spin_lock(&fc->bg_lock); - if (likely(fc->connected)) { - fc->num_background++; - if (fc->num_background == fc->max_background) - fc->blocked = 1; - list_add_tail(&req->list, &fc->bg_queue); - flush_bg_queue(fc); + spin_lock(&fch->bg_lock); + if (likely(fch->connected)) { + fch->num_background++; + if (fch->num_background == fch->max_background) + fch->blocked = 1; + list_add_tail(&req->list, &fch->bg_queue); + flush_bg_queue(fch); queued = true; } - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); return queued; } -int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, - gfp_t gfp_flags) +int fuse_chan_send_bg(struct fuse_chan *fch, struct fuse_args *args, gfp_t gfp_flags) { struct fuse_req *req; if (args->force) { - WARN_ON(!args->nocreds); - req = fuse_request_alloc(fm, gfp_flags); + req = fuse_request_alloc(fch, gfp_flags); if (!req) return -ENOMEM; __set_bit(FR_BACKGROUND, &req->flags); } else { - WARN_ON(args->nocreds); - req = fuse_get_req(&invalid_mnt_idmap, fm, true); + req = fuse_get_req(fch, true); if (IS_ERR(req)) return PTR_ERR(req); } @@ -784,15 +907,13 @@ int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, return 0; } -EXPORT_SYMBOL_GPL(fuse_simple_background); -static int fuse_simple_notify_reply(struct fuse_mount *fm, - struct fuse_args *args, u64 unique) +int fuse_chan_send_notify_reply(struct fuse_chan *fch, struct fuse_args *args, u64 unique) { struct 
fuse_req *req; - struct fuse_iqueue *fiq = &fm->fc->iq; + struct fuse_iqueue *fiq = &fch->iq; - req = fuse_get_req(&invalid_mnt_idmap, fm, false); + req = fuse_get_req(fch, false); if (IS_ERR(req)) return PTR_ERR(req); @@ -1037,6 +1158,10 @@ static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop if (WARN_ON(folio_test_mlocked(oldfolio))) goto out_fallback_unlock; + err = lock_request(cs->req); + if (err) + goto out_fallback_unlock; + replace_page_cache_folio(oldfolio, newfolio); folio_get(newfolio); @@ -1050,20 +1175,7 @@ static int fuse_try_move_folio(struct fuse_copy_state *cs, struct folio **foliop */ pipe_buf_release(cs->pipe, buf); - err = 0; - spin_lock(&cs->req->waitq.lock); - if (test_bit(FR_ABORTED, &cs->req->flags)) - err = -ENOENT; - else - *foliop = newfolio; - spin_unlock(&cs->req->waitq.lock); - - if (err) { - folio_unlock(newfolio); - folio_put(newfolio); - goto out_put_old; - } - + *foliop = newfolio; folio_unlock(oldfolio); /* Drop ref for ap->pages[] array */ folio_put(oldfolio); @@ -1115,15 +1227,15 @@ static int fuse_ref_folio(struct fuse_copy_state *cs, struct folio *folio, cs->nr_segs++; cs->len = 0; - return 0; + return lock_request(cs->req); } /* * Copy a folio in the request to/from the userspace buffer. 
Must be * done atomically */ -static int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, - unsigned offset, unsigned count, int zeroing) +int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, + unsigned offset, unsigned count, int zeroing) { int err; struct folio *folio = *foliop; @@ -1204,7 +1316,7 @@ static int fuse_copy_folios(struct fuse_copy_state *cs, unsigned nbytes, } /* Copy a single argument in the request to/from userspace buffer */ -static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) +int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size) { while (size) { if (!cs->len) { @@ -1256,7 +1368,7 @@ static int request_pending(struct fuse_iqueue *fiq) */ static int fuse_read_interrupt(struct fuse_iqueue *fiq, struct fuse_copy_state *cs, - size_t nbytes, struct fuse_req *req) + struct fuse_req *req) __releases(fiq->lock) { struct fuse_in_header ih; @@ -1273,8 +1385,6 @@ __releases(fiq->lock) arg.unique = req->in.h.unique; spin_unlock(&fiq->lock); - if (nbytes < reqsize) - return -EINVAL; err = fuse_copy_one(cs, &ih, sizeof(ih)); if (!err) @@ -1307,8 +1417,7 @@ static struct fuse_forget_link *fuse_dequeue_forget(struct fuse_iqueue *fiq, } static int fuse_read_single_forget(struct fuse_iqueue *fiq, - struct fuse_copy_state *cs, - size_t nbytes) + struct fuse_copy_state *cs) __releases(fiq->lock) { int err; @@ -1325,8 +1434,6 @@ __releases(fiq->lock) spin_unlock(&fiq->lock); kfree(forget); - if (nbytes < ih.len) - return -EINVAL; err = fuse_copy_one(cs, &ih, sizeof(ih)); if (!err) @@ -1354,11 +1461,6 @@ __releases(fiq->lock) .len = sizeof(ih) + sizeof(arg), }; - if (nbytes < ih.len) { - spin_unlock(&fiq->lock); - return -EINVAL; - } - max_forgets = (nbytes - ih.len) / sizeof(struct fuse_forget_one); head = fuse_dequeue_forget(fiq, max_forgets, &count); spin_unlock(&fiq->lock); @@ -1388,13 +1490,13 @@ __releases(fiq->lock) return ih.len; } -static int fuse_read_forget(struct fuse_conn 
*fc, struct fuse_iqueue *fiq, +static int fuse_read_forget(struct fuse_chan *fch, struct fuse_iqueue *fiq, struct fuse_copy_state *cs, size_t nbytes) __releases(fiq->lock) { - if (fc->minor < 16 || fiq->forget_list_head.next->next == NULL) - return fuse_read_single_forget(fiq, cs, nbytes); + if (fch->minor < 16 || fiq->forget_list_head.next->next == NULL) + return fuse_read_single_forget(fiq, cs); else return fuse_read_batch_forget(fiq, cs, nbytes); } @@ -1412,8 +1514,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, struct fuse_copy_state *cs, size_t nbytes) { ssize_t err; - struct fuse_conn *fc = fud->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = fud->chan; + struct fuse_iqueue *fiq = &fch->iq; struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_args *args; @@ -1435,7 +1537,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, if (nbytes < max_t(size_t, FUSE_MIN_READ_BUFFER, sizeof(struct fuse_in_header) + sizeof(struct fuse_write_in) + - fc->max_write)) + fch->max_write)) return -EINVAL; restart: @@ -1454,19 +1556,19 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, } if (!fiq->connected) { - err = fc->aborted ? -ECONNABORTED : -ENODEV; + err = fch->abort_with_err ? 
-ECONNABORTED : -ENODEV; goto err_unlock; } if (!list_empty(&fiq->interrupts)) { req = list_entry(fiq->interrupts.next, struct fuse_req, intr_entry); - return fuse_read_interrupt(fiq, cs, nbytes, req); + return fuse_read_interrupt(fiq, cs, req); } if (forget_pending(fiq)) { if (list_empty(&fiq->pending) || fiq->forget_batch-- > 0) - return fuse_read_forget(fc, fiq, cs, nbytes); + return fuse_read_forget(fch, fiq, cs, nbytes); if (fiq->forget_batch <= -8) fiq->forget_batch = 16; @@ -1492,7 +1594,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, spin_lock(&fpq->lock); /* * Must not put request on fpq->io queue after having been shut down by - * fuse_abort_conn() + * fuse_chan_abort() */ if (!fpq->connected) { req->out.h.error = err = -ECONNABORTED; @@ -1510,7 +1612,7 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file, spin_lock(&fpq->lock); clear_bit(FR_LOCKED, &req->flags); if (!fpq->connected) { - err = fc->aborted ? -ECONNABORTED : -ENODEV; + err = fch->abort_with_err ? 
-ECONNABORTED : -ENODEV; goto out_end; } if (err) { @@ -1548,7 +1650,7 @@ out_end: static int fuse_dev_open(struct inode *inode, struct file *file) { - struct fuse_dev *fud = fuse_dev_alloc(); + struct fuse_dev *fud = fuse_dev_alloc_no_pq(); if (!fud) return -ENOMEM; @@ -1562,9 +1664,15 @@ struct fuse_dev *fuse_get_dev(struct file *file) struct fuse_dev *fud = fuse_file_to_fud(file); int err; - err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_fc_get(fud) != NULL); - if (err) - return ERR_PTR(err); + if (unlikely(!fuse_dev_chan_get(fud))) { + /* only block waiting for mount if sync init was requested */ + if (!fud->sync_init) + return ERR_PTR(-EPERM); + + err = wait_event_interruptible(fuse_dev_waitq, fuse_dev_chan_get(fud) != NULL); + if (err) + return ERR_PTR(err); + } return fud; } @@ -1636,355 +1744,6 @@ out: return ret; } -static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_poll_wakeup_out outarg; - int err; - - if (size != sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - fuse_copy_finish(cs); - return fuse_notify_poll_wakeup(fc, &outarg); -} - -static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_inval_inode_out outarg; - int err; - - if (size != sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - fuse_copy_finish(cs); - - down_read(&fc->killsb); - err = fuse_reverse_inval_inode(fc, outarg.ino, - outarg.off, outarg.len); - up_read(&fc->killsb); - return err; -} - -static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_inval_entry_out outarg; - int err; - char *buf; - struct qstr name; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return 
err; - - if (outarg.namelen > fc->name_max) - return -ENAMETOOLONG; - - err = -EINVAL; - if (size != sizeof(outarg) + outarg.namelen + 1) - return -EINVAL; - - buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - name.name = buf; - name.len = outarg.namelen; - err = fuse_copy_one(cs, buf, outarg.namelen + 1); - if (err) - goto err; - fuse_copy_finish(cs); - buf[outarg.namelen] = 0; - - down_read(&fc->killsb); - err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); - up_read(&fc->killsb); -err: - kfree(buf); - return err; -} - -static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_delete_out outarg; - int err; - char *buf; - struct qstr name; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (outarg.namelen > fc->name_max) - return -ENAMETOOLONG; - - if (size != sizeof(outarg) + outarg.namelen + 1) - return -EINVAL; - - buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - name.name = buf; - name.len = outarg.namelen; - err = fuse_copy_one(cs, buf, outarg.namelen + 1); - if (err) - goto err; - fuse_copy_finish(cs); - buf[outarg.namelen] = 0; - - down_read(&fc->killsb); - err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); - up_read(&fc->killsb); -err: - kfree(buf); - return err; -} - -static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_store_out outarg; - struct inode *inode; - struct address_space *mapping; - u64 nodeid; - int err; - unsigned int num; - loff_t file_size; - loff_t pos; - loff_t end; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (size - sizeof(outarg) != outarg.size) - return -EINVAL; - - if (outarg.offset >= MAX_LFS_FILESIZE) - return 
-EINVAL; - - nodeid = outarg.nodeid; - pos = outarg.offset; - num = min(outarg.size, MAX_LFS_FILESIZE - pos); - - down_read(&fc->killsb); - - err = -ENOENT; - inode = fuse_ilookup(fc, nodeid, NULL); - if (!inode) - goto out_up_killsb; - if (!S_ISREG(inode->i_mode)) { - err = -EINVAL; - goto out_iput; - } - - mapping = inode->i_mapping; - file_size = i_size_read(inode); - end = pos + num; - if (end > file_size) { - file_size = end; - fuse_write_update_attr(inode, file_size, num); - } - - while (num) { - struct folio *folio; - unsigned int folio_offset; - unsigned int nr_bytes; - pgoff_t index = pos >> PAGE_SHIFT; - - folio = filemap_grab_folio(mapping, index); - err = PTR_ERR(folio); - if (IS_ERR(folio)) - goto out_iput; - - folio_offset = offset_in_folio(folio, pos); - nr_bytes = min(num, folio_size(folio) - folio_offset); - - err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); - if (!folio_test_uptodate(folio) && !err && folio_offset == 0 && - (nr_bytes == folio_size(folio) || file_size == end)) { - folio_zero_segment(folio, nr_bytes, folio_size(folio)); - folio_mark_uptodate(folio); - } - folio_unlock(folio); - folio_put(folio); - - if (err) - goto out_iput; - - pos += nr_bytes; - num -= nr_bytes; - } - - err = 0; - -out_iput: - iput(inode); -out_up_killsb: - up_read(&fc->killsb); - return err; -} - -struct fuse_retrieve_args { - struct fuse_args_pages ap; - struct fuse_notify_retrieve_in inarg; -}; - -static void fuse_retrieve_end(struct fuse_mount *fm, struct fuse_args *args, - int error) -{ - struct fuse_retrieve_args *ra = - container_of(args, typeof(*ra), ap.args); - - release_pages(ra->ap.folios, ra->ap.num_folios); - kfree(ra); -} - -static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, - struct fuse_notify_retrieve_out *outarg) -{ - int err; - struct address_space *mapping = inode->i_mapping; - loff_t file_size; - unsigned int num; - unsigned int offset; - size_t total_len = 0; - unsigned int num_pages; - struct fuse_conn *fc = 
fm->fc; - struct fuse_retrieve_args *ra; - size_t args_size = sizeof(*ra); - struct fuse_args_pages *ap; - struct fuse_args *args; - loff_t pos = outarg->offset; - - offset = offset_in_page(pos); - file_size = i_size_read(inode); - - num = min(outarg->size, fc->max_write); - if (pos > file_size) - num = 0; - else if (num > file_size - pos) - num = file_size - pos; - - num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE); - num_pages = min(num_pages, fc->max_pages); - num = min(num, num_pages << PAGE_SHIFT); - - args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0])); - - ra = kzalloc(args_size, GFP_KERNEL); - if (!ra) - return -ENOMEM; - - ap = &ra->ap; - ap->folios = (void *) (ra + 1); - ap->descs = (void *) (ap->folios + num_pages); - - args = &ap->args; - args->nodeid = outarg->nodeid; - args->opcode = FUSE_NOTIFY_REPLY; - args->in_numargs = 3; - args->in_pages = true; - args->end = fuse_retrieve_end; - - while (num && ap->num_folios < num_pages) { - struct folio *folio; - unsigned int folio_offset; - unsigned int nr_bytes; - pgoff_t index = pos >> PAGE_SHIFT; - - folio = filemap_get_folio(mapping, index); - if (IS_ERR(folio)) - break; - if (!folio_test_uptodate(folio)) { - folio_put(folio); - break; - } - - folio_offset = offset_in_folio(folio, pos); - nr_bytes = min(folio_size(folio) - folio_offset, num); - - ap->folios[ap->num_folios] = folio; - ap->descs[ap->num_folios].offset = folio_offset; - ap->descs[ap->num_folios].length = nr_bytes; - ap->num_folios++; - - pos += nr_bytes; - num -= nr_bytes; - total_len += nr_bytes; - } - ra->inarg.offset = outarg->offset; - ra->inarg.size = total_len; - fuse_set_zero_arg0(args); - args->in_args[1].size = sizeof(ra->inarg); - args->in_args[1].value = &ra->inarg; - args->in_args[2].size = total_len; - - err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); - if (err) - fuse_retrieve_end(fm, args, err); - - return err; -} - -static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, 
- struct fuse_copy_state *cs) -{ - struct fuse_notify_retrieve_out outarg; - struct fuse_mount *fm; - struct inode *inode; - u64 nodeid; - int err; - - if (size != sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - fuse_copy_finish(cs); - - if (outarg.offset >= MAX_LFS_FILESIZE) - return -EINVAL; - - down_read(&fc->killsb); - err = -ENOENT; - nodeid = outarg.nodeid; - - inode = fuse_ilookup(fc, nodeid, &fm); - if (inode) { - if (!S_ISREG(inode->i_mode)) - err = -EINVAL; - else - err = fuse_retrieve(fm, inode, &outarg); - iput(inode); - } - up_read(&fc->killsb); - - return err; -} - /* * Resending all processing queue requests. * @@ -1998,21 +1757,21 @@ static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, * if the FUSE daemon takes careful measures to avoid processing duplicated * non-idempotent requests. */ -static void fuse_resend(struct fuse_conn *fc) +void fuse_chan_resend(struct fuse_chan *fch) { struct fuse_dev *fud; struct fuse_req *req, *next; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; LIST_HEAD(to_queue); unsigned int i; - spin_lock(&fc->lock); - if (!fc->connected) { - spin_unlock(&fc->lock); + spin_lock(&fch->lock); + if (!fch->connected) { + spin_unlock(&fch->lock); return; } - list_for_each_entry(fud, &fc->devices, entry) { + list_for_each_entry(fud, &fch->devices, entry) { struct fuse_pqueue *fpq = &fud->pq; spin_lock(&fpq->lock); @@ -2020,7 +1779,7 @@ static void fuse_resend(struct fuse_conn *fc) list_splice_tail_init(&fpq->processing[i], &to_queue); spin_unlock(&fpq->lock); } - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); list_for_each_entry_safe(req, next, &to_queue, list) { set_bit(FR_PENDING, &req->flags); @@ -2042,108 +1801,6 @@ static void fuse_resend(struct fuse_conn *fc) fuse_dev_wake_and_unlock(fiq); } -static int fuse_notify_resend(struct fuse_conn *fc) -{ - fuse_resend(fc); - return 0; -} - -/* - * Increments the fuse 
connection epoch. This will result of dentries from - * previous epochs to be invalidated. Additionally, if inval_wq is set, a work - * queue is scheduled to trigger the invalidation. - */ -static int fuse_notify_inc_epoch(struct fuse_conn *fc) -{ - atomic_inc(&fc->epoch); - if (inval_wq) - schedule_work(&fc->epoch_work); - - return 0; -} - -static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, - struct fuse_copy_state *cs) -{ - struct fuse_notify_prune_out outarg; - const unsigned int batch = 512; - u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL); - unsigned int num, i; - int err; - - if (!nodeids) - return -ENOMEM; - - if (size < sizeof(outarg)) - return -EINVAL; - - err = fuse_copy_one(cs, &outarg, sizeof(outarg)); - if (err) - return err; - - if (size - sizeof(outarg) != outarg.count * sizeof(u64)) - return -EINVAL; - - for (; outarg.count; outarg.count -= num) { - num = min(batch, outarg.count); - err = fuse_copy_one(cs, nodeids, num * sizeof(u64)); - if (err) - return err; - - scoped_guard(rwsem_read, &fc->killsb) { - for (i = 0; i < num; i++) - fuse_try_prune_one_inode(fc, nodeids[i]); - } - } - return 0; -} - -static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, - unsigned int size, struct fuse_copy_state *cs) -{ - /* - * Only allow notifications during while the connection is in an - * initialized and connected state - */ - if (!fc->initialized || !fc->connected) - return -EINVAL; - - /* Don't try to move folios (yet) */ - cs->move_folios = false; - - switch (code) { - case FUSE_NOTIFY_POLL: - return fuse_notify_poll(fc, size, cs); - - case FUSE_NOTIFY_INVAL_INODE: - return fuse_notify_inval_inode(fc, size, cs); - - case FUSE_NOTIFY_INVAL_ENTRY: - return fuse_notify_inval_entry(fc, size, cs); - - case FUSE_NOTIFY_STORE: - return fuse_notify_store(fc, size, cs); - - case FUSE_NOTIFY_RETRIEVE: - return fuse_notify_retrieve(fc, size, cs); - - case FUSE_NOTIFY_DELETE: - return fuse_notify_delete(fc, 
size, cs); - - case FUSE_NOTIFY_RESEND: - return fuse_notify_resend(fc); - - case FUSE_NOTIFY_INC_EPOCH: - return fuse_notify_inc_epoch(fc); - - case FUSE_NOTIFY_PRUNE: - return fuse_notify_prune(fc, size, cs); - - default: - return -EINVAL; - } -} - /* Look up request on processing list by unique ID */ struct fuse_req *fuse_request_find(struct fuse_pqueue *fpq, u64 unique) { @@ -2196,7 +1853,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, struct fuse_copy_state *cs, size_t nbytes) { int err; - struct fuse_conn *fc = fud->fc; + struct fuse_chan *fch = fud->chan; struct fuse_pqueue *fpq = &fud->pq; struct fuse_req *req; struct fuse_out_header oh; @@ -2218,7 +1875,18 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, * and error contains notification code. */ if (!oh.unique) { - err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), cs); + /* + * Only allow notifications during while the connection is in an + * initialized and connected state + */ + err = -EINVAL; + if (!fch->initialized || !fch->connected) + goto copy_finish; + + /* Don't try to move folios (yet) */ + cs->move_folios = false; + + err = fuse_notify(fch->conn, oh.error, nbytes - sizeof(oh), cs); goto copy_finish; } @@ -2246,7 +1914,7 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud, if (nbytes != sizeof(struct fuse_out_header)) err = -EINVAL; else if (oh.error == -ENOSYS) - fc->no_interrupt = 1; + fch->no_interrupt = 1; else if (oh.error == -EAGAIN) err = queue_interrupt(req); @@ -2406,7 +2074,7 @@ static __poll_t fuse_dev_poll(struct file *file, poll_table *wait) if (IS_ERR(fud)) return EPOLLERR; - fiq = &fud->fc->iq; + fiq = &fud->chan->iq; poll_wait(file, &fiq->waitq, wait); spin_lock(&fiq->lock); @@ -2432,21 +2100,6 @@ void fuse_dev_end_requests(struct list_head *head) } } -static void end_polls(struct fuse_conn *fc) -{ - struct rb_node *p; - - p = rb_first(&fc->polled_files); - - while (p) { - struct fuse_file *ff; - ff = rb_entry(p, struct fuse_file, polled_node); - 
wake_up_interruptible_all(&ff->poll_wait); - - p = rb_next(p); - } -} - /* * Abort all requests. * @@ -2465,27 +2118,29 @@ static void end_polls(struct fuse_conn *fc) * is OK, the request will in that case be removed from the list before we touch * it. */ -void fuse_abort_conn(struct fuse_conn *fc) +void fuse_chan_abort(struct fuse_chan *fch, bool abort_with_err) { - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_iqueue *fiq = &fch->iq; + + fch->abort_with_err = abort_with_err; - spin_lock(&fc->lock); - if (fc->connected) { + spin_lock(&fch->lock); + if (fch->connected) { struct fuse_dev *fud; struct fuse_req *req, *next; LIST_HEAD(to_end); unsigned int i; - if (fc->timeout.req_timeout) - cancel_delayed_work(&fc->timeout.work); + if (fch->timeout.req_timeout) + cancel_delayed_work(&fch->timeout.work); - /* Background queuing checks fc->connected under bg_lock */ - spin_lock(&fc->bg_lock); - fc->connected = 0; - spin_unlock(&fc->bg_lock); + /* Background queuing checks fch->connected under bg_lock */ + spin_lock(&fch->bg_lock); + fch->connected = 0; + spin_unlock(&fch->bg_lock); - fuse_set_initialized(fc); - list_for_each_entry(fud, &fc->devices, entry) { + fuse_chan_set_initialized(fch, 0); + list_for_each_entry(fud, &fch->devices, entry) { struct fuse_pqueue *fpq = &fud->pq; spin_lock(&fpq->lock); @@ -2506,11 +2161,11 @@ void fuse_abort_conn(struct fuse_conn *fc) &to_end); spin_unlock(&fpq->lock); } - spin_lock(&fc->bg_lock); - fc->blocked = 0; - fc->max_background = UINT_MAX; - flush_bg_queue(fc); - spin_unlock(&fc->bg_lock); + spin_lock(&fch->bg_lock); + fch->blocked = 0; + fch->max_background = UINT_MAX; + flush_bg_queue(fch); + spin_unlock(&fch->bg_lock); spin_lock(&fiq->lock); fiq->connected = 0; @@ -2522,39 +2177,39 @@ void fuse_abort_conn(struct fuse_conn *fc) wake_up_all(&fiq->waitq); spin_unlock(&fiq->lock); kill_fasync(&fiq->fasync, SIGIO, POLL_IN); - end_polls(fc); - wake_up_all(&fc->blocked_waitq); - spin_unlock(&fc->lock); + 
fuse_end_polls(fch->conn); + wake_up_all(&fch->blocked_waitq); + spin_unlock(&fch->lock); fuse_dev_end_requests(&to_end); /* - * fc->lock must not be taken to avoid conflicts with io-uring + * fch->lock must not be taken to avoid conflicts with io-uring * locks */ - fuse_uring_abort(fc); + fuse_uring_abort(fch); } else { - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); } } -EXPORT_SYMBOL_GPL(fuse_abort_conn); +EXPORT_SYMBOL_GPL(fuse_chan_abort); -void fuse_wait_aborted(struct fuse_conn *fc) +void fuse_chan_wait_aborted(struct fuse_chan *fch) { /* matches implicit memory barrier in fuse_drop_waiting() */ smp_mb(); - wait_event(fc->blocked_waitq, atomic_read(&fc->num_waiting) == 0); + wait_event(fch->blocked_waitq, fuse_chan_num_waiting(fch) == 0); - fuse_uring_wait_stopped_queues(fc); + fuse_uring_wait_stopped_queues(fch); } int fuse_dev_release(struct inode *inode, struct file *file) { struct fuse_dev *fud = fuse_file_to_fud(file); /* Pairs with cmpxchg() in fuse_dev_install() */ - struct fuse_conn *fc = xchg(&fud->fc, FUSE_DEV_FC_DISCONNECTED); + struct fuse_chan *fch = xchg(&fud->chan, FUSE_DEV_CHAN_DISCONNECTED); - if (fc) { + if (fch) { struct fuse_pqueue *fpq = &fud->pq; LIST_HEAD(to_end); unsigned int i; @@ -2568,17 +2223,17 @@ int fuse_dev_release(struct inode *inode, struct file *file) fuse_dev_end_requests(&to_end); - spin_lock(&fc->lock); + spin_lock(&fch->lock); list_del(&fud->entry); /* Are we the last open device? 
*/ - last = list_empty(&fc->devices); - spin_unlock(&fc->lock); + last = list_empty(&fch->devices); + spin_unlock(&fch->lock); if (last) { - WARN_ON(fc->iq.fasync != NULL); - fuse_abort_conn(fc); + WARN_ON(fch->iq.fasync != NULL); + fuse_chan_abort(fch, false); } - fuse_conn_put(fc); + fuse_conn_put(fch->conn); } fuse_dev_put(fud); return 0; @@ -2593,13 +2248,14 @@ static int fuse_dev_fasync(int fd, struct file *file, int on) return PTR_ERR(fud); /* No locking - fasync_helper does its own locking */ - return fasync_helper(fd, file, on, &fud->fc->iq.fasync); + return fasync_helper(fd, file, on, &fud->chan->iq.fasync); } static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) { int oldfd; struct fuse_dev *fud, *new_fud; + struct list_head *pq; if (get_user(oldfd, argp)) return -EFAULT; @@ -2619,12 +2275,14 @@ static long fuse_dev_ioctl_clone(struct file *file, __u32 __user *argp) if (IS_ERR(fud)) return PTR_ERR(fud); + pq = fuse_pqueue_alloc(); + if (!pq) + return -ENOMEM; + new_fud = fuse_file_to_fud(file); - if (fuse_dev_fc_get(new_fud)) + if (!fuse_dev_install_with_pq(new_fud, fud->chan, pq)) return -EINVAL; - fuse_dev_install(new_fud, fud->fc); - return 0; } @@ -2643,7 +2301,7 @@ static long fuse_dev_ioctl_backing_open(struct file *file, if (copy_from_user(&map, argp, sizeof(map))) return -EFAULT; - return fuse_backing_open(fud->fc, &map); + return fuse_backing_open(fud->chan->conn, &map); } static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) @@ -2660,21 +2318,18 @@ static long fuse_dev_ioctl_backing_close(struct file *file, __u32 __user *argp) if (get_user(backing_id, argp)) return -EFAULT; - return fuse_backing_close(fud->fc, backing_id); + return fuse_backing_close(fud->chan->conn, backing_id); } static long fuse_dev_ioctl_sync_init(struct file *file) { - int err = -EINVAL; struct fuse_dev *fud = fuse_file_to_fud(file); - mutex_lock(&fuse_mutex); - if (!fuse_dev_fc_get(fud)) { - fud->sync_init = true; - err = 0; - } 
- mutex_unlock(&fuse_mutex); - return err; + if (fuse_dev_chan_get(fud)) + return -EINVAL; + + fud->sync_init = true; + return 0; } static long fuse_dev_ioctl(struct file *file, unsigned int cmd, @@ -2707,7 +2362,7 @@ static void fuse_dev_show_fdinfo(struct seq_file *seq, struct file *file) if (!fud) return; - seq_printf(seq, "fuse_connection:\t%u\n", fud->fc->dev); + seq_printf(seq, "fuse_connection:\t%u\n", fuse_conn_get_id(fud->chan->conn)); } #endif diff --git a/fs/fuse/dev.h b/fs/fuse/dev.h new file mode 100644 index 0000000000000..aed69fd14c41d --- /dev/null +++ b/fs/fuse/dev.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FS_FUSE_DEV_H +#define _FS_FUSE_DEV_H + +#include <linux/cleanup.h> + +/** Maximum number of outstanding background requests */ +#define FUSE_DEFAULT_MAX_BACKGROUND 12 + +struct fuse_conn; +struct fuse_chan; +struct fuse_dev; +struct fuse_args; +struct fuse_copy_state; +struct fuse_backing_map; +struct file; +struct folio; +enum fuse_notify_code; + +struct fuse_chan_param { + unsigned int minor; + unsigned int max_write; + unsigned int max_pages; +}; + +struct fuse_chan *fuse_chan_new(void); +struct fuse_chan *fuse_dev_chan_new(void); +void fuse_chan_release(struct fuse_chan *fch); +void fuse_chan_free(struct fuse_chan *fch); +unsigned int fuse_chan_num_background(struct fuse_chan *fch); +unsigned int fuse_chan_max_background(struct fuse_chan *fch); +void fuse_chan_max_background_set(struct fuse_chan *fch, unsigned int val); +unsigned int fuse_chan_num_waiting(struct fuse_chan *fch); +void fuse_chan_set_fc(struct fuse_chan *fch, struct fuse_conn *fc); +void fuse_chan_set_initialized(struct fuse_chan *fch, struct fuse_chan_param *param); +void fuse_chan_io_uring_enable(struct fuse_chan *fch); +ssize_t fuse_chan_send(struct fuse_chan *fch, struct fuse_args *args); +int fuse_chan_send_bg(struct fuse_chan *fch, struct fuse_args *args, gfp_t gfp_flags); +int fuse_chan_send_notify_reply(struct fuse_chan *fch, struct 
fuse_args *args, u64 unique); +void fuse_chan_resend(struct fuse_chan *fch); + +struct fuse_forget_link *fuse_alloc_forget(void); +void fuse_chan_queue_forget(struct fuse_chan *fch, struct fuse_forget_link *forget, + u64 nodeid, u64 nlookup); + +DEFINE_FREE(fuse_chan_free, struct fuse_chan *, if (_T) fuse_chan_free(_T)) + +/** + * Initialize the client device + */ +int fuse_dev_init(void); + +/** + * Cleanup the client device + */ +void fuse_dev_cleanup(void); + +void fuse_dev_install(struct fuse_dev *fud, struct fuse_chan *fch); +bool fuse_dev_verify(struct fuse_dev *fud, struct fuse_chan *fch); +void fuse_dev_put(struct fuse_dev *fud); +bool fuse_dev_is_installed(struct fuse_dev *fud); +bool fuse_dev_is_sync_init(struct fuse_dev *fud); +struct fuse_dev *fuse_dev_grab(struct file *file); + +void fuse_init_server_timeout(struct fuse_chan *fch, unsigned int timeout); + +/* Abort all requests */ +void fuse_chan_abort(struct fuse_chan *fch, bool abort_with_err); +void fuse_chan_wait_aborted(struct fuse_chan *fch); + +/** + * Acquire reference to fuse_conn + */ +struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); + +/** + * Release reference to fuse_conn + */ +void fuse_conn_put(struct fuse_conn *fc); + +dev_t fuse_conn_get_id(struct fuse_conn *fc); + +void fuse_end_polls(struct fuse_conn *fc); +int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, + unsigned int size, struct fuse_copy_state *cs); + +int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); +int fuse_backing_close(struct fuse_conn *fc, int backing_id); + +int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size); +int fuse_copy_folio(struct fuse_copy_state *cs, struct folio **foliop, + unsigned offset, unsigned count, int zeroing); +void fuse_copy_finish(struct fuse_copy_state *cs); + +#ifdef CONFIG_FUSE_IO_URING +bool fuse_uring_enabled(void); +void fuse_uring_destruct(struct fuse_chan *fch); +#else /* CONFIG_FUSE_IO_URING */ +static inline bool 
fuse_uring_enabled(void) +{ + return false; +} + +static inline void fuse_uring_destruct(struct fuse_chan *fch) +{ +} +#endif /* CONFIG_FUSE_IO_URING */ + +#endif /* _FS_FUSE_DEV_H */ diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 7b9822e8837bc..e467b23e68959 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -4,9 +4,9 @@ * Copyright (c) 2023-2024 DataDirect Networks. */ -#include "fuse_i.h" +#include "dev.h" +#include "args.h" #include "dev_uring_i.h" -#include "fuse_dev_i.h" #include "fuse_trace.h" #include <linux/fs.h> @@ -51,10 +51,10 @@ static struct fuse_ring_ent *uring_cmd_to_ring_ent(struct io_uring_cmd *cmd) static void fuse_uring_flush_bg(struct fuse_ring_queue *queue) { struct fuse_ring *ring = queue->ring; - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; lockdep_assert_held(&queue->lock); - lockdep_assert_held(&fc->bg_lock); + lockdep_assert_held(&fch->bg_lock); /* * Allow one bg request per queue, ignoring global fc limits. @@ -62,14 +62,14 @@ static void fuse_uring_flush_bg(struct fuse_ring_queue *queue) * eliminates the need for remote queue wake-ups when global * limits are met but this queue has no more waiting requests. 
*/ - while ((fc->active_background < fc->max_background || + while ((fch->active_background < fch->max_background || !queue->active_background) && (!list_empty(&queue->fuse_req_bg_queue))) { struct fuse_req *req; req = list_first_entry(&queue->fuse_req_bg_queue, struct fuse_req, list); - fc->active_background++; + fch->active_background++; queue->active_background++; list_move_tail(&req->list, &queue->fuse_req_queue); @@ -81,7 +81,7 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, { struct fuse_ring_queue *queue = ent->queue; struct fuse_ring *ring = queue->ring; - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; lockdep_assert_not_held(&queue->lock); spin_lock(&queue->lock); @@ -89,9 +89,9 @@ static void fuse_uring_req_end(struct fuse_ring_ent *ent, struct fuse_req *req, list_del_init(&req->list); if (test_bit(FR_BACKGROUND, &req->flags)) { queue->active_background--; - spin_lock(&fc->bg_lock); + spin_lock(&fch->bg_lock); fuse_uring_flush_bg(queue); - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); } spin_unlock(&queue->lock); @@ -123,7 +123,7 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring) { int qid; struct fuse_ring_queue *queue; - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; for (qid = 0; qid < ring->nr_queues; qid++) { queue = READ_ONCE(ring->queues[qid]); @@ -132,17 +132,17 @@ void fuse_uring_abort_end_requests(struct fuse_ring *ring) queue->stopped = true; - WARN_ON_ONCE(ring->fc->max_background != UINT_MAX); + WARN_ON_ONCE(fch->max_background != UINT_MAX); spin_lock(&queue->lock); - spin_lock(&fc->bg_lock); + spin_lock(&fch->bg_lock); fuse_uring_flush_bg(queue); - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); spin_unlock(&queue->lock); fuse_uring_abort_end_queue_requests(queue); } } -static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *list) +static bool ent_list_request_expired(struct fuse_chan *fch, struct list_head 
*list) { struct fuse_ring_ent *ent; struct fuse_req *req; @@ -154,12 +154,12 @@ static bool ent_list_request_expired(struct fuse_conn *fc, struct list_head *lis req = ent->fuse_req; return time_is_before_jiffies(req->create_time + - fc->timeout.req_timeout); + fch->timeout.req_timeout); } -bool fuse_uring_request_expired(struct fuse_conn *fc) +bool fuse_uring_request_expired(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; struct fuse_ring_queue *queue; int qid; @@ -172,10 +172,10 @@ bool fuse_uring_request_expired(struct fuse_conn *fc) continue; spin_lock(&queue->lock); - if (fuse_request_expired(fc, &queue->fuse_req_queue) || - fuse_request_expired(fc, &queue->fuse_req_bg_queue) || - ent_list_request_expired(fc, &queue->ent_w_req_queue) || - ent_list_request_expired(fc, &queue->ent_in_userspace)) { + if (fuse_request_expired(fch, &queue->fuse_req_queue) || + fuse_request_expired(fch, &queue->fuse_req_bg_queue) || + ent_list_request_expired(fch, &queue->ent_w_req_queue) || + ent_list_request_expired(fch, &queue->ent_in_userspace)) { spin_unlock(&queue->lock); return true; } @@ -185,9 +185,9 @@ bool fuse_uring_request_expired(struct fuse_conn *fc) return false; } -void fuse_uring_destruct(struct fuse_conn *fc) +void fuse_uring_destruct(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; int qid; if (!ring) @@ -218,20 +218,20 @@ void fuse_uring_destruct(struct fuse_conn *fc) kfree(ring->queues); kfree(ring); - fc->ring = NULL; + fch->ring = NULL; } /* * Basic ring setup for this connection based on the provided configuration */ -static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) +static struct fuse_ring *fuse_uring_create(struct fuse_chan *fch) { struct fuse_ring *ring; size_t nr_queues = num_possible_cpus(); struct fuse_ring *res = NULL; size_t max_payload_size; - ring = kzalloc_obj(*fc->ring, GFP_KERNEL_ACCOUNT); + ring = kzalloc_obj(*ring, 
GFP_KERNEL_ACCOUNT); if (!ring) return NULL; @@ -240,25 +240,25 @@ static struct fuse_ring *fuse_uring_create(struct fuse_conn *fc) if (!ring->queues) goto out_err; - max_payload_size = max(FUSE_MIN_READ_BUFFER, fc->max_write); - max_payload_size = max(max_payload_size, fc->max_pages * PAGE_SIZE); + max_payload_size = max(FUSE_MIN_READ_BUFFER, fch->max_write); + max_payload_size = max(max_payload_size, fch->max_pages * PAGE_SIZE); - spin_lock(&fc->lock); - if (fc->ring) { + spin_lock(&fch->lock); + if (fch->ring) { /* race, another thread created the ring in the meantime */ - spin_unlock(&fc->lock); - res = fc->ring; + spin_unlock(&fch->lock); + res = fch->ring; goto out_err; } init_waitqueue_head(&ring->stop_waitq); ring->nr_queues = nr_queues; - ring->fc = fc; + ring->chan = fch; ring->max_payload_sz = max_payload_size; - smp_store_release(&fc->ring, ring); + smp_store_release(&fch->ring, ring); - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); return ring; out_err: @@ -270,14 +270,14 @@ out_err: static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, int qid) { - struct fuse_conn *fc = ring->fc; + struct fuse_chan *fch = ring->chan; struct fuse_ring_queue *queue; struct list_head *pq; queue = kzalloc_obj(*queue, GFP_KERNEL_ACCOUNT); if (!queue) return NULL; - pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); + pq = fuse_pqueue_alloc(); if (!pq) { kfree(queue); return NULL; @@ -295,12 +295,12 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, INIT_LIST_HEAD(&queue->fuse_req_bg_queue); INIT_LIST_HEAD(&queue->ent_released); - queue->fpq.processing = pq; fuse_pqueue_init(&queue->fpq); + queue->fpq.processing = pq; - spin_lock(&fc->lock); + spin_lock(&fch->lock); if (ring->queues[qid]) { - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); kfree(queue->fpq.processing); kfree(queue); return ring->queues[qid]; @@ -310,7 +310,7 @@ static struct fuse_ring_queue *fuse_uring_create_queue(struct fuse_ring *ring, 
* write_once and lock as the caller mostly doesn't take the lock at all */ WRITE_ONCE(ring->queues[qid], queue); - spin_unlock(&fc->lock); + spin_unlock(&fch->lock); return queue; } @@ -531,8 +531,7 @@ static void fuse_uring_prepare_cancel(struct io_uring_cmd *cmd, int issue_flags, * Checks for errors and stores it into the request */ static int fuse_uring_out_header_has_err(struct fuse_out_header *oh, - struct fuse_req *req, - struct fuse_conn *fc) + struct fuse_req *req) { int err; @@ -812,7 +811,6 @@ static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req, unsigned int issue_flags) { struct fuse_ring *ring = ent->queue->ring; - struct fuse_conn *fc = ring->fc; ssize_t err = 0; err = copy_from_user(&req->out.h, &ent->headers->in_out, @@ -822,7 +820,7 @@ static void fuse_uring_commit(struct fuse_ring_ent *ent, struct fuse_req *req, goto out; } - err = fuse_uring_out_header_has_err(&req->out.h, req, fc); + err = fuse_uring_out_header_has_err(&req->out.h, req); if (err) { /* req->out.h.error already set */ goto out; @@ -873,13 +871,13 @@ static int fuse_ring_ent_set_commit(struct fuse_ring_ent *ent) /* FUSE_URING_CMD_COMMIT_AND_FETCH handler */ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, - struct fuse_conn *fc) + struct fuse_chan *fch) { const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe128_cmd(cmd->sqe, struct fuse_uring_cmd_req); struct fuse_ring_ent *ent; int err; - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; struct fuse_ring_queue *queue; uint64_t commit_id = READ_ONCE(cmd_req->commit_id); unsigned int qid = READ_ONCE(cmd_req->qid); @@ -898,7 +896,7 @@ static int fuse_uring_commit_fetch(struct io_uring_cmd *cmd, int issue_flags, return err; fpq = &queue->fpq; - if (!READ_ONCE(fc->connected) || READ_ONCE(queue->stopped)) + if (!READ_ONCE(fch->connected) || READ_ONCE(queue->stopped)) return err; spin_lock(&queue->lock); @@ -981,8 +979,8 @@ static void 
fuse_uring_do_register(struct fuse_ring_ent *ent, { struct fuse_ring_queue *queue = ent->queue; struct fuse_ring *ring = queue->ring; - struct fuse_conn *fc = ring->fc; - struct fuse_iqueue *fiq = &fc->iq; + struct fuse_chan *fch = ring->chan; + struct fuse_iqueue *fiq = &fch->iq; fuse_uring_prepare_cancel(cmd, issue_flags, ent); @@ -997,7 +995,7 @@ static void fuse_uring_do_register(struct fuse_ring_ent *ent, if (ready) { WRITE_ONCE(fiq->ops, &fuse_io_uring_ops); WRITE_ONCE(ring->ready, true); - wake_up_all(&fc->blocked_waitq); + wake_up_all(&fch->blocked_waitq); } } } @@ -1078,11 +1076,11 @@ fuse_uring_create_ring_ent(struct io_uring_cmd *cmd, * entry as "ready to get fuse requests" on the queue */ static int fuse_uring_register(struct io_uring_cmd *cmd, - unsigned int issue_flags, struct fuse_conn *fc) + unsigned int issue_flags, struct fuse_chan *fch) { const struct fuse_uring_cmd_req *cmd_req = io_uring_sqe128_cmd(cmd->sqe, struct fuse_uring_cmd_req); - struct fuse_ring *ring = smp_load_acquire(&fc->ring); + struct fuse_ring *ring = smp_load_acquire(&fch->ring); struct fuse_ring_queue *queue; struct fuse_ring_ent *ent; int err; @@ -1090,7 +1088,7 @@ static int fuse_uring_register(struct io_uring_cmd *cmd, err = -ENOMEM; if (!ring) { - ring = fuse_uring_create(fc); + ring = fuse_uring_create(fch); if (!ring) return err; } @@ -1128,7 +1126,7 @@ static int fuse_uring_register(struct io_uring_cmd *cmd, int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) { struct fuse_dev *fud; - struct fuse_conn *fc; + struct fuse_chan *fch; u32 cmd_op = cmd->cmd_op; int err; @@ -1146,39 +1144,39 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) pr_info_ratelimited("No fuse device found\n"); return PTR_ERR(fud); } - fc = fud->fc; + fch = fud->chan; /* Once a connection has io-uring enabled on it, it can't be disabled */ - if (!enable_uring && !fc->io_uring) { + if (!enable_uring && !fch->io_uring) { pr_info_ratelimited("fuse-io-uring is 
disabled\n"); return -EOPNOTSUPP; } - if (fc->aborted) + if (fch->abort_with_err) return -ECONNABORTED; - if (!fc->connected) + if (!fch->connected) return -ENOTCONN; /* * fuse_uring_register() needs the ring to be initialized, * we need to know the max payload size */ - if (!fc->initialized) + if (!fch->initialized) return -EAGAIN; switch (cmd_op) { case FUSE_IO_URING_CMD_REGISTER: - err = fuse_uring_register(cmd, issue_flags, fc); + err = fuse_uring_register(cmd, issue_flags, fch); if (err) { pr_info_once("FUSE_IO_URING_CMD_REGISTER failed err=%d\n", err); - fc->io_uring = 0; - wake_up_all(&fc->blocked_waitq); + fch->io_uring = 0; + wake_up_all(&fch->blocked_waitq); return err; } break; case FUSE_IO_URING_CMD_COMMIT_AND_FETCH: - err = fuse_uring_commit_fetch(cmd, issue_flags, fc); + err = fuse_uring_commit_fetch(cmd, issue_flags, fch); if (err) { pr_info_once("FUSE_IO_URING_COMMIT_AND_FETCH failed err=%d\n", err); @@ -1261,8 +1259,7 @@ static void fuse_uring_dispatch_ent(struct fuse_ring_ent *ent) /* queue a fuse request and send it if a ring entry is available */ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = req->chan->ring; struct fuse_ring_queue *queue; struct fuse_ring_ent *ent = NULL; int err; @@ -1304,8 +1301,8 @@ err: bool fuse_uring_queue_bq_req(struct fuse_req *req) { - struct fuse_conn *fc = req->fm->fc; - struct fuse_ring *ring = fc->ring; + struct fuse_chan *fch = req->chan; + struct fuse_ring *ring = fch->ring; struct fuse_ring_queue *queue; struct fuse_ring_ent *ent = NULL; @@ -1325,12 +1322,12 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req) ent = list_first_entry_or_null(&queue->ent_avail_queue, struct fuse_ring_ent, list); - spin_lock(&fc->bg_lock); - fc->num_background++; - if (fc->num_background == fc->max_background) - fc->blocked = 1; + spin_lock(&fch->bg_lock); + fch->num_background++; + if 
(fch->num_background == fch->max_background) + fch->blocked = 1; fuse_uring_flush_bg(queue); - spin_unlock(&fc->bg_lock); + spin_unlock(&fch->bg_lock); /* * Due to bg_queue flush limits there might be other bg requests diff --git a/fs/fuse/dev_uring_i.h b/fs/fuse/dev_uring_i.h index 51a563922ce14..368f4d0790ebb 100644 --- a/fs/fuse/dev_uring_i.h +++ b/fs/fuse/dev_uring_i.h @@ -7,7 +7,7 @@ #ifndef _FS_FUSE_DEV_URING_I_H #define _FS_FUSE_DEV_URING_I_H -#include "fuse_i.h" +#include "fuse_dev_i.h" #ifdef CONFIG_FUSE_IO_URING @@ -101,13 +101,13 @@ struct fuse_ring_queue { bool stopped; }; -/** +/* * Describes if uring is for communication and holds alls the data needed * for uring communication */ struct fuse_ring { /* back pointer */ - struct fuse_conn *fc; + struct fuse_chan *chan; /* number of ring queues */ size_t nr_queues; @@ -135,19 +135,17 @@ struct fuse_ring { bool ready; }; -bool fuse_uring_enabled(void); -void fuse_uring_destruct(struct fuse_conn *fc); void fuse_uring_stop_queues(struct fuse_ring *ring); void fuse_uring_abort_end_requests(struct fuse_ring *ring); int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags); void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_uring_queue_bq_req(struct fuse_req *req); bool fuse_uring_remove_pending_req(struct fuse_req *req); -bool fuse_uring_request_expired(struct fuse_conn *fc); +bool fuse_uring_request_expired(struct fuse_chan *fch); -static inline void fuse_uring_abort(struct fuse_conn *fc) +static inline void fuse_uring_abort(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; if (ring == NULL) return; @@ -158,40 +156,31 @@ static inline void fuse_uring_abort(struct fuse_conn *fc) } } -static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc) +static inline void fuse_uring_wait_stopped_queues(struct fuse_chan *fch) { - struct fuse_ring *ring = fc->ring; + struct fuse_ring *ring = fch->ring; if 
(ring) wait_event(ring->stop_waitq, atomic_read(&ring->queue_refs) == 0); } -static inline bool fuse_uring_ready(struct fuse_conn *fc) +static inline bool fuse_uring_ready(struct fuse_chan *fch) { - return fc->ring && fc->ring->ready; + return fch->ring && fch->ring->ready; } #else /* CONFIG_FUSE_IO_URING */ -static inline void fuse_uring_destruct(struct fuse_conn *fc) +static inline void fuse_uring_abort(struct fuse_chan *fch) { } -static inline bool fuse_uring_enabled(void) -{ - return false; -} - -static inline void fuse_uring_abort(struct fuse_conn *fc) -{ -} - -static inline void fuse_uring_wait_stopped_queues(struct fuse_conn *fc) +static inline void fuse_uring_wait_stopped_queues(struct fuse_chan *fch) { } -static inline bool fuse_uring_ready(struct fuse_conn *fc) +static inline bool fuse_uring_ready(struct fuse_chan *fch) { return false; } @@ -201,7 +190,7 @@ static inline bool fuse_uring_remove_pending_req(struct fuse_req *req) return false; } -static inline bool fuse_uring_request_expired(struct fuse_conn *fc) +static inline bool fuse_uring_request_expired(struct fuse_chan *fch) { return false; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b658b6baf72fe..be41c14ef3292 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -6,6 +6,7 @@ See the file COPYING. 
*/ +#include "dev.h" #include "fuse_i.h" #include <linux/pagemap.h> @@ -317,7 +318,7 @@ void fuse_invalidate_attr(struct inode *inode) static void fuse_dir_changed(struct inode *dir) { - fuse_invalidate_attr(dir); + fuse_invalidate_attr_mask(dir, FUSE_STATX_MODDIR); inode_maybe_inc_iversion(dir, false); } @@ -430,7 +431,7 @@ static int fuse_dentry_revalidate(struct inode *dir, const struct qstr *name, fi = get_fuse_inode(inode); if (outarg.nodeid != get_node_id(inode) || (bool) IS_AUTOMOUNT(inode) != (bool) (outarg.attr.flags & FUSE_ATTR_SUBMOUNT)) { - fuse_queue_forget(fm->fc, forget, + fuse_chan_queue_forget(fm->fc->chan, forget, outarg.nodeid, 1); goto invalid; } @@ -593,7 +594,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name attr_version, evict_ctr); err = -ENOMEM; if (!*inode) { - fuse_queue_forget(fm->fc, forget, outarg->nodeid, 1); + fuse_chan_queue_forget(fm->fc->chan, forget, outarg->nodeid, 1); goto out; } err = 0; @@ -837,7 +838,6 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, if (!forget) goto out_err; - err = -ENOMEM; ff = fuse_file_alloc(fm, true); if (!ff) goto out_put_forget_req; @@ -894,7 +894,7 @@ static int fuse_create_open(struct mnt_idmap *idmap, struct inode *dir, if (!inode) { flags &= ~(O_CREAT | O_EXCL | O_TRUNC); fuse_sync_release(NULL, ff, flags); - fuse_queue_forget(fm->fc, forget, outentry.nodeid, 1); + fuse_chan_queue_forget(fm->fc->chan, forget, outentry.nodeid, 1); err = -ENOMEM; goto out_err; } @@ -1019,7 +1019,7 @@ static struct dentry *create_new_entry(struct mnt_idmap *idmap, struct fuse_moun inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation, &outarg.attr, ATTR_TIMEOUT(&outarg), 0, 0); if (!inode) { - fuse_queue_forget(fm->fc, forget, outarg.nodeid, 1); + fuse_chan_queue_forget(fm->fc->chan, forget, outarg.nodeid, 1); return ERR_PTR(-ENOMEM); } kfree(forget); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f94f3dc082c6b..9585d1be4a4ef 100644 --- 
a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -7,6 +7,7 @@ */ #include "fuse_i.h" +#include "dev.h" #include <linux/pagemap.h> #include <linux/slab.h> @@ -91,8 +92,7 @@ static struct fuse_file *fuse_file_get(struct fuse_file *ff) return ff; } -static void fuse_release_end(struct fuse_mount *fm, struct fuse_args *args, - int error) +static void fuse_release_end(struct fuse_args *args, int error) { struct fuse_release_args *ra = container_of(args, typeof(*ra), args); @@ -112,10 +112,10 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) if (!args) { /* Do nothing when server does not implement 'opendir' */ } else if (args->opcode == FUSE_RELEASE && ff->fm->fc->no_open) { - fuse_release_end(ff->fm, args, 0); + fuse_release_end(args, 0); } else if (sync) { fuse_simple_request(ff->fm, args); - fuse_release_end(ff->fm, args, 0); + fuse_release_end(args, 0); } else { /* * DAX inodes may need to issue a number of synchronous @@ -126,7 +126,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync) args->end = fuse_release_end; if (fuse_simple_background(ff->fm, args, GFP_KERNEL | __GFP_NOFAIL)) - fuse_release_end(ff->fm, args, -ENOTCONN); + fuse_release_end(args, -ENOTCONN); } kfree(ff); } @@ -380,8 +380,14 @@ void fuse_file_release(struct inode *inode, struct fuse_file *ff, * aio and closes the fd before the aio completes. Since aio takes its * own ref to the file, the IO completion has to drop the ref, which is * how the fuse server can end up closing its clients' files. + * + * Exception is virtio-fs, which is not affected by the above (server is + * on host, cannot close open files in guest). Virtio-fs needs sync + * release, because the num_waiting mechanism to wait for all requests + * before commencing with fs shutdown doesn't work if submounts are + * used. 
*/ - fuse_file_put(ff, false); + fuse_file_put(ff, ff->fm->fc->auto_submounts); } void fuse_release_common(struct file *file, bool isdir) @@ -709,8 +715,7 @@ static void fuse_io_free(struct fuse_io_args *ia) kfree(ia); } -static void fuse_aio_complete_req(struct fuse_mount *fm, struct fuse_args *args, - int err) +static void fuse_aio_complete_req(struct fuse_args *args, int err) { struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); struct fuse_io_priv *io = ia->io; @@ -758,7 +763,7 @@ static ssize_t fuse_async_req_send(struct fuse_mount *fm, ia->ap.args.may_block = io->should_dirty; err = fuse_simple_background(fm, &ia->ap.args, GFP_KERNEL); if (err) - fuse_aio_complete_req(fm, &ia->ap.args, err); + fuse_aio_complete_req(&ia->ap.args, err); return num_bytes; } @@ -902,7 +907,7 @@ static int fuse_handle_readahead(struct folio *folio, ia = NULL; } if (!ia) { - if (fc->num_background >= fc->congestion_threshold && + if (fuse_chan_num_background(fc->chan) >= fc->congestion_threshold && rac->ra->async_size >= readahead_count(rac)) /* * Congested and only async pages left, so skip the @@ -1001,8 +1006,7 @@ static int fuse_iomap_read_folio_range(const struct iomap_iter *iter, return fuse_do_readfolio(file, folio, off, len); } -static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args, - int err) +static void fuse_readpages_end(struct fuse_args *args, int err) { int i; struct fuse_io_args *ia = container_of(args, typeof(*ia), ap.args); @@ -1068,7 +1072,7 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file, res = fuse_simple_request(fm, &ap->args); err = res < 0 ? 
res : 0; } - fuse_readpages_end(fm, &ap->args, err); + fuse_readpages_end(&ap->args, err); } static void fuse_readahead(struct readahead_control *rac) @@ -1985,8 +1989,7 @@ __acquires(fi->lock) } } -static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args, - int error) +static void fuse_writepage_end(struct fuse_args *args, int error) { struct fuse_writepage_args *wpa = container_of(args, typeof(*wpa), ia.ap.args); @@ -2297,7 +2300,7 @@ static int fuse_writepages(struct address_space *mapping, return -EIO; if (wbc->sync_mode == WB_SYNC_NONE && - fc->num_background >= fc->congestion_threshold) + fuse_chan_num_background(fc->chan) >= fc->congestion_threshold) return 0; return iomap_writepages(&wpc); @@ -2683,125 +2686,6 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int whence) return retval; } -/* - * All files which have been polled are linked to RB tree - * fuse_conn->polled_files which is indexed by kh. Walk the tree and - * find the matching one. - */ -static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, - struct rb_node **parent_out) -{ - struct rb_node **link = &fc->polled_files.rb_node; - struct rb_node *last = NULL; - - while (*link) { - struct fuse_file *ff; - - last = *link; - ff = rb_entry(last, struct fuse_file, polled_node); - - if (kh < ff->kh) - link = &last->rb_left; - else if (kh > ff->kh) - link = &last->rb_right; - else - return link; - } - - if (parent_out) - *parent_out = last; - return link; -} - -/* - * The file is about to be polled. Make sure it's on the polled_files - * RB tree. Note that files once added to the polled_files tree are - * not removed before the file is released. This is because a file - * polled once is likely to be polled again. 
- */ -static void fuse_register_polled_file(struct fuse_conn *fc, - struct fuse_file *ff) -{ - spin_lock(&fc->lock); - if (RB_EMPTY_NODE(&ff->polled_node)) { - struct rb_node **link, *parent; - - link = fuse_find_polled_node(fc, ff->kh, &parent); - BUG_ON(*link); - rb_link_node(&ff->polled_node, parent, link); - rb_insert_color(&ff->polled_node, &fc->polled_files); - } - spin_unlock(&fc->lock); -} - -__poll_t fuse_file_poll(struct file *file, poll_table *wait) -{ - struct fuse_file *ff = file->private_data; - struct fuse_mount *fm = ff->fm; - struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; - struct fuse_poll_out outarg; - FUSE_ARGS(args); - int err; - - if (fm->fc->no_poll) - return DEFAULT_POLLMASK; - - poll_wait(file, &ff->poll_wait, wait); - inarg.events = mangle_poll(poll_requested_events(wait)); - - /* - * Ask for notification iff there's someone waiting for it. - * The client may ignore the flag and always notify. - */ - if (waitqueue_active(&ff->poll_wait)) { - inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; - fuse_register_polled_file(fm->fc, ff); - } - - args.opcode = FUSE_POLL; - args.nodeid = ff->nodeid; - args.in_numargs = 1; - args.in_args[0].size = sizeof(inarg); - args.in_args[0].value = &inarg; - args.out_numargs = 1; - args.out_args[0].size = sizeof(outarg); - args.out_args[0].value = &outarg; - err = fuse_simple_request(fm, &args); - - if (!err) - return demangle_poll(outarg.revents); - if (err == -ENOSYS) { - fm->fc->no_poll = 1; - return DEFAULT_POLLMASK; - } - return EPOLLERR; -} -EXPORT_SYMBOL_GPL(fuse_file_poll); - -/* - * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and - * wakes up the poll waiters. 
- */ -int fuse_notify_poll_wakeup(struct fuse_conn *fc, - struct fuse_notify_poll_wakeup_out *outarg) -{ - u64 kh = outarg->kh; - struct rb_node **link; - - spin_lock(&fc->lock); - - link = fuse_find_polled_node(fc, kh, NULL); - if (*link) { - struct fuse_file *ff; - - ff = rb_entry(*link, struct fuse_file, polled_node); - wake_up_interruptible_sync(&ff->poll_wait); - } - - spin_unlock(&fc->lock); - return 0; -} - static void fuse_do_truncate(struct file *file) { struct inode *inode = file->f_mapping->host; diff --git a/fs/fuse/fuse_dev_i.h b/fs/fuse/fuse_dev_i.h index 910f883cd090f..9ce987826deda 100644 --- a/fs/fuse/fuse_dev_i.h +++ b/fs/fuse/fuse_dev_i.h @@ -6,20 +6,311 @@ #ifndef _FS_FUSE_DEV_I_H #define _FS_FUSE_DEV_I_H +#include <linux/fuse.h> #include <linux/types.h> +#include <linux/refcount.h> +#include <linux/wait.h> +#include <linux/workqueue.h> +#include <linux/fs.h> /* Ordinary requests have even IDs, while interrupts IDs are odd */ #define FUSE_INT_REQ_BIT (1ULL << 0) #define FUSE_REQ_ID_STEP (1ULL << 1) -extern struct wait_queue_head fuse_dev_waitq; - struct fuse_arg; struct fuse_args; struct fuse_pqueue; -struct fuse_req; struct fuse_iqueue; -struct fuse_forget_link; + +/** + * enum fuse_req_flag - Request flags + * + * @FR_ISREPLY: set if the request has reply + * @FR_FORCE: force sending of the request even if interrupted + * @FR_BACKGROUND: request is sent in the background + * @FR_WAITING: request is counted as "waiting" + * @FR_ABORTED: the request was aborted + * @FR_INTERRUPTED: the request has been interrupted + * @FR_LOCKED: data is being copied to/from the request + * @FR_PENDING: request is not yet in userspace + * @FR_SENT: request is in userspace, waiting for an answer + * @FR_FINISHED: request is finished + * @FR_PRIVATE: request is on private list + * @FR_ASYNC: request is asynchronous + * @FR_URING: request is handled through fuse-io-uring + */ +enum fuse_req_flag { + FR_ISREPLY, + FR_FORCE, + FR_BACKGROUND, + FR_WAITING, + 
FR_ABORTED, + FR_INTERRUPTED, + FR_LOCKED, + FR_PENDING, + FR_SENT, + FR_FINISHED, + FR_PRIVATE, + FR_ASYNC, + FR_URING, +}; + +/** + * struct fuse_req - A request to the client + * + * .waitq.lock protects the following fields: + * - FR_ABORTED + * - FR_LOCKED (may also be modified under fpq->lock, tested under both) + */ +struct fuse_req { + /** + * @list: This can be on either pending processing or io lists in + * fuse_conn + */ + struct list_head list; + + /** @intr_entry: Entry on the interrupts list */ + struct list_head intr_entry; + + /** @args: Input/output arguments */ + struct fuse_args *args; + + /** @count: refcount */ + refcount_t count; + + /** @flags: Request flags, updated with test/set/clear_bit() */ + unsigned long flags; + + /** @in: The request input header */ + struct { + /** @in.h: The request input header */ + struct fuse_in_header h; + } in; + + /** @out: The request output header */ + struct { + /** @out.h: The request output header */ + struct fuse_out_header h; + } out; + + /** @waitq: Used to wake up the task waiting for completion of request */ + wait_queue_head_t waitq; + +#if IS_ENABLED(CONFIG_VIRTIO_FS) + /** + * @argbuf: virtio-fs's physically contiguous buffer for in and out + * args + */ + void *argbuf; +#endif + + /** @chan: fuse_chan this request belongs to */ + struct fuse_chan *chan; + +#ifdef CONFIG_FUSE_IO_URING + void *ring_entry; + void *ring_queue; +#endif + /** @create_time: When (in jiffies) the request was created */ + unsigned long create_time; +}; + +/* One forget request */ +struct fuse_forget_link { + struct fuse_forget_one forget_one; + struct fuse_forget_link *next; +}; + +/** + * struct fuse_iqueue_ops - Input queue callbacks + * + * Input queue signalling is device-specific. For example, the /dev/fuse file + * uses fiq->waitq and fasync to wake processes that are waiting on queue + * readiness. These callbacks allow other device types to respond to input + * queue activity. 
+ */ +struct fuse_iqueue_ops { + /** + * @send_forget: Send one forget + */ + void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); + + /** + * @send_interrupt: Send interrupt for request + */ + void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); + + /** + * @send_req: Send one request + */ + void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); + + /** + * @release: Clean up when fuse_iqueue is destroyed + */ + void (*release)(struct fuse_iqueue *fiq); +}; + +struct fuse_iqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + + /** Readers of the connection are waiting on this */ + wait_queue_head_t waitq; + + /** The next unique request id */ + u64 reqctr; + + /** The list of pending requests */ + struct list_head pending; + + /** Pending interrupts */ + struct list_head interrupts; + + /** Queue of pending forgets */ + struct fuse_forget_link forget_list_head; + struct fuse_forget_link *forget_list_tail; + + /** Batching of FORGET requests (positive indicates FORGET batch) */ + int forget_batch; + + /** O_ASYNC requests */ + struct fasync_struct *fasync; + + /** Device-specific callbacks */ + const struct fuse_iqueue_ops *ops; + + /** Device-specific state */ + void *priv; +}; + +struct fuse_chan { + /** Lock protecting: + - devices + - connected + - ring + - ring->queues[qid] + */ + spinlock_t lock; + + /* back pointer: fc->chan->conn == fc */ + struct fuse_conn *conn; + + /** Input queue */ + struct fuse_iqueue iq; + + /** List of device instances belonging to this connection */ + struct list_head devices; + + /** Maximum number of outstanding background requests */ + unsigned max_background; + + /** Number of requests currently in the background */ + unsigned num_background; + + /** Number of background requests currently queued for userspace */ + unsigned active_background; + + /** The list of background requests 
set aside for later queuing */ + struct list_head bg_queue; + + /** Protects: max_background, num_background, active_background, bg_queue, blocked */ + spinlock_t bg_lock; + + /** Flag indicating that INIT reply has been received. Allocating + * any fuse request will be suspended until the flag is set */ + int initialized; + + /** Flag indicating if connection is blocked. This will be + the case before the INIT reply is received, and if there + are too many outstanding background requests */ + int blocked; + + /** waitq for blocked connection */ + wait_queue_head_t blocked_waitq; + + /** Connection established, cleared on umount, connection + abort and device release */ + unsigned connected; + + /** The number of requests waiting for completion */ + atomic_t num_waiting; + + /** Is interrupt not implemented by fs? */ + bool no_interrupt; + + /* Use io_uring for communication */ + unsigned int io_uring; + + /* Negotiated minor version */ + unsigned int minor; + + /* Maximum write size */ + unsigned int max_write; + + /* Maximum number of pages that can be used in a single request */ + unsigned int max_pages; + + /* Before being installed into fud, contains the preallocated pq array */ + struct list_head *pq_prealloc; + + /** Connection aborted via sysfs, respond with ECONNABORTED on device I/O */ + bool abort_with_err; + +#ifdef CONFIG_FUSE_IO_URING + /** uring connection information */ + struct fuse_ring *ring; +#endif + + /** Only used if the connection opts into request timeouts */ + struct { + /* Worker for checking if any requests have timed out */ + struct delayed_work work; + + /* Request timeout (in jiffies).
0 = no timeout */ + unsigned int req_timeout; + } timeout; +}; + +#define FUSE_PQ_HASH_BITS 8 +#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS) + +struct fuse_pqueue { + /** Connection established */ + unsigned connected; + + /** Lock protecting accesses to members of this structure */ + spinlock_t lock; + + /** Hash table of requests being processed */ + struct list_head *processing; + + /** The list of requests under I/O */ + struct list_head io; +}; + +/** + * struct fuse_dev - Fuse device instance + */ +struct fuse_dev { + /** @ref: Reference count of this object */ + refcount_t ref; + + /** @sync_init: Issue FUSE_INIT synchronously */ + bool sync_init; + + /** @chan: Fuse channel for this device */ + struct fuse_chan *chan; + + /** @pq: Processing queue */ + struct fuse_pqueue pq; + + /** @entry: list entry on fch->devices */ + struct list_head entry; +}; struct fuse_copy_state { struct fuse_req *req; @@ -39,21 +330,21 @@ struct fuse_copy_state { } ring; }; -/* fud->fc gets assigned to this value when /dev/fuse is closed */ -#define FUSE_DEV_FC_DISCONNECTED ((struct fuse_conn *) 1) +/* fud->chan gets assigned to this value when /dev/fuse is closed */ +#define FUSE_DEV_CHAN_DISCONNECTED ((struct fuse_chan *) 1) /* - * Lockless access is OK, because fud->fc is set once during mount and is valid + * Lockless access is OK, because fud->chan is set once during mount and is valid * until the file is released. * - * fud->fc is set to FUSE_DEV_FC_DISCONNECTED only after the containing file is + * fud->chan is set to FUSE_DEV_CHAN_DISCONNECTED only after the containing file is * released, so result is safe to dereference in most cases. Exceptions are: * fuse_dev_put() and fuse_fill_super_common().
*/ -static inline struct fuse_conn *fuse_dev_fc_get(struct fuse_dev *fud) +static inline struct fuse_chan *fuse_dev_chan_get(struct fuse_dev *fud) { /* Pairs with xchg() in fuse_dev_install() */ - return smp_load_acquire(&fud->fc); + return smp_load_acquire(&fud->chan); } static inline struct fuse_dev *fuse_file_to_fud(struct file *file) @@ -65,12 +356,14 @@ static inline struct fuse_dev *__fuse_get_dev(struct file *file) { struct fuse_dev *fud = fuse_file_to_fud(file); - if (!fuse_dev_fc_get(fud)) + if (!fuse_dev_chan_get(fud)) return NULL; return fud; } +void fuse_iqueue_init(struct fuse_iqueue *fiq, const struct fuse_iqueue_ops *ops, void *priv); + struct fuse_dev *fuse_get_dev(struct file *file); unsigned int fuse_req_hash(u64 unique); @@ -80,7 +373,11 @@ void fuse_dev_end_requests(struct list_head *head); void fuse_copy_init(struct fuse_copy_state *cs, bool write, struct iov_iter *iter); -void fuse_copy_finish(struct fuse_copy_state *cs); +/* + * Return the number of bytes in an arguments list + */ +unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); + int fuse_copy_args(struct fuse_copy_state *cs, unsigned int numargs, unsigned int argpages, struct fuse_arg *args, int zeroing); @@ -91,7 +388,34 @@ void fuse_dev_queue_forget(struct fuse_iqueue *fiq, void fuse_dev_queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req); bool fuse_remove_pending_req(struct fuse_req *req, spinlock_t *lock); -bool fuse_request_expired(struct fuse_conn *fc, struct list_head *list); +bool fuse_request_expired(struct fuse_chan *fch, struct list_head *list); + +/* + * Assign a unique id to a fuse request + */ +void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req); + +/* + * Get the next unique ID for a request + */ +u64 fuse_get_unique(struct fuse_iqueue *fiq); + +struct fuse_dev *fuse_dev_alloc_install(struct fuse_chan *fch); +struct fuse_dev *fuse_dev_alloc(void); + +int fuse_dev_release(struct inode *inode, struct file *file); + 
+struct list_head *fuse_pqueue_alloc(void); + +/* + * Initialize the fuse processing queue + */ +void fuse_pqueue_init(struct fuse_pqueue *fpq); + +/* + * End a finished request + */ +void fuse_request_end(struct fuse_req *req); #endif diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 17423d4e3cfa6..55a3841b28899 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -13,6 +13,7 @@ # define pr_fmt(fmt) "fuse: " fmt #endif +#include "args.h" #include <linux/fuse.h> #include <linux/fs.h> #include <linux/mount.h> @@ -48,12 +49,6 @@ /** Number of dentries for each connection in the control filesystem */ #define FUSE_CTL_NUM_DENTRIES 5 -/* Frequency (in seconds) of request timeout checks, if opted into */ -#define FUSE_TIMEOUT_TIMER_FREQ 15 - -/** Frequency (in jiffies) of request timeout checks, if opted into */ -extern const unsigned long fuse_timeout_timer_freq; - /* * Dentries invalidation workqueue period, in seconds. The value of this * parameter shall be >= FUSE_DENTRY_INVAL_FREQ_MIN seconds, or 0 (zero), in @@ -63,16 +58,6 @@ extern unsigned inval_wq __read_mostly; /** Maximum of max_pages received in init_out */ extern unsigned int fuse_max_pages_limit; -/* - * Default timeout (in seconds) for the server to reply to a request - * before the connection is aborted, if no timeout was specified on mount. - */ -extern unsigned int fuse_default_req_timeout; -/* - * Max timeout (in seconds) for the server to reply to a request before - * the connection is aborted. 
- */ -extern unsigned int fuse_max_req_timeout; /** List of active connections */ extern struct list_head fuse_conn_list; @@ -84,143 +69,168 @@ extern struct mutex fuse_mutex; extern unsigned int max_user_bgreq; extern unsigned int max_user_congthresh; -/* One forget request */ -struct fuse_forget_link { - struct fuse_forget_one forget_one; - struct fuse_forget_link *next; -}; +struct fuse_forget_link; -/* Submount lookup tracking */ +/** + * struct fuse_submount_lookup - Submount lookup tracking + */ struct fuse_submount_lookup { - /** Refcount */ + /** @count: Refcount */ refcount_t count; - /** Unique ID, which identifies the inode between userspace - * and kernel */ + /** + * @nodeid: Unique ID, which identifies the inode between userspace + * and kernel + */ u64 nodeid; - /** The request used for sending the FORGET message */ + /** @forget: The request used for sending the FORGET message */ struct fuse_forget_link *forget; }; -/** Container for data related to mapping to backing file */ +/* Container for data related to mapping to backing file */ struct fuse_backing { struct file *file; - struct cred *cred; + const struct cred *cred; - /** refcount */ + /* refcount */ refcount_t count; struct rcu_head rcu; }; -/** FUSE inode */ +/** + * struct fuse_inode - FUSE inode + */ struct fuse_inode { - /** Inode data */ + /** @inode: Inode data */ struct inode inode; - /** Unique ID, which identifies the inode between userspace - * and kernel */ + /** + * @nodeid: Unique ID, which identifies the inode between userspace + * and kernel + */ u64 nodeid; - /** Number of lookups on this inode */ + /** @nlookup: Number of lookups on this inode */ u64 nlookup; - /** The request used for sending the FORGET message */ + /** @forget: The request used for sending the FORGET message */ struct fuse_forget_link *forget; - /** Time in jiffies until the file attributes are valid */ + /** @i_time: Time in jiffies until the file attributes are valid */ u64 i_time; - /* Which attributes 
are invalid */ + /** @inval_mask: Which attributes are invalid */ u32 inval_mask; - /** The sticky bit in inode->i_mode may have been removed, so - preserve the original mode */ + /** + * @orig_i_mode: The sticky bit in inode->i_mode may have been removed, + * so preserve the original mode + */ umode_t orig_i_mode; - /* Cache birthtime */ + /** @i_btime: Cache birthtime */ struct timespec64 i_btime; - /** 64 bit inode number */ + /** @orig_ino: 64-bit inode number */ u64 orig_ino; - /** Version of last attribute change */ + /** @attr_version: Version of last attribute change */ u64 attr_version; union { /* read/write io cache (regular file only) */ struct { - /* Files usable in writepage. Protected by fi->lock */ + /** + * @write_files: Files usable in writepage. + * Protected by fi->lock + */ struct list_head write_files; - /* Writepages pending on truncate or fsync */ + /** + * @queued_writes: Writepages pending on truncate or + * fsync + */ struct list_head queued_writes; - /* Number of sent writes, a negative bias - * (FUSE_NOWRITE) means more writes are blocked */ + /** + * @writectr: Number of sent writes, a negative bias + * (FUSE_NOWRITE) means more writes are blocked + */ int writectr; - /** Number of files/maps using page cache */ + /** @iocachectr: Number of files/maps using page cache */ int iocachectr; - /* Waitq for writepage completion */ + /** @page_waitq: Waitq for writepage completion */ wait_queue_head_t page_waitq; - /* waitq for direct-io completion */ + /** @direct_io_waitq: waitq for direct-io completion */ wait_queue_head_t direct_io_waitq; }; - /* readdir cache (directory only) */ + /** @rdc: readdir cache (directory only) */ struct { - /* true if fully cached */ + /** @cached: true if fully cached */ bool cached; - /* size of cache */ + /** @size: size of cache */ loff_t size; - /* position at end of cache (position of next entry) */ + /** + * @pos: position at end of cache (position of next + * entry) + */ loff_t pos; - /* version of the 
cache */ + /** @version: version of the cache */ u64 version; - /* modification time of directory when cache was - * started */ + /** + * @mtime: modification time of directory when cache was + * started + */ struct timespec64 mtime; - /* iversion of directory when cache was started */ + /** + * @iversion: iversion of directory when cache was + * started + */ u64 iversion; - /* protects above fields */ + /** @lock: protects above fields */ spinlock_t lock; } rdc; }; - /** Miscellaneous bits describing inode state */ + /** @state: Miscellaneous bits describing inode state */ unsigned long state; - /** Lock for serializing lookup and readdir for back compatibility*/ + /** + * @mutex: Lock for serializing lookup and readdir for back + * compatibility + */ struct mutex mutex; - /** Lock to protect write related fields */ + /** @lock: Lock to protect write-related fields */ spinlock_t lock; #ifdef CONFIG_FUSE_DAX - /* - * Dax specific inode data + /** + * @dax: Dax specific inode data */ struct fuse_inode_dax *dax; #endif - /** Submount specific lookup tracking */ + /** @submount_lookup: Submount specific lookup tracking */ struct fuse_submount_lookup *submount_lookup; #ifdef CONFIG_FUSE_PASSTHROUGH - /** Reference to backing file in passthrough mode */ + /** @fb: Reference to backing file in passthrough mode */ struct fuse_backing *fb; #endif - /* - * The underlying inode->i_blkbits value will not be modified, - * so preserve the blocksize specified by the server. + /** + * @cached_i_blkbits: The underlying inode->i_blkbits value will not + * be modified, so preserve the blocksize specified by the server. 
*/ u8 cached_i_blkbits; }; @@ -250,116 +260,67 @@ struct fuse_conn; struct fuse_mount; union fuse_file_args; -/** FUSE specific file data */ +/** + * struct fuse_file - FUSE-specific file data + */ struct fuse_file { - /** Fuse connection for this file */ + /** @fm: Fuse connection for this file */ struct fuse_mount *fm; - /* Argument space reserved for open/release */ + /** @args: Argument space reserved for open/release */ union fuse_file_args *args; - /** Kernel file handle guaranteed to be unique */ + /** @kh: Kernel file handle guaranteed to be unique */ u64 kh; - /** File handle used by userspace */ + /** @fh: File handle used by userspace */ u64 fh; - /** Node id of this file */ + /** @nodeid: Node id of this file */ u64 nodeid; - /** Refcount */ + /** @count: Refcount */ refcount_t count; - /** FOPEN_* flags returned by open */ + /** @open_flags: FOPEN_* flags returned by open */ u32 open_flags; - /** Entry on inode's write_files list */ + /** @write_entry: Entry on inode's write_files list */ struct list_head write_entry; - /* Readdir related */ + /** @readdir: Readdir-related */ struct { - /* Dir stream position */ + /** @pos: Dir stream position */ loff_t pos; - /* Offset in cache */ + /** @cache_off: Offset in cache */ loff_t cache_off; - /* Version of cache we are reading */ + /** @version: Version of cache we are reading */ u64 version; } readdir; - /** RB node to be linked on fuse_conn->polled_files */ + /** @polled_node: RB node to be linked on fuse_conn->polled_files */ struct rb_node polled_node; - /** Wait queue head for poll */ + /** @poll_wait: Wait queue head for poll */ wait_queue_head_t poll_wait; - /** Does file hold a fi->iocachectr refcount? */ + /** @iomode: Does file hold a fi->iocachectr refcount? 
*/ enum { IOM_NONE, IOM_CACHED, IOM_UNCACHED } iomode; #ifdef CONFIG_FUSE_PASSTHROUGH - /** Reference to backing file in passthrough mode */ + /** @passthrough: Reference to backing file in passthrough mode */ struct file *passthrough; + /** @cred: passthrough file credentials */ const struct cred *cred; #endif - /** Has flock been performed on this file? */ + /** @flock: Has flock been performed on this file? */ bool flock:1; }; -/** One input argument of a request */ -struct fuse_in_arg { - unsigned size; - const void *value; -}; - -/** One output argument of a request */ -struct fuse_arg { - unsigned size; - void *value; -}; - -/** FUSE folio descriptor */ -struct fuse_folio_desc { - unsigned int length; - unsigned int offset; -}; - -struct fuse_args { - uint64_t nodeid; - uint32_t opcode; - uint8_t in_numargs; - uint8_t out_numargs; - uint8_t ext_idx; - bool force:1; - bool noreply:1; - bool nocreds:1; - bool in_pages:1; - bool out_pages:1; - bool user_pages:1; - bool out_argvar:1; - bool page_zeroing:1; - bool page_replace:1; - bool may_block:1; - bool is_ext:1; - bool is_pinned:1; - bool invalidate_vmap:1; - bool abort_on_kill:1; - struct fuse_in_arg in_args[4]; - struct fuse_arg out_args[2]; - void (*end)(struct fuse_mount *fm, struct fuse_args *args, int error); - /* Used for kvec iter backed by vmalloc address */ - void *vmap_base; -}; - -struct fuse_args_pages { - struct fuse_args args; - struct folio **folios; - struct fuse_folio_desc *descs; - unsigned int num_folios; -}; - struct fuse_release_args { struct fuse_args args; struct fuse_release_in inarg; @@ -399,200 +360,6 @@ struct fuse_io_priv { .iocb = i, \ } -/** - * Request flags - * - * FR_ISREPLY: set if the request has reply - * FR_FORCE: force sending of the request even if interrupted - * FR_BACKGROUND: request is sent in the background - * FR_WAITING: request is counted as "waiting" - * FR_ABORTED: the request was aborted - * FR_INTERRUPTED: the request has been interrupted - * FR_LOCKED: data 
is being copied to/from the request - * FR_PENDING: request is not yet in userspace - * FR_SENT: request is in userspace, waiting for an answer - * FR_FINISHED: request is finished - * FR_PRIVATE: request is on private list - * FR_ASYNC: request is asynchronous - * FR_URING: request is handled through fuse-io-uring - */ -enum fuse_req_flag { - FR_ISREPLY, - FR_FORCE, - FR_BACKGROUND, - FR_WAITING, - FR_ABORTED, - FR_INTERRUPTED, - FR_LOCKED, - FR_PENDING, - FR_SENT, - FR_FINISHED, - FR_PRIVATE, - FR_ASYNC, - FR_URING, -}; - -/** - * A request to the client - * - * .waitq.lock protects the following fields: - * - FR_ABORTED - * - FR_LOCKED (may also be modified under fc->lock, tested under both) - */ -struct fuse_req { - /** This can be on either pending processing or io lists in - fuse_conn */ - struct list_head list; - - /** Entry on the interrupts list */ - struct list_head intr_entry; - - /* Input/output arguments */ - struct fuse_args *args; - - /** refcount */ - refcount_t count; - - /* Request flags, updated with test/set/clear_bit() */ - unsigned long flags; - - /* The request input header */ - struct { - struct fuse_in_header h; - } in; - - /* The request output header */ - struct { - struct fuse_out_header h; - } out; - - /** Used to wake up the task waiting for completion of request*/ - wait_queue_head_t waitq; - -#if IS_ENABLED(CONFIG_VIRTIO_FS) - /** virtio-fs's physically contiguous buffer for in and out args */ - void *argbuf; -#endif - - /** fuse_mount this request belongs to */ - struct fuse_mount *fm; - -#ifdef CONFIG_FUSE_IO_URING - void *ring_entry; - void *ring_queue; -#endif - /** When (in jiffies) the request was created */ - unsigned long create_time; -}; - -struct fuse_iqueue; - -/** - * Input queue callbacks - * - * Input queue signalling is device-specific. For example, the /dev/fuse file - * uses fiq->waitq and fasync to wake processes that are waiting on queue - * readiness. 
These callbacks allow other device types to respond to input - * queue activity. - */ -struct fuse_iqueue_ops { - /** - * Send one forget - */ - void (*send_forget)(struct fuse_iqueue *fiq, struct fuse_forget_link *link); - - /** - * Send interrupt for request - */ - void (*send_interrupt)(struct fuse_iqueue *fiq, struct fuse_req *req); - - /** - * Send one request - */ - void (*send_req)(struct fuse_iqueue *fiq, struct fuse_req *req); - - /** - * Clean up when fuse_iqueue is destroyed - */ - void (*release)(struct fuse_iqueue *fiq); -}; - -/** /dev/fuse input queue operations */ -extern const struct fuse_iqueue_ops fuse_dev_fiq_ops; - -struct fuse_iqueue { - /** Connection established */ - unsigned connected; - - /** Lock protecting accesses to members of this structure */ - spinlock_t lock; - - /** Readers of the connection are waiting on this */ - wait_queue_head_t waitq; - - /** The next unique request id */ - u64 reqctr; - - /** The list of pending requests */ - struct list_head pending; - - /** Pending interrupts */ - struct list_head interrupts; - - /** Queue of pending forgets */ - struct fuse_forget_link forget_list_head; - struct fuse_forget_link *forget_list_tail; - - /** Batching of FORGET requests (positive indicates FORGET batch) */ - int forget_batch; - - /** O_ASYNC requests */ - struct fasync_struct *fasync; - - /** Device-specific callbacks */ - const struct fuse_iqueue_ops *ops; - - /** Device-specific state */ - void *priv; -}; - -#define FUSE_PQ_HASH_BITS 8 -#define FUSE_PQ_HASH_SIZE (1 << FUSE_PQ_HASH_BITS) - -struct fuse_pqueue { - /** Connection established */ - unsigned connected; - - /** Lock protecting accessess to members of this structure */ - spinlock_t lock; - - /** Hash table of requests being processed */ - struct list_head *processing; - - /** The list of requests under I/O */ - struct list_head io; -}; - -/** - * Fuse device instance - */ -struct fuse_dev { - /** Reference count of this object */ - refcount_t ref; - - /** Issue 
FUSE_INIT synchronously */ - bool sync_init; - - /** Fuse connection for this device */ - struct fuse_conn *fc; - - /** Processing queue */ - struct fuse_pqueue pq; - - /** list entry on fc->devices */ - struct list_head entry; -}; - enum fuse_dax_mode { FUSE_DAX_INODE_DEFAULT, /* default */ FUSE_DAX_ALWAYS, /* "-o dax=always" */ @@ -637,133 +404,135 @@ struct fuse_sync_bucket { }; /** - * A Fuse connection. + * struct fuse_conn - A Fuse connection. * * This structure is created, when the root filesystem is mounted, and * is destroyed, when the client device is closed and the last * fuse_mount is destroyed. */ struct fuse_conn { - /** Lock protecting accessess to members of this structure */ + /** + * @lock: Lock protecting: + * - polled_files + * - backing_files_map + * - curr_bucket + */ spinlock_t lock; - /** Refcount */ + /** @count: Refcount */ refcount_t count; - /** Current epoch for up-to-date dentries */ + /** @epoch: Current epoch for up-to-date dentries */ atomic_t epoch; + /** @epoch_work: Used to invalidate dentries from old epochs */ struct work_struct epoch_work; + /** @rcu: Used to delay freeing fuse_conn, making it safe */ struct rcu_head rcu; - /** The user id for this mount */ + /** @user_id: The user id for this mount */ kuid_t user_id; - /** The group id for this mount */ + /** @group_id: The group id for this mount */ kgid_t group_id; - /** The pid namespace for this mount */ + /** @pid_ns: The pid namespace for this mount */ struct pid_namespace *pid_ns; - /** The user namespace for this mount */ + /** @user_ns: The user namespace for this mount */ struct user_namespace *user_ns; - /** Maximum read size */ + /** @max_read: Maximum read size */ unsigned max_read; - /** Maximum write size */ + /** @max_write: Maximum write size */ unsigned max_write; - /** Maximum number of pages that can be used in a single request */ + /** + * @max_pages: Maximum number of pages that can be used in a + * single request + */ unsigned int max_pages; - /** 
Constrain ->max_pages to this value during feature negotiation */ + /** + * @max_pages_limit: Constrain ->max_pages to this value during + * feature negotiation + */ unsigned int max_pages_limit; - /** Input queue */ - struct fuse_iqueue iq; + /** @chan: transport layer object */ + struct fuse_chan *chan; - /** The next unique kernel file handle */ + /** @khctr: The next unique kernel file handle */ atomic64_t khctr; - /** rbtree of fuse_files waiting for poll events indexed by ph */ + /** + * @polled_files: rbtree of fuse_files waiting for poll events + * indexed by ph + */ struct rb_root polled_files; - /** Maximum number of outstanding background requests */ - unsigned max_background; - - /** Number of background requests at which congestion starts */ + /** + * @congestion_threshold: Number of background requests at which + * congestion starts + */ unsigned congestion_threshold; - /** Number of requests currently in the background */ - unsigned num_background; - - /** Number of background requests currently queued for userspace */ - unsigned active_background; - - /** The list of background requests set aside for later queuing */ - struct list_head bg_queue; - - /** Protects: max_background, congestion_threshold, num_background, - * active_background, bg_queue, blocked */ - spinlock_t bg_lock; - - /** Flag indicating that INIT reply has been received. Allocating - * any fuse request will be suspended until the flag is set */ - int initialized; - - /** Flag indicating if connection is blocked. This will be - the case before the INIT reply is received, and if there - are too many outstading backgrounds requests */ - int blocked; - - /** waitq for blocked connection */ - wait_queue_head_t blocked_waitq; - - /** Connection established, cleared on umount, connection - abort and device release */ - unsigned connected; - - /** Connection aborted via sysfs */ - bool aborted; - - /** Connection failed (version mismatch). 
Cannot race with - setting other bitfields since it is only set once in INIT - reply, before any other request, and never cleared */ + /** + * @conn_error: Connection failed (version mismatch). Cannot race with + * setting other bitfields since it is only set once in INIT + * reply, before any other request, and never cleared + */ unsigned conn_error:1; - /** Connection successful. Only set in INIT */ + /** @conn_init: Connection successful. Only set in INIT */ unsigned conn_init:1; - /** Do readahead asynchronously? Only set in INIT */ + /** @async_read: Do readahead asynchronously? Only set in INIT */ unsigned async_read:1; - /** Return an unique read error after abort. Only set in INIT */ + /** + * @abort_err: Return an unique read error after abort. + * Only set in INIT + */ unsigned abort_err:1; - /** Do not send separate SETATTR request before open(O_TRUNC) */ + /** + * @atomic_o_trunc: Do not send separate SETATTR request before + * open(O_TRUNC) + */ unsigned atomic_o_trunc:1; - /** Filesystem supports NFS exporting. Only set in INIT */ + /** + * @export_support: Filesystem supports NFS exporting. 
+ * Only set in INIT + */ unsigned export_support:1; - /** write-back cache policy (default is write-through) */ + /** @writeback_cache: write-back cache policy (default is write-through) */ unsigned writeback_cache:1; - /** allow parallel lookups and readdir (default is serialized) */ + /** + * @parallel_dirops: allow parallel lookups and readdir (default is + * serialized) + */ unsigned parallel_dirops:1; - /** handle fs handles killing suid/sgid/cap on write/chown/trunc */ + /** + * @handle_killpriv: handle fs handles killing suid/sgid/cap on + * write/chown/trunc + */ unsigned handle_killpriv:1; - /** cache READLINK responses in page cache */ + /** @cache_symlinks: cache READLINK responses in page cache */ unsigned cache_symlinks:1; - /* show legacy mount options */ + /** @legacy_opts_show: show legacy mount options */ unsigned int legacy_opts_show:1; - /* + /** + * @handle_killpriv_v2: * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on * write/trunc only if caller did not have CAP_FSETID. sgid is killed * on write/truncate only if caller did not have CAP_FSETID as well as @@ -776,224 +545,219 @@ struct fuse_conn { * and hence races in setting them will not cause malfunction */ - /** Is open/release not implemented by fs? */ + /** @no_open: Is open/release not implemented by fs? */ unsigned no_open:1; - /** Is opendir/releasedir not implemented by fs? */ + /** @no_opendir: Is opendir/releasedir not implemented by fs? */ unsigned no_opendir:1; - /** Is fsync not implemented by fs? */ + /** @no_fsync: Is fsync not implemented by fs? */ unsigned no_fsync:1; - /** Is fsyncdir not implemented by fs? */ + /** @no_fsyncdir: Is fsyncdir not implemented by fs? */ unsigned no_fsyncdir:1; - /** Is flush not implemented by fs? */ + /** @no_flush: Is flush not implemented by fs? */ unsigned no_flush:1; - /** Is setxattr not implemented by fs? */ + /** @no_setxattr: Is setxattr not implemented by fs? 
*/ unsigned no_setxattr:1; - /** Does file server support extended setxattr */ + /** @setxattr_ext: Does file server support extended setxattr */ unsigned setxattr_ext:1; - /** Is getxattr not implemented by fs? */ + /** @no_getxattr: Is getxattr not implemented by fs? */ unsigned no_getxattr:1; - /** Is listxattr not implemented by fs? */ + /** @no_listxattr: Is listxattr not implemented by fs? */ unsigned no_listxattr:1; - /** Is removexattr not implemented by fs? */ + /** @no_removexattr: Is removexattr not implemented by fs? */ unsigned no_removexattr:1; - /** Are posix file locking primitives not implemented by fs? */ + /** @no_lock: Are posix file locking primitives not implemented by fs? */ unsigned no_lock:1; - /** Is access not implemented by fs? */ + /** @no_access: Is access not implemented by fs? */ unsigned no_access:1; - /** Is create not implemented by fs? */ + /** @no_create: Is create not implemented by fs? */ unsigned no_create:1; - /** Is interrupt not implemented by fs? */ - unsigned no_interrupt:1; - - /** Is bmap not implemented by fs? */ + /** @no_bmap: Is bmap not implemented by fs? */ unsigned no_bmap:1; - /** Is poll not implemented by fs? */ + /** @no_poll: Is poll not implemented by fs? */ unsigned no_poll:1; - /** Do multi-page cached writes */ + /** @big_writes: Do multi-page cached writes */ unsigned big_writes:1; - /** Don't apply umask to creation modes */ + /** @dont_mask: Don't apply umask to creation modes */ unsigned dont_mask:1; - /** Are BSD file locking primitives not implemented by fs? */ + /** @no_flock: Are BSD file locking primitives not implemented by fs? */ unsigned no_flock:1; - /** Is fallocate not implemented by fs? */ + /** @no_fallocate: Is fallocate not implemented by fs? */ unsigned no_fallocate:1; - /** Is rename with flags implemented by fs? */ + /** @no_rename2: Is rename with flags implemented by fs? */ unsigned no_rename2:1; - /** Use enhanced/automatic page cache invalidation. 
*/ + /** @auto_inval_data: Use enhanced/automatic page cache invalidation. */ unsigned auto_inval_data:1; - /** Filesystem is fully responsible for page cache invalidation. */ + /** + * @explicit_inval_data: Filesystem is fully responsible for page cache + * invalidation. + */ unsigned explicit_inval_data:1; - /** Does the filesystem support readdirplus? */ + /** @do_readdirplus: Does the filesystem support readdirplus? */ unsigned do_readdirplus:1; - /** Does the filesystem want adaptive readdirplus? */ + /** @readdirplus_auto: Does the filesystem want adaptive readdirplus? */ unsigned readdirplus_auto:1; - /** Does the filesystem support asynchronous direct-IO submission? */ + /** + * @async_dio: Does the filesystem support asynchronous direct-IO + * submission? + */ unsigned async_dio:1; - /** Is lseek not implemented by fs? */ + /** @no_lseek: Is lseek not implemented by fs? */ unsigned no_lseek:1; - /** Does the filesystem support posix acls? */ + /** @posix_acl: Does the filesystem support posix acls? */ unsigned posix_acl:1; - /** Check permissions based on the file mode or not? */ + /** + * @default_permissions: Check permissions based on the file mode + * or not? + */ unsigned default_permissions:1; - /** Allow other than the mounter user to access the filesystem ? */ + /** + * @allow_other: Allow other than the mounter user to access the + * filesystem ? + */ unsigned allow_other:1; - /** Does the filesystem support copy_file_range? */ + /** @no_copy_file_range: Does the filesystem support copy_file_range? */ unsigned no_copy_file_range:1; - /** Does the filesystem support copy_file_range_64? */ + /** + * @no_copy_file_range_64: Does the filesystem support + * copy_file_range_64? 
+ */ unsigned no_copy_file_range_64:1; - /* Send DESTROY request */ + /** @destroy: Send DESTROY request */ unsigned int destroy:1; - /* Delete dentries that have gone stale */ + /** @delete_stale: Delete dentries that have gone stale */ unsigned int delete_stale:1; - /** Do not create entry in fusectl fs */ + /** @no_control: Do not create entry in fusectl fs */ unsigned int no_control:1; - /** Do not allow MNT_FORCE umount */ + /** @no_force_umount: Do not allow MNT_FORCE umount */ unsigned int no_force_umount:1; - /* Auto-mount submounts announced by the server */ + /** @auto_submounts: Auto-mount submounts announced by the server */ unsigned int auto_submounts:1; - /* Propagate syncfs() to server */ + /** @sync_fs: Propagate syncfs() to server */ unsigned int sync_fs:1; - /* Initialize security xattrs when creating a new inode */ + /** @init_security: Initialize security xattrs when creating a new inode */ unsigned int init_security:1; - /* Add supplementary group info when creating a new inode */ + /** + * @create_supp_group: Add supplementary group info when creating + * a new inode + */ unsigned int create_supp_group:1; - /* Does the filesystem support per inode DAX? */ + /** @inode_dax: Does the filesystem support per inode DAX? */ unsigned int inode_dax:1; - /* Is tmpfile not implemented by fs? */ + /** @no_tmpfile: Is tmpfile not implemented by fs? */ unsigned int no_tmpfile:1; - /* Relax restrictions to allow shared mmap in FOPEN_DIRECT_IO mode */ + /** + * @direct_io_allow_mmap: Relax restrictions to allow shared mmap + * in FOPEN_DIRECT_IO mode + */ unsigned int direct_io_allow_mmap:1; - /* Is statx not implemented by fs? */ + /** @no_statx: Is statx not implemented by fs? 
*/ unsigned int no_statx:1; - /** Passthrough support for read/write IO */ + /** @passthrough: Passthrough support for read/write IO */ unsigned int passthrough:1; - /* Use pages instead of pointer for kernel I/O */ + /** @use_pages_for_kvec_io: Use pages instead of pointer for kernel I/O */ unsigned int use_pages_for_kvec_io:1; - /* Is link not implemented by fs? */ + /** @no_link: Is link not implemented by fs? */ unsigned int no_link:1; - /* Is synchronous FUSE_INIT allowed? */ + /** @sync_init: Is synchronous FUSE_INIT allowed? */ unsigned int sync_init:1; - /* Use io_uring for communication */ - unsigned int io_uring; - - /** Maximum stack depth for passthrough backing files */ + /** @max_stack_depth: Maximum stack depth for passthrough backing files */ int max_stack_depth; - /** The number of requests waiting for completion */ - atomic_t num_waiting; - - /** Negotiated minor version */ + /** @minor: Negotiated minor version */ unsigned minor; - /** Entry on the fuse_conn_list */ + /** @entry: Entry on the fuse_conn_list */ struct list_head entry; - /** Device ID from the root super block */ + /** @dev: Device ID from the root super block */ dev_t dev; - /** Key for lock owner ID scrambling */ + /** @scramble_key: Key for lock owner ID scrambling */ u32 scramble_key[4]; - /** Version counter for attribute changes */ + /** @attr_version: Version counter for attribute changes */ atomic64_t attr_version; - /** Version counter for evict inode */ + /** @evict_ctr: Version counter for evict inode */ atomic64_t evict_ctr; - /* maximum file name length */ + /** @name_max: maximum file name length */ u32 name_max; - /** Called on final put */ + /** @release: Called on final put */ void (*release)(struct fuse_conn *); /** - * Read/write semaphore to hold when accessing the sb of any + * @killsb: Read/write semaphore to hold when accessing the sb of any * fuse_mount belonging to this connection */ struct rw_semaphore killsb; - /** List of device instances belonging to 
this connection */ - struct list_head devices; - #ifdef CONFIG_FUSE_DAX - /* Dax mode */ + /** @dax_mode: Dax mode */ enum fuse_dax_mode dax_mode; - /* Dax specific conn data, non-NULL if DAX is enabled */ + /** @dax: Dax specific conn data, non-NULL if DAX is enabled */ struct fuse_conn_dax *dax; #endif - /** List of filesystems using this connection */ + /** @mounts: List of filesystems using this connection */ struct list_head mounts; - /* New writepages go into this bucket */ + /** @curr_bucket: New writepages go into this bucket */ struct fuse_sync_bucket __rcu *curr_bucket; #ifdef CONFIG_FUSE_PASSTHROUGH - /** IDR for backing files ids */ + /** @backing_files_map: IDR for backing files ids */ struct idr backing_files_map; #endif - -#ifdef CONFIG_FUSE_IO_URING - /** uring connection information*/ - struct fuse_ring *ring; -#endif - - /** Only used if the connection opts into request timeouts */ - struct { - /* Worker for checking if any requests have timed out */ - struct delayed_work work; - - /* Request timeout (in jiffies). 
0 = no timeout */ - unsigned int req_timeout; - } timeout; }; /* @@ -1136,7 +900,7 @@ extern const struct file_operations fuse_dev_operations; extern const struct dentry_operations fuse_dentry_operations; -/** +/* * Get a filled in inode */ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, @@ -1147,14 +911,6 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name, struct fuse_entry_out *outarg, struct inode **inode); -/** - * Send FORGET command - */ -void fuse_queue_forget(struct fuse_conn *fc, struct fuse_forget_link *forget, - u64 nodeid, u64 nlookup); - -struct fuse_forget_link *fuse_alloc_forget(void); - /* * Initialize READ or READDIR request */ @@ -1186,44 +942,44 @@ int fuse_finish_open(struct inode *inode, struct file *file); void fuse_sync_release(struct fuse_inode *fi, struct fuse_file *ff, unsigned int flags); -/** +/* * Send RELEASE or RELEASEDIR request */ void fuse_release_common(struct file *file, bool isdir); -/** +/* * Send FSYNC or FSYNCDIR request */ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, int datasync, int opcode); -/** +/* * Notify poll wakeup */ int fuse_notify_poll_wakeup(struct fuse_conn *fc, struct fuse_notify_poll_wakeup_out *outarg); -/** +/* * Initialize file operations on a regular file */ void fuse_init_file_inode(struct inode *inode, unsigned int flags); -/** +/* * Initialize inode operations on regular files and special files */ void fuse_init_common(struct inode *inode); -/** +/* * Initialize inode and file operations on a directory */ void fuse_init_dir(struct inode *inode); -/** +/* * Initialize inode operations on a symlink */ void fuse_init_symlink(struct inode *inode); -/** +/* * Change attributes of an inode */ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, @@ -1237,20 +993,10 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, u32 
fuse_get_cache_mask(struct inode *inode); -/** - * Initialize the client device - */ -int fuse_dev_init(void); - -/** - * Cleanup the client device - */ -void fuse_dev_cleanup(void); - int fuse_ctl_init(void); void __exit fuse_ctl_cleanup(void); -/** +/* * Simple request sending that does request allocation and freeing */ ssize_t __fuse_simple_request(struct mnt_idmap *idmap, @@ -1271,30 +1017,14 @@ static inline ssize_t fuse_simple_idmap_request(struct mnt_idmap *idmap, int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, gfp_t gfp_flags); - -/** - * Assign a unique id to a fuse request - */ -void fuse_request_assign_unique(struct fuse_iqueue *fiq, struct fuse_req *req); - -/** - * End a finished request - */ -void fuse_request_end(struct fuse_req *req); - -/* Abort all requests */ -void fuse_abort_conn(struct fuse_conn *fc); -void fuse_wait_aborted(struct fuse_conn *fc); - -/* Check if any requests timed out */ -void fuse_check_timeout(struct work_struct *work); +int fuse_simple_notify_reply(struct fuse_mount *fm, struct fuse_args *args, u64 unique); void fuse_dentry_tree_init(void); void fuse_dentry_tree_cleanup(void); void fuse_epoch_work(struct work_struct *work); -/** +/* * Invalidate inode attributes */ @@ -1304,6 +1034,9 @@ void fuse_epoch_work(struct work_struct *work); /* Attributes possibly changed on data and/or size modification */ #define FUSE_STATX_MODSIZE (FUSE_STATX_MODIFY | STATX_SIZE) +/* Attributes possibly changed on directory modification */ +#define FUSE_STATX_MODDIR (FUSE_STATX_MODSIZE | STATX_NLINK) + void fuse_invalidate_attr(struct inode *inode); void fuse_invalidate_attr_mask(struct inode *inode, u32 mask); @@ -1317,45 +1050,26 @@ u64 fuse_time_to_jiffies(u64 sec, u32 nsec); void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o); -/** - * Acquire reference to fuse_conn - */ -struct fuse_conn *fuse_conn_get(struct fuse_conn *fc); - -/** - * Initialize the fuse processing queue - */ -void 
fuse_pqueue_init(struct fuse_pqueue *fpq); - -/** +/* * Initialize fuse_conn */ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, - struct user_namespace *user_ns, - const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv); + struct user_namespace *user_ns, struct fuse_chan *fch); -/** - * Release reference to fuse_conn - */ -void fuse_conn_put(struct fuse_conn *fc); - -struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc); -struct fuse_dev *fuse_dev_alloc(void); -void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc); -void fuse_dev_put(struct fuse_dev *fud); int fuse_send_init(struct fuse_mount *fm); /** - * Fill in superblock and initialize fuse connection + * fuse_fill_super_common - Fill in superblock and initialize fuse connection * @sb: partially-initialized superblock to fill in * @ctx: mount context */ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx); -/* - * Remove the mount from the connection +/** + * fuse_mount_remove - Remove the mount from the connection + * @fm: fuse_mount to remove * - * Returns whether this was the last mount + * Returns: whether this was the last mount */ bool fuse_mount_remove(struct fuse_mount *fm); @@ -1373,23 +1087,25 @@ void fuse_conn_destroy(struct fuse_mount *fm); void fuse_mount_destroy(struct fuse_mount *fm); /** - * Add connection to control filesystem + * fuse_ctl_add_conn - Add connection to control filesystem + * @fc: Fuse connection to add */ int fuse_ctl_add_conn(struct fuse_conn *fc); /** - * Remove connection from control filesystem + * fuse_ctl_remove_conn - Remove connection from control filesystem + * @fc: Fuse connection to remove */ void fuse_ctl_remove_conn(struct fuse_conn *fc); -/** +/* * Is file type valid? */ int fuse_valid_type(int m); bool fuse_invalid_attr(struct fuse_attr *attr); -/** +/* * Is current process allowed to perform filesystem operation? 
*/ bool fuse_allow_current_process(struct fuse_conn *fc); @@ -1406,7 +1122,7 @@ void fuse_flush_writepages(struct inode *inode); void fuse_set_nowrite(struct inode *inode); void fuse_release_nowrite(struct inode *inode); -/** +/* * Scan all fuse_mounts belonging to fc to find the first where * ilookup5() returns a result. Return that result and the * respective fuse_mount in *fm (unless fm is NULL). @@ -1416,13 +1132,13 @@ void fuse_release_nowrite(struct inode *inode); struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid, struct fuse_mount **fm); -/** +/* * File-system tells the kernel to invalidate cache for the given node id. */ int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid, loff_t offset, loff_t len); -/** +/* * File-system tells the kernel to invalidate parent attributes and * the dentry matching parent/name. * @@ -1444,7 +1160,7 @@ void fuse_try_prune_one_inode(struct fuse_conn *fc, u64 nodeid); int fuse_do_open(struct fuse_mount *fm, u64 nodeid, struct file *file, bool isdir); -/** +/* * fuse_direct_io() flags */ @@ -1461,7 +1177,6 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, long fuse_ioctl_common(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); __poll_t fuse_file_poll(struct file *file, poll_table *wait); -int fuse_dev_release(struct inode *inode, struct file *file); bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written); @@ -1471,8 +1186,6 @@ int fuse_write_inode(struct inode *inode, struct writeback_control *wbc); int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr, struct file *file); -void fuse_set_initialized(struct fuse_conn *fc); - void fuse_unlock_inode(struct inode *inode, bool locked); bool fuse_lock_inode(struct inode *inode); @@ -1494,15 +1207,6 @@ int fuse_set_acl(struct mnt_idmap *, struct dentry *dentry, /* readdir.c */ int fuse_readdir(struct file *file, struct dir_context *ctx); -/** - * Return the 
number of bytes in an arguments list - */ -unsigned int fuse_len_args(unsigned int numargs, struct fuse_arg *args); - -/** - * Get the next unique ID for a request - */ -u64 fuse_get_unique(struct fuse_iqueue *fiq); void fuse_free_conn(struct fuse_conn *fc); /* dax.c */ @@ -1570,8 +1274,6 @@ static inline struct fuse_backing *fuse_backing_lookup(struct fuse_conn *fc, void fuse_backing_files_init(struct fuse_conn *fc); void fuse_backing_files_free(struct fuse_conn *fc); -int fuse_backing_open(struct fuse_conn *fc, struct fuse_backing_map *map); -int fuse_backing_close(struct fuse_conn *fc, int backing_id); /* passthrough.c */ static inline struct fuse_backing *fuse_inode_backing(struct fuse_inode *fi) diff --git a/fs/fuse/fuse_trace.h b/fs/fuse/fuse_trace.h index bbe9ddd8c7169..4e34ddb172ed9 100644 --- a/fs/fuse/fuse_trace.h +++ b/fs/fuse/fuse_trace.h @@ -90,7 +90,7 @@ TRACE_EVENT(fuse_request_send, ), TP_fast_assign( - __entry->connection = req->fm->fc->dev; + __entry->connection = req->chan->conn->dev; __entry->unique = req->in.h.unique; __entry->opcode = req->in.h.opcode; __entry->len = req->in.h.len; @@ -114,7 +114,7 @@ TRACE_EVENT(fuse_request_end, ), TP_fast_assign( - __entry->connection = req->fm->fc->dev; + __entry->connection = req->chan->conn->dev; __entry->unique = req->in.h.unique; __entry->len = req->out.h.len; __entry->error = req->out.h.error; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index deddfffb037fb..0897f8e62b4d1 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -6,9 +6,8 @@ See the file COPYING. 
*/ +#include "dev.h" #include "fuse_i.h" -#include "fuse_dev_i.h" -#include "dev_uring_i.h" #include <linux/dax.h> #include <linux/pagemap.h> @@ -35,14 +34,11 @@ MODULE_LICENSE("GPL"); static struct kmem_cache *fuse_inode_cachep; struct list_head fuse_conn_list; DEFINE_MUTEX(fuse_mutex); -DECLARE_WAIT_QUEUE_HEAD(fuse_dev_waitq); static int set_global_limit(const char *val, const struct kernel_param *kp); unsigned int fuse_max_pages_limit = 256; /* default is no timeout */ -unsigned int fuse_default_req_timeout; -unsigned int fuse_max_req_timeout; unsigned int max_user_bgreq; module_param_call(max_user_bgreq, set_global_limit, param_get_uint, @@ -62,9 +58,6 @@ MODULE_PARM_DESC(max_user_congthresh, #define FUSE_DEFAULT_BLKSIZE 512 -/** Maximum number of outstanding background requests */ -#define FUSE_DEFAULT_MAX_BACKGROUND 12 - /** Congestion starts at 75% of maximum */ #define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4) @@ -72,11 +65,6 @@ MODULE_PARM_DESC(max_user_congthresh, static struct file_system_type fuseblk_fs_type; #endif -struct fuse_forget_link *fuse_alloc_forget(void) -{ - return kzalloc_obj(struct fuse_forget_link, GFP_KERNEL_ACCOUNT); -} - static struct fuse_submount_lookup *fuse_alloc_submount_lookup(void) { struct fuse_submount_lookup *sl; @@ -150,7 +138,7 @@ static void fuse_cleanup_submount_lookup(struct fuse_conn *fc, if (!refcount_dec_and_test(&sl->count)) return; - fuse_queue_forget(fc, sl->forget, sl->nodeid, 1); + fuse_chan_queue_forget(fc->chan, sl->forget, sl->nodeid, 1); sl->forget = NULL; kfree(sl); } @@ -173,8 +161,8 @@ static void fuse_evict_inode(struct inode *inode) if (FUSE_IS_DAX(inode)) fuse_dax_inode_cleanup(inode); if (fi->nlookup) { - fuse_queue_forget(fc, fi->forget, fi->nodeid, - fi->nlookup); + fuse_chan_queue_forget(fc->chan, fi->forget, fi->nodeid, + fi->nlookup); fi->forget = NULL; } @@ -624,7 +612,7 @@ static void fuse_umount_begin(struct super_block *sb) if (fc->no_force_umount) return; - 
fuse_abort_conn(fc); + fuse_chan_abort(fc->chan, false); // Only retire block-device-based superblocks. if (sb->s_bdev != NULL) @@ -688,11 +676,9 @@ static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void) struct fuse_sync_bucket *bucket; bucket = kzalloc_obj(*bucket, GFP_KERNEL | __GFP_NOFAIL); - if (bucket) { - init_waitqueue_head(&bucket->waitq); - /* Initial active count */ - atomic_set(&bucket->count, 1); - } + init_waitqueue_head(&bucket->waitq); + /* Initial active count */ + atomic_set(&bucket->count, 1); return bucket; } @@ -816,8 +802,7 @@ static int fuse_opt_fd(struct fs_context *fsc, struct file *file) if (file->f_cred->user_ns != fsc->user_ns) return invalfc(fsc, "wrong user namespace for fuse device"); - ctx->fud = file->private_data; - refcount_inc(&ctx->fud->ref); + ctx->fud = fuse_dev_grab(file); return 0; } @@ -970,56 +955,19 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) return 0; } -static void fuse_iqueue_init(struct fuse_iqueue *fiq, - const struct fuse_iqueue_ops *ops, - void *priv) -{ - memset(fiq, 0, sizeof(struct fuse_iqueue)); - spin_lock_init(&fiq->lock); - init_waitqueue_head(&fiq->waitq); - INIT_LIST_HEAD(&fiq->pending); - INIT_LIST_HEAD(&fiq->interrupts); - fiq->forget_list_tail = &fiq->forget_list_head; - fiq->connected = 1; - fiq->ops = ops; - fiq->priv = priv; -} - -void fuse_pqueue_init(struct fuse_pqueue *fpq) -{ - unsigned int i; - - spin_lock_init(&fpq->lock); - for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) - INIT_LIST_HEAD(&fpq->processing[i]); - INIT_LIST_HEAD(&fpq->io); - fpq->connected = 1; -} - void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, - struct user_namespace *user_ns, - const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv) + struct user_namespace *user_ns, struct fuse_chan *fch) { memset(fc, 0, sizeof(*fc)); spin_lock_init(&fc->lock); - spin_lock_init(&fc->bg_lock); init_rwsem(&fc->killsb); refcount_set(&fc->count, 1); atomic_set(&fc->epoch, 1); INIT_WORK(&fc->epoch_work, 
fuse_epoch_work); - init_waitqueue_head(&fc->blocked_waitq); - fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv); - INIT_LIST_HEAD(&fc->bg_queue); INIT_LIST_HEAD(&fc->entry); - INIT_LIST_HEAD(&fc->devices); - atomic_set(&fc->num_waiting, 0); - fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND; fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD; atomic64_set(&fc->khctr, 0); fc->polled_files = RB_ROOT; - fc->blocked = 0; - fc->initialized = 0; - fc->connected = 1; atomic64_set(&fc->attr_version, 1); atomic64_set(&fc->evict_ctr, 1); get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key)); @@ -1028,7 +976,6 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ; fc->max_pages_limit = fuse_max_pages_limit; fc->name_max = FUSE_NAME_LOW_MAX; - fc->timeout.req_timeout = 0; if (IS_ENABLED(CONFIG_FUSE_PASSTHROUGH)) fuse_backing_files_init(fc); @@ -1036,6 +983,8 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm, INIT_LIST_HEAD(&fc->mounts); list_add(&fm->fc_entry, &fc->mounts); fm->fc = fc; + fuse_chan_set_fc(fch, fc); + fc->chan = fch; } EXPORT_SYMBOL_GPL(fuse_conn_init); @@ -1043,7 +992,8 @@ static void delayed_release(struct rcu_head *p) { struct fuse_conn *fc = container_of(p, struct fuse_conn, rcu); - fuse_uring_destruct(fc); + fuse_uring_destruct(fc->chan); + fuse_chan_free(fc->chan); put_user_ns(fc->user_ns); fc->release(fc); @@ -1051,7 +1001,6 @@ static void delayed_release(struct rcu_head *p) void fuse_conn_put(struct fuse_conn *fc) { - struct fuse_iqueue *fiq = &fc->iq; struct fuse_sync_bucket *bucket; if (!refcount_dec_and_test(&fc->count)) @@ -1059,11 +1008,8 @@ void fuse_conn_put(struct fuse_conn *fc) if (IS_ENABLED(CONFIG_FUSE_DAX)) fuse_dax_conn_free(fc); - if (fc->timeout.req_timeout) - cancel_delayed_work_sync(&fc->timeout.work); cancel_work_sync(&fc->epoch_work); - if (fiq->ops->release) - fiq->ops->release(fiq); + fuse_chan_release(fc->chan); put_pid_ns(fc->pid_ns); 
bucket = rcu_dereference_protected(fc->curr_bucket, 1); if (bucket) { @@ -1083,6 +1029,11 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc) } EXPORT_SYMBOL_GPL(fuse_conn_get); +dev_t fuse_conn_get_id(struct fuse_conn *fc) +{ + return fc->dev; +} + static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned int mode) { struct fuse_attr attr; @@ -1294,12 +1245,13 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) sanitize_global_limit(&max_user_bgreq); sanitize_global_limit(&max_user_congthresh); - spin_lock(&fc->bg_lock); if (arg->max_background) { - fc->max_background = arg->max_background; + unsigned int max_background = max_background = arg->max_background; + + if (!cap_sys_admin && max_background > max_user_bgreq) + max_background = max_user_bgreq; - if (!cap_sys_admin && fc->max_background > max_user_bgreq) - fc->max_background = max_user_bgreq; + fuse_chan_max_background_set(fc->chan, max_background); } if (arg->congestion_threshold) { fc->congestion_threshold = arg->congestion_threshold; @@ -1308,48 +1260,20 @@ static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg) fc->congestion_threshold > max_user_congthresh) fc->congestion_threshold = max_user_congthresh; } - spin_unlock(&fc->bg_lock); -} - -static void set_request_timeout(struct fuse_conn *fc, unsigned int timeout) -{ - fc->timeout.req_timeout = secs_to_jiffies(timeout); - INIT_DELAYED_WORK(&fc->timeout.work, fuse_check_timeout); - queue_delayed_work(system_percpu_wq, &fc->timeout.work, - fuse_timeout_timer_freq); -} - -static void init_server_timeout(struct fuse_conn *fc, unsigned int timeout) -{ - if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) - return; - - if (!timeout) - timeout = fuse_default_req_timeout; - - if (fuse_max_req_timeout) { - if (timeout) - timeout = min(fuse_max_req_timeout, timeout); - else - timeout = fuse_max_req_timeout; - } - - timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout); - - 
set_request_timeout(fc, timeout); } struct fuse_init_args { struct fuse_args args; struct fuse_init_in in; struct fuse_init_out out; + struct fuse_mount *fm; }; -static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, - int error) +static void process_init_reply(struct fuse_args *args, int error) { - struct fuse_conn *fc = fm->fc; struct fuse_init_args *ia = container_of(args, typeof(*ia), args); + struct fuse_mount *fm = ia->fm; + struct fuse_conn *fc = fm->fc; struct fuse_init_out *arg = &ia->out; bool ok = true; @@ -1481,7 +1405,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, ok = false; } if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled()) - fc->io_uring = 1; + fuse_chan_io_uring_enable(fc->chan); if (flags & FUSE_REQUEST_TIMEOUT) timeout = arg->request_timeout; @@ -1491,7 +1415,7 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, fc->no_flock = 1; } - init_server_timeout(fc, timeout); + fuse_init_server_timeout(fc->chan, timeout); fm->sb->s_bdi->ra_pages = min(fm->sb->s_bdi->ra_pages, ra_pages); @@ -1505,10 +1429,15 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args, if (!ok) { fc->conn_init = 0; fc->conn_error = 1; + fuse_chan_set_initialized(fc->chan, NULL); + } else { + struct fuse_chan_param cp = { + .minor = fc->minor, + .max_write = fc->max_write, + .max_pages = fc->max_pages, + }; + fuse_chan_set_initialized(fc->chan, &cp); } - - fuse_set_initialized(fc); - wake_up_all(&fc->blocked_waitq); } static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) @@ -1518,6 +1447,7 @@ static struct fuse_init_args *fuse_new_init(struct fuse_mount *fm) ia = kzalloc_obj(*ia, GFP_KERNEL | __GFP_NOFAIL); + ia->fm = fm; ia->in.major = FUSE_KERNEL_VERSION; ia->in.minor = FUSE_KERNEL_MINOR_VERSION; ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE; @@ -1591,7 +1521,7 @@ int fuse_send_init(struct fuse_mount *fm) if (!err) return 0; } - 
process_init_reply(fm, &ia->args, err); + process_init_reply(&ia->args, err); if (fm->fc->conn_error) return -ENOTCONN; return 0; @@ -1600,7 +1530,6 @@ EXPORT_SYMBOL_GPL(fuse_send_init); void fuse_free_conn(struct fuse_conn *fc) { - WARN_ON(!list_empty(&fc->devices)); kfree(fc); } EXPORT_SYMBOL_GPL(fuse_free_conn); @@ -1643,89 +1572,6 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb) return 0; } -struct fuse_dev *fuse_dev_alloc(void) -{ - struct fuse_dev *fud; - struct list_head *pq; - - fud = kzalloc_obj(struct fuse_dev); - if (!fud) - return NULL; - - refcount_set(&fud->ref, 1); - pq = kzalloc_objs(struct list_head, FUSE_PQ_HASH_SIZE); - if (!pq) { - kfree(fud); - return NULL; - } - - fud->pq.processing = pq; - fuse_pqueue_init(&fud->pq); - - return fud; -} -EXPORT_SYMBOL_GPL(fuse_dev_alloc); - -void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc) -{ - struct fuse_conn *old_fc; - - spin_lock(&fc->lock); - /* - * Pairs with: - * - xchg() in fuse_dev_release() - * - smp_load_acquire() in fuse_dev_fc_get() - */ - old_fc = cmpxchg(&fud->fc, NULL, fc); - if (old_fc) { - /* - * failed to set fud->fc because - * - it was already set to a different fc - * - it was set to disconneted - */ - fc->connected = 0; - } else { - list_add_tail(&fud->entry, &fc->devices); - fuse_conn_get(fc); - } - spin_unlock(&fc->lock); -} -EXPORT_SYMBOL_GPL(fuse_dev_install); - -struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc) -{ - struct fuse_dev *fud; - - fud = fuse_dev_alloc(); - if (!fud) - return NULL; - - fuse_dev_install(fud, fc); - return fud; -} -EXPORT_SYMBOL_GPL(fuse_dev_alloc_install); - -void fuse_dev_put(struct fuse_dev *fud) -{ - struct fuse_conn *fc; - - if (!refcount_dec_and_test(&fud->ref)) - return; - - fc = fuse_dev_fc_get(fud); - if (fc && fc != FUSE_DEV_FC_DISCONNECTED) { - /* This is the virtiofs case (fuse_dev_release() not called) */ - spin_lock(&fc->lock); - list_del(&fud->entry); - spin_unlock(&fc->lock); - - 
fuse_conn_put(fc); - } - kfree(fud->pq.processing); - kfree(fud); -} -EXPORT_SYMBOL_GPL(fuse_dev_put); - static void fuse_fill_attr_from_inode(struct fuse_attr *attr, const struct fuse_inode *fi) { @@ -1941,9 +1787,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) mutex_lock(&fuse_mutex); err = -EINVAL; if (fud) { - if (fuse_dev_fc_get(fud)) + if (fuse_dev_is_installed(fud)) goto err_unlock; - if (fud->sync_init) + if (fuse_dev_is_sync_init(fud)) fc->sync_init = 1; } @@ -1953,10 +1799,9 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx) list_add_tail(&fc->entry, &fuse_conn_list); sb->s_root = root_dentry; - if (fud) { - fuse_dev_install(fud, fc); - wake_up_all(&fuse_dev_waitq); - } + if (fud) + fuse_dev_install(fud, fc->chan); + mutex_unlock(&fuse_mutex); return 0; @@ -2001,9 +1846,7 @@ static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc) static int fuse_test_super(struct super_block *sb, struct fs_context *fsc) { - struct fuse_dev *fud = fsc->sget_key; - - return fuse_dev_fc_get(fud) == get_fuse_conn_super(sb); + return fuse_dev_verify(fsc->sget_key, get_fuse_conn_super(sb)->chan); } static int fuse_get_tree(struct fs_context *fsc) @@ -2012,8 +1855,12 @@ static int fuse_get_tree(struct fs_context *fsc) struct fuse_conn *fc; struct fuse_mount *fm; struct super_block *sb; + struct fuse_chan *fch __free(fuse_chan_free) = fuse_dev_chan_new(); int err; + if (!fch) + return -ENOMEM; + fc = kmalloc_obj(*fc); if (!fc) return -ENOMEM; @@ -2024,7 +1871,7 @@ static int fuse_get_tree(struct fs_context *fsc) return -ENOMEM; } - fuse_conn_init(fc, fm, fsc->user_ns, &fuse_dev_fiq_ops, NULL); + fuse_conn_init(fc, fm, fsc->user_ns, no_free_ptr(fch)); fc->release = fuse_free_conn; fsc->s_fs_info = fm; @@ -2045,7 +1892,7 @@ static int fuse_get_tree(struct fs_context *fsc) * Allow creating a fuse mount with an already initialized fuse * connection */ - if (fuse_dev_fc_get(ctx->fud)) { + if 
(fuse_dev_is_installed(ctx->fud)) { fsc->sget_key = ctx->fud; sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super); err = PTR_ERR_OR_ZERO(sb); @@ -2116,8 +1963,8 @@ void fuse_conn_destroy(struct fuse_mount *fm) if (fc->destroy) fuse_send_destroy(fm); - fuse_abort_conn(fc); - fuse_wait_aborted(fc); + fuse_chan_abort(fc->chan, false); + fuse_chan_wait_aborted(fc->chan); if (!list_empty(&fc->entry)) { mutex_lock(&fuse_mutex); diff --git a/fs/fuse/notify.c b/fs/fuse/notify.c new file mode 100644 index 0000000000000..f200a33f05332 --- /dev/null +++ b/fs/fuse/notify.c @@ -0,0 +1,434 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "fuse_i.h" +#include <linux/pagemap.h> + +static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_poll_wakeup_out outarg; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + fuse_copy_finish(cs); + return fuse_notify_poll_wakeup(fc, &outarg); +} + +static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_inode_out outarg; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + fuse_copy_finish(cs); + + down_read(&fc->killsb); + err = fuse_reverse_inval_inode(fc, outarg.ino, + outarg.off, outarg.len); + up_read(&fc->killsb); + return err; +} + +static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_inval_entry_out outarg; + int err; + char *buf; + struct qstr name; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (outarg.namelen > fc->name_max) + return -ENAMETOOLONG; + + err = -EINVAL; + if (size != sizeof(outarg) + outarg.namelen + 1) + 
return -EINVAL; + + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + + down_read(&fc->killsb); + err = fuse_reverse_inval_entry(fc, outarg.parent, 0, &name, outarg.flags); + up_read(&fc->killsb); +err: + kfree(buf); + return err; +} + +static int fuse_notify_delete(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_delete_out outarg; + int err; + char *buf; + struct qstr name; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (outarg.namelen > fc->name_max) + return -ENAMETOOLONG; + + if (size != sizeof(outarg) + outarg.namelen + 1) + return -EINVAL; + + buf = kzalloc(outarg.namelen + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + name.name = buf; + name.len = outarg.namelen; + err = fuse_copy_one(cs, buf, outarg.namelen + 1); + if (err) + goto err; + fuse_copy_finish(cs); + buf[outarg.namelen] = 0; + + down_read(&fc->killsb); + err = fuse_reverse_inval_entry(fc, outarg.parent, outarg.child, &name, 0); + up_read(&fc->killsb); +err: + kfree(buf); + return err; +} + +static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_store_out outarg; + struct inode *inode; + struct address_space *mapping; + u64 nodeid; + int err; + unsigned int num; + loff_t file_size; + loff_t pos; + loff_t end; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (size - sizeof(outarg) != outarg.size) + return -EINVAL; + + if (outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + + nodeid = outarg.nodeid; + pos = outarg.offset; + num = min(outarg.size, MAX_LFS_FILESIZE - pos); + + down_read(&fc->killsb); + + err 
= -ENOENT; + inode = fuse_ilookup(fc, nodeid, NULL); + if (!inode) + goto out_up_killsb; + + mapping = inode->i_mapping; + file_size = i_size_read(inode); + end = pos + num; + if (end > file_size) { + file_size = end; + fuse_write_update_attr(inode, file_size, num); + } + + while (num) { + struct folio *folio; + unsigned int folio_offset; + unsigned int nr_bytes; + pgoff_t index = pos >> PAGE_SHIFT; + + folio = filemap_grab_folio(mapping, index); + err = PTR_ERR(folio); + if (IS_ERR(folio)) + goto out_iput; + + folio_offset = offset_in_folio(folio, pos); + nr_bytes = min(num, folio_size(folio) - folio_offset); + + err = fuse_copy_folio(cs, &folio, folio_offset, nr_bytes, 0); + if (!folio_test_uptodate(folio) && !err && folio_offset == 0 && + (nr_bytes == folio_size(folio) || file_size == end)) { + folio_zero_segment(folio, nr_bytes, folio_size(folio)); + folio_mark_uptodate(folio); + } + folio_unlock(folio); + folio_put(folio); + + if (err) + goto out_iput; + + pos += nr_bytes; + num -= nr_bytes; + } + + err = 0; + +out_iput: + iput(inode); +out_up_killsb: + up_read(&fc->killsb); + return err; +} + +struct fuse_retrieve_args { + struct fuse_args_pages ap; + struct fuse_notify_retrieve_in inarg; +}; + +static void fuse_retrieve_end(struct fuse_args *args, int error) +{ + struct fuse_retrieve_args *ra = + container_of(args, typeof(*ra), ap.args); + + release_pages(ra->ap.folios, ra->ap.num_folios); + kfree(ra); +} + +static int fuse_retrieve(struct fuse_mount *fm, struct inode *inode, + struct fuse_notify_retrieve_out *outarg) +{ + int err; + struct address_space *mapping = inode->i_mapping; + loff_t file_size; + unsigned int num; + unsigned int offset; + size_t total_len = 0; + unsigned int num_pages; + struct fuse_conn *fc = fm->fc; + struct fuse_retrieve_args *ra; + size_t args_size = sizeof(*ra); + struct fuse_args_pages *ap; + struct fuse_args *args; + loff_t pos = outarg->offset; + + offset = offset_in_page(pos); + file_size = i_size_read(inode); + + num = 
min(outarg->size, fc->max_write); + if (pos > file_size) + num = 0; + else if (num > file_size - pos) + num = file_size - pos; + + num_pages = DIV_ROUND_UP(num + offset, PAGE_SIZE); + num_pages = min(num_pages, fc->max_pages); + num = min(num, num_pages << PAGE_SHIFT); + + args_size += num_pages * (sizeof(ap->folios[0]) + sizeof(ap->descs[0])); + + ra = kzalloc(args_size, GFP_KERNEL); + if (!ra) + return -ENOMEM; + + ap = &ra->ap; + ap->folios = (void *) (ra + 1); + ap->descs = (void *) (ap->folios + num_pages); + + args = &ap->args; + args->nodeid = outarg->nodeid; + args->opcode = FUSE_NOTIFY_REPLY; + args->in_numargs = 3; + args->in_pages = true; + args->end = fuse_retrieve_end; + + while (num && ap->num_folios < num_pages) { + struct folio *folio; + unsigned int folio_offset; + unsigned int nr_bytes; + pgoff_t index = pos >> PAGE_SHIFT; + + folio = filemap_get_folio(mapping, index); + if (IS_ERR(folio)) + break; + + folio_offset = offset_in_folio(folio, pos); + nr_bytes = min(folio_size(folio) - folio_offset, num); + + ap->folios[ap->num_folios] = folio; + ap->descs[ap->num_folios].offset = folio_offset; + ap->descs[ap->num_folios].length = nr_bytes; + ap->num_folios++; + + pos += nr_bytes; + num -= nr_bytes; + total_len += nr_bytes; + } + ra->inarg.offset = outarg->offset; + ra->inarg.size = total_len; + fuse_set_zero_arg0(args); + args->in_args[1].size = sizeof(ra->inarg); + args->in_args[1].value = &ra->inarg; + args->in_args[2].size = total_len; + + err = fuse_simple_notify_reply(fm, args, outarg->notify_unique); + if (err) + fuse_retrieve_end(args, err); + + return err; +} + +static int fuse_notify_retrieve(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_retrieve_out outarg; + struct fuse_mount *fm; + struct inode *inode; + u64 nodeid; + int err; + + if (size != sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + fuse_copy_finish(cs); + + if 
(outarg.offset >= MAX_LFS_FILESIZE) + return -EINVAL; + + down_read(&fc->killsb); + err = -ENOENT; + nodeid = outarg.nodeid; + + inode = fuse_ilookup(fc, nodeid, &fm); + if (inode) { + err = fuse_retrieve(fm, inode, &outarg); + iput(inode); + } + up_read(&fc->killsb); + + return err; +} + +static int fuse_notify_resend(struct fuse_conn *fc) +{ + fuse_chan_resend(fc->chan); + return 0; +} + +/* + * Increments the fuse connection epoch. This will result of dentries from + * previous epochs to be invalidated. Additionally, if inval_wq is set, a work + * queue is scheduled to trigger the invalidation. + */ +static int fuse_notify_inc_epoch(struct fuse_conn *fc) +{ + atomic_inc(&fc->epoch); + if (inval_wq) + schedule_work(&fc->epoch_work); + + return 0; +} + +static int fuse_notify_prune(struct fuse_conn *fc, unsigned int size, + struct fuse_copy_state *cs) +{ + struct fuse_notify_prune_out outarg; + const unsigned int batch = 512; + u64 *nodeids __free(kfree) = kmalloc(sizeof(u64) * batch, GFP_KERNEL); + unsigned int num, i; + int err; + + if (!nodeids) + return -ENOMEM; + + if (size < sizeof(outarg)) + return -EINVAL; + + err = fuse_copy_one(cs, &outarg, sizeof(outarg)); + if (err) + return err; + + if (size - sizeof(outarg) != outarg.count * sizeof(u64)) + return -EINVAL; + + for (; outarg.count; outarg.count -= num) { + num = min(batch, outarg.count); + err = fuse_copy_one(cs, nodeids, num * sizeof(u64)); + if (err) + return err; + + scoped_guard(rwsem_read, &fc->killsb) { + for (i = 0; i < num; i++) + fuse_try_prune_one_inode(fc, nodeids[i]); + } + } + return 0; +} + +int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code, + unsigned int size, struct fuse_copy_state *cs) +{ + switch (code) { + case FUSE_NOTIFY_POLL: + return fuse_notify_poll(fc, size, cs); + + case FUSE_NOTIFY_INVAL_INODE: + return fuse_notify_inval_inode(fc, size, cs); + + case FUSE_NOTIFY_INVAL_ENTRY: + return fuse_notify_inval_entry(fc, size, cs); + + case FUSE_NOTIFY_STORE: + return 
fuse_notify_store(fc, size, cs); + + case FUSE_NOTIFY_RETRIEVE: + return fuse_notify_retrieve(fc, size, cs); + + case FUSE_NOTIFY_DELETE: + return fuse_notify_delete(fc, size, cs); + + case FUSE_NOTIFY_RESEND: + return fuse_notify_resend(fc); + + case FUSE_NOTIFY_INC_EPOCH: + return fuse_notify_inc_epoch(fc); + + case FUSE_NOTIFY_PRUNE: + return fuse_notify_prune(fc, size, cs); + + default: + return -EINVAL; + } +} diff --git a/fs/fuse/poll.c b/fs/fuse/poll.c new file mode 100644 index 0000000000000..bce3ee2e861eb --- /dev/null +++ b/fs/fuse/poll.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "fuse_i.h" + +void fuse_end_polls(struct fuse_conn *fc) +{ + struct rb_node *p; + + spin_lock(&fc->lock); + p = rb_first(&fc->polled_files); + + while (p) { + struct fuse_file *ff; + ff = rb_entry(p, struct fuse_file, polled_node); + wake_up_interruptible_all(&ff->poll_wait); + + p = rb_next(p); + } + spin_unlock(&fc->lock); +} + +/* + * All files which have been polled are linked to RB tree + * fuse_conn->polled_files which is indexed by kh. Walk the tree and + * find the matching one. + */ +static struct rb_node **fuse_find_polled_node(struct fuse_conn *fc, u64 kh, + struct rb_node **parent_out) +{ + struct rb_node **link = &fc->polled_files.rb_node; + struct rb_node *last = NULL; + + while (*link) { + struct fuse_file *ff; + + last = *link; + ff = rb_entry(last, struct fuse_file, polled_node); + + if (kh < ff->kh) + link = &last->rb_left; + else if (kh > ff->kh) + link = &last->rb_right; + else + return link; + } + + if (parent_out) + *parent_out = last; + return link; +} + +/* + * The file is about to be polled. Make sure it's on the polled_files + * RB tree. Note that files once added to the polled_files tree are + * not removed before the file is released. This is because a file + * polled once is likely to be polled again. 
+ */ +static void fuse_register_polled_file(struct fuse_conn *fc, + struct fuse_file *ff) +{ + spin_lock(&fc->lock); + if (RB_EMPTY_NODE(&ff->polled_node)) { + struct rb_node **link, *parent; + + link = fuse_find_polled_node(fc, ff->kh, &parent); + BUG_ON(*link); + rb_link_node(&ff->polled_node, parent, link); + rb_insert_color(&ff->polled_node, &fc->polled_files); + } + spin_unlock(&fc->lock); +} + +__poll_t fuse_file_poll(struct file *file, poll_table *wait) +{ + struct fuse_file *ff = file->private_data; + struct fuse_mount *fm = ff->fm; + struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh }; + struct fuse_poll_out outarg; + FUSE_ARGS(args); + int err; + + if (fm->fc->no_poll) + return DEFAULT_POLLMASK; + + poll_wait(file, &ff->poll_wait, wait); + inarg.events = mangle_poll(poll_requested_events(wait)); + + /* + * Ask for notification iff there's someone waiting for it. + * The client may ignore the flag and always notify. + */ + if (waitqueue_active(&ff->poll_wait)) { + inarg.flags |= FUSE_POLL_SCHEDULE_NOTIFY; + fuse_register_polled_file(fm->fc, ff); + } + + args.opcode = FUSE_POLL; + args.nodeid = ff->nodeid; + args.in_numargs = 1; + args.in_args[0].size = sizeof(inarg); + args.in_args[0].value = &inarg; + args.out_numargs = 1; + args.out_args[0].size = sizeof(outarg); + args.out_args[0].value = &outarg; + err = fuse_simple_request(fm, &args); + + if (!err) + return demangle_poll(outarg.revents); + if (err == -ENOSYS) { + fm->fc->no_poll = 1; + return DEFAULT_POLLMASK; + } + return EPOLLERR; +} +EXPORT_SYMBOL_GPL(fuse_file_poll); + +/* + * This is called from fuse_handle_notify() on FUSE_NOTIFY_POLL and + * wakes up the poll waiters. 
+ */ +int fuse_notify_poll_wakeup(struct fuse_conn *fc, + struct fuse_notify_poll_wakeup_out *outarg) +{ + u64 kh = outarg->kh; + struct rb_node **link; + + spin_lock(&fc->lock); + + link = fuse_find_polled_node(fc, kh, NULL); + if (*link) { + struct fuse_file *ff; + + ff = rb_entry(*link, struct fuse_file, polled_node); + wake_up_interruptible_sync(&ff->poll_wait); + } + + spin_unlock(&fc->lock); + return 0; +} + diff --git a/fs/fuse/req.c b/fs/fuse/req.c new file mode 100644 index 0000000000000..a01ee743d31e9 --- /dev/null +++ b/fs/fuse/req.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "fuse_i.h" + +static int fuse_fill_creds(struct fuse_mount *fm, struct fuse_args *args, struct mnt_idmap *idmap) +{ + struct fuse_conn *fc = fm->fc; + bool no_idmap = !fm->sb || (fm->sb->s_iflags & SB_I_NOIDMAP); + kuid_t fsuid = mapped_fsuid(idmap, fc->user_ns); + kgid_t fsgid = mapped_fsgid(idmap, fc->user_ns); + + args->pid = pid_nr_ns(task_pid(current), fc->pid_ns); + + if (args->force) { + if (args->nocreds) + return 0; + + if (no_idmap) { + args->uid = from_kuid_munged(fc->user_ns, current_fsuid()); + args->gid = from_kgid_munged(fc->user_ns, current_fsgid()); + } else { + args->uid = FUSE_INVALID_UIDGID; + args->gid = FUSE_INVALID_UIDGID; + } + return 0; + } + + WARN_ON(args->nocreds); + /* + * Keep the old behavior when idmappings support was not + * declared by a FUSE server. + * + * For those FUSE servers who support idmapped mounts, we send UID/GID + * only along with "inode creation" fuse requests, otherwise idmap == + * &invalid_mnt_idmap and req->in.h.{u,g}id will be equal to + * FUSE_INVALID_UIDGID. 
+ */ + if (no_idmap) { + fsuid = current_fsuid(); + fsgid = current_fsgid(); + } + args->uid = from_kuid(fc->user_ns, fsuid); + args->gid = from_kgid(fc->user_ns, fsgid); + + if (no_idmap && unlikely(args->uid == ((uid_t)-1) || args->gid == ((gid_t)-1))) + return -EOVERFLOW; + + return 0; +} + +static int fuse_req_prep(struct fuse_mount *fm, struct fuse_args *args, struct mnt_idmap *idmap) +{ + if (!args->force && fm->fc->conn_error) + return -ECONNREFUSED; + + return fuse_fill_creds(fm, args, idmap); +} + +ssize_t __fuse_simple_request(struct mnt_idmap *idmap, struct fuse_mount *fm, + struct fuse_args *args) +{ + struct fuse_conn *fc = fm->fc; + int err = fuse_req_prep(fm, args, idmap); + + if (err) + return err; + + return fuse_chan_send(fc->chan, args); +} + +int fuse_simple_background(struct fuse_mount *fm, struct fuse_args *args, gfp_t gfp_flags) +{ + struct fuse_conn *fc = fm->fc; + int err; + + WARN_ON(args->force && !args->nocreds); + + err = fuse_req_prep(fm, args, &invalid_mnt_idmap); + if (err) + return err; + + return fuse_chan_send_bg(fc->chan, args, gfp_flags); +} +EXPORT_SYMBOL_GPL(fuse_simple_background); + +int fuse_simple_notify_reply(struct fuse_mount *fm, struct fuse_args *args, u64 unique) +{ + struct fuse_conn *fc = fm->fc; + int err; + + WARN_ON(args->force && !args->nocreds); + + err = fuse_req_prep(fm, args, &invalid_mnt_idmap); + if (err) + return err; + + return fuse_chan_send_notify_reply(fc->chan, args, unique); +} diff --git a/fs/fuse/req_timeout.c b/fs/fuse/req_timeout.c new file mode 100644 index 0000000000000..6cc6fc4913437 --- /dev/null +++ b/fs/fuse/req_timeout.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "dev.h" +#include "sysctl.h" +#include "fuse_dev_i.h" +#include "dev_uring_i.h" + +/* Frequency (in seconds) of request timeout checks, if opted into */ +#define FUSE_TIMEOUT_TIMER_FREQ 15 + +/* Frequency (in jiffies) of request timeout checks, if opted into */ +static const unsigned long 
fuse_timeout_timer_freq = + secs_to_jiffies(FUSE_TIMEOUT_TIMER_FREQ); + +/* + * Default timeout (in seconds) for the server to reply to a request + * before the connection is aborted, if no timeout was specified on mount. + * + * Exported via sysctl + */ +unsigned int fuse_default_req_timeout; + +/* + * Max timeout (in seconds) for the server to reply to a request before + * the connection is aborted. + * + * Exported via sysctl + */ +unsigned int fuse_max_req_timeout; + +bool fuse_request_expired(struct fuse_chan *fch, struct list_head *list) +{ + struct fuse_req *req; + + req = list_first_entry_or_null(list, struct fuse_req, list); + if (!req) + return false; + return time_is_before_jiffies(req->create_time + fch->timeout.req_timeout); +} + +static bool fuse_fpq_processing_expired(struct fuse_chan *fch, struct list_head *processing) +{ + int i; + + for (i = 0; i < FUSE_PQ_HASH_SIZE; i++) + if (fuse_request_expired(fch, &processing[i])) + return true; + + return false; +} + +/* + * Check if any requests aren't being completed by the time the request timeout + * elapses. To do so, we: + * - check the fiq pending list + * - check the bg queue + * - check the fpq io and processing lists + * + * To make this fast, we only check against the head request on each list since + * these are generally queued in order of creation time (eg newer requests get + * queued to the tail). We might miss a few edge cases (eg requests transitioning + * between lists, re-sent requests at the head of the pending list having a + * later creation time than other requests on that list, etc.) but that is fine + * since if the request never gets fulfilled, it will eventually be caught. 
+ */ +static void fuse_check_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct fuse_chan *fch = container_of(dwork, struct fuse_chan, timeout.work); + struct fuse_iqueue *fiq = &fch->iq; + struct fuse_dev *fud; + struct fuse_pqueue *fpq; + bool expired = false; + + if (!atomic_read(&fch->num_waiting)) + goto out; + + spin_lock(&fiq->lock); + expired = fuse_request_expired(fch, &fiq->pending); + spin_unlock(&fiq->lock); + if (expired) + goto chan_abort; + + spin_lock(&fch->bg_lock); + expired = fuse_request_expired(fch, &fch->bg_queue); + spin_unlock(&fch->bg_lock); + if (expired) + goto chan_abort; + + spin_lock(&fch->lock); + if (!fch->connected) { + spin_unlock(&fch->lock); + return; + } + list_for_each_entry(fud, &fch->devices, entry) { + fpq = &fud->pq; + spin_lock(&fpq->lock); + if (fuse_request_expired(fch, &fpq->io) || + fuse_fpq_processing_expired(fch, fpq->processing)) { + spin_unlock(&fpq->lock); + spin_unlock(&fch->lock); + goto chan_abort; + } + + spin_unlock(&fpq->lock); + } + spin_unlock(&fch->lock); + + if (fuse_uring_request_expired(fch)) + goto chan_abort; + +out: + queue_delayed_work(system_percpu_wq, &fch->timeout.work, + fuse_timeout_timer_freq); + return; + +chan_abort: + fuse_chan_abort(fch, false); +} + +static void set_request_timeout(struct fuse_chan *fch, unsigned int timeout) +{ + fch->timeout.req_timeout = secs_to_jiffies(timeout); + INIT_DELAYED_WORK(&fch->timeout.work, fuse_check_timeout); + queue_delayed_work(system_percpu_wq, &fch->timeout.work, + fuse_timeout_timer_freq); +} + +void fuse_init_server_timeout(struct fuse_chan *fch, unsigned int timeout) +{ + if (!timeout && !fuse_max_req_timeout && !fuse_default_req_timeout) + return; + + if (!timeout) + timeout = fuse_default_req_timeout; + + if (fuse_max_req_timeout) { + if (timeout) + timeout = min(fuse_max_req_timeout, timeout); + else + timeout = fuse_max_req_timeout; + } + + timeout = max(FUSE_TIMEOUT_TIMER_FREQ, timeout); + + 
set_request_timeout(fch, timeout); +} + diff --git a/fs/fuse/sysctl.c b/fs/fuse/sysctl.c index e2d921abcb883..74eca5ce9a2ce 100644 --- a/fs/fuse/sysctl.c +++ b/fs/fuse/sysctl.c @@ -6,6 +6,7 @@ */ #include <linux/sysctl.h> +#include "sysctl.h" #include "fuse_i.h" static struct ctl_table_header *fuse_table_header; diff --git a/fs/fuse/sysctl.h b/fs/fuse/sysctl.h new file mode 100644 index 0000000000000..948d884171331 --- /dev/null +++ b/fs/fuse/sysctl.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _FS_FUSE_SYSCTL_H +#define _FS_FUSE_SYSCTL_H + +extern unsigned int fuse_default_req_timeout; +extern unsigned int fuse_max_req_timeout; + +#endif /* _FS_FUSE_SYSCTL_H */ diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 12300651a0f1a..a4cf813cebfcc 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -19,6 +19,7 @@ #include <linux/highmem.h> #include <linux/cleanup.h> #include <linux/uio.h> +#include "dev.h" #include "fuse_i.h" #include "fuse_dev_i.h" @@ -1519,7 +1520,7 @@ static void virtio_fs_send_req(struct fuse_iqueue *fiq, struct fuse_req *req) if (ret == -ENOSPC) { /* * Virtqueue full. Retry submission from worker - * context as we might be holding fc->bg_lock. + * context as we might be holding fc->chan->bg_lock. */ spin_lock(&fsvq->lock); list_add_tail(&req->list, &fsvq->queued_reqs); @@ -1562,7 +1563,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) { struct fuse_mount *fm = get_fuse_mount_super(sb); struct fuse_conn *fc = fm->fc; - struct virtio_fs *fs = fc->iq.priv; + struct virtio_fs *fs = fc->chan->iq.priv; struct fuse_fs_context *ctx = fsc->fs_private; unsigned int i; int err; @@ -1606,7 +1607,7 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc) for (i = 0; i < fs->nvqs; i++) { struct virtio_fs_vq *fsvq = &fs->vqs[i]; - fuse_dev_install(fsvq->fud, fc); + fuse_dev_install(fsvq->fud, fc->chan); } /* Previous unmount will stop all queues. 
Start these again */ @@ -1625,7 +1626,7 @@ err: static void virtio_fs_conn_destroy(struct fuse_mount *fm) { struct fuse_conn *fc = fm->fc; - struct virtio_fs *vfs = fc->iq.priv; + struct virtio_fs *vfs = fc->chan->iq.priv; struct virtio_fs_vq *fsvq = &vfs->vqs[VQ_HIPRIO]; /* Stop dax worker. Soon evict_inodes() will be called which @@ -1673,7 +1674,7 @@ static int virtio_fs_test_super(struct super_block *sb, struct fuse_mount *fsc_fm = fsc->s_fs_info; struct fuse_mount *sb_fm = get_fuse_mount_super(sb); - return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv; + return fsc_fm->fc->chan->iq.priv == sb_fm->fc->chan->iq.priv; } static int virtio_fs_get_tree(struct fs_context *fsc) @@ -1683,13 +1684,17 @@ static int virtio_fs_get_tree(struct fs_context *fsc) struct fuse_conn *fc = NULL; struct fuse_mount *fm; unsigned int virtqueue_size; + struct fuse_chan *fch __free(fuse_chan_free) = fuse_chan_new(); int err = -EIO; + if (!fch) + return -ENOMEM; + if (!fsc->source) return invalf(fsc, "No source specified"); /* This gets a reference on virtio_fs object. This ptr gets installed - * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() + * in chan->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object. */ fs = virtio_fs_find_instance(fsc->source); @@ -1711,7 +1716,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc) if (!fm) goto out_err; - fuse_conn_init(fc, fm, fsc->user_ns, &virtio_fs_fiq_ops, fs); + fuse_iqueue_init(&fch->iq, &virtio_fs_fiq_ops, fs); + fuse_conn_init(fc, fm, fsc->user_ns, no_free_ptr(fch)); + fc->release = fuse_free_conn; fc->delete_stale = true; fc->auto_submounts = true; |
