aboutsummaryrefslogtreecommitdiffstats
path: root/io_uring
diff options
authorJens Axboe <axboe@kernel.dk>2026-06-15 13:43:16 -0600
committerJens Axboe <axboe@kernel.dk>2026-06-16 09:48:00 -0600
commitca4aa97194ae353c2882d7cb4ed123a544892bcf (patch)
tree6847313caee411c23aba37bf5bbce3e51a0ef9e5 /io_uring
parentc554246ff4c68abf71b61a89c6e39d3cf94f523e (diff)
downloadath-ca4aa97194ae353c2882d7cb4ed123a544892bcf.tar.gz
io_uring: get rid of tw_pending for !DEFER task work
The normal task_work path used a tw_pending bit to ensure the callback was only added once: the mpscq drains incrementally, so a single tctx_task_work() run can take the queue through empty -> non-empty several times, and each transition would otherwise re-add the already pending callback_head. This corrupts the task_work list, and is what tw_pending protects again. This can go away, if we stop running the task_work as soon as the queue empties. Suggested-by: Caleb Sander Mateos <csander@purestorage.com> Reviewed-by: Caleb Sander Mateos <csander@purestorage.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'io_uring')
-rw-r--r--io_uring/mpscq.h9
-rw-r--r--io_uring/tw.c19
2 files changed, 17 insertions, 11 deletions
diff --git a/io_uring/mpscq.h b/io_uring/mpscq.h
index c801384c6a0aa..f910526766fd8 100644
--- a/io_uring/mpscq.h
+++ b/io_uring/mpscq.h
@@ -122,4 +122,13 @@ static inline struct llist_node *mpscq_pop(struct mpscq *q,
return NULL;
}
+/*
+ * Returns true if the most recent mpscq_pop() that returned a node also
+ * emptied the queue. Consumer must be serialized.
+ */
+static inline bool mpscq_pop_emptied(struct mpscq *q, struct llist_node *head)
+{
+ return head == &q->stub;
+}
+
#endif /* IOU_MPSCQ_H */
diff --git a/io_uring/tw.c b/io_uring/tw.c
index e74372233f40b..f2ce806b01a1e 100644
--- a/io_uring/tw.c
+++ b/io_uring/tw.c
@@ -34,10 +34,6 @@ void io_tctx_fallback_work(struct work_struct *work)
fallback_work);
unsigned int count = 0;
- /* see tctx_task_work() - a set bit must always have a run coming */
- clear_bit(0, &tctx->tw_pending);
- smp_mb__after_atomic();
-
/*
* Run the entries directly. We're in PF_KTHRED context, hence
* io_should_terminate_tw() is true and they will be marked as
@@ -101,6 +97,13 @@ void tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries,
io_poll_task_func, io_req_rw_complete,
(struct io_tw_req){req}, ts);
(*count)++;
+ /*
+ * Break if most recent pop emptied the queue. This helps
+ * bound task_work run, and also protects the regular
+ * task_work addition.
+ */
+ if (mpscq_pop_emptied(&tctx->task_list, tctx->task_head))
+ break;
if (unlikely(need_resched())) {
ctx_flush_and_put(ctx, ts);
ctx = NULL;
@@ -127,8 +130,6 @@ void tctx_task_work(struct callback_head *cb)
unsigned int count = 0;
tctx = container_of(cb, struct io_uring_task, task_work);
- clear_bit(0, &tctx->tw_pending);
- smp_mb__after_atomic();
tctx_task_work_run(tctx, UINT_MAX, &count);
}
@@ -206,7 +207,7 @@ void io_req_normal_work_add(struct io_kiocb *req)
struct io_uring_task *tctx = req->tctx;
struct io_ring_ctx *ctx = req->ctx;
- /* task_work already pending, we're done */
+ /* tw run already pending, nothing else to do */
if (!mpscq_push(&tctx->task_list, &req->io_task_work.node))
return;
@@ -223,10 +224,6 @@ void io_req_normal_work_add(struct io_kiocb *req)
return;
}
- /* task_work must only be added once */
- if (test_and_set_bit(0, &tctx->tw_pending))
- return;
-
if (likely(!task_work_add(tctx->task, &tctx->task_work, ctx->notify_method)))
return;