aboutsummaryrefslogtreecommitdiffstats
path: root/block
diff options
authorYu Kuai <yukuai@fygo.io>2026-06-08 11:42:44 +0800
committerJens Axboe <axboe@kernel.dk>2026-06-24 06:42:19 -0600
commit56cc24f59c145ce6938959f792df04b8a4f5a4d8 (patch)
tree23c5ff025e2a6c6f699477bfe63c21449ac4d400 /block
parent0af3fedb8c8ed3c07b4f76927bd7fc88f6f82efb (diff)
downloadath-56cc24f59c145ce6938959f792df04b8a4f5a4d8.tar.gz
blk-cgroup: don't nest queue_lock under rcu in blkcg_print_blkgs()
With previous modification to delay freeing policy data after an RCU grace period, prfill() can run under RCU instead of taking queue_lock. However, policy teardown can still clear blkg->pd[plid] after blkcg_print_blkgs() observes the policy enabled bit. Load policy data once with READ_ONCE() and skip the blkg if teardown already cleared it. Do the same in recursive stat walks for descendant blkgs. Remove the stale BFQ debug queue_lock assertion because blkcg_print_blkgs() no longer calls prfill() with queue_lock held. This also lets ioc_qos_prfill() and ioc_cost_model_prfill() use IRQ-safe ioc->lock locking without re-enabling IRQs while queue_lock is still held. Signed-off-by: Yu Kuai <yukuai@fygo.io> Link: https://patch.msgid.link/db7633d5e263dd1c2bf9b901762545a84b7d714e.1780621988.git.yukuai@fygo.io Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r--block/bfq-cgroup.c8
-rw-r--r--block/blk-cgroup-rwstat.c15
-rw-r--r--block/blk-cgroup.c22
-rw-r--r--block/blk-cgroup.h6
-rw-r--r--block/blk-iocost.c8
5 files changed, 34 insertions, 25 deletions
diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 0d3e32d246a24..e82ff03bda02e 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -1163,16 +1163,18 @@ static u64 bfqg_prfill_stat_recursive(struct seq_file *sf,
struct cgroup_subsys_state *pos_css;
u64 sum = 0;
- lockdep_assert_held(&blkg->q->queue_lock);
-
rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
+ struct blkg_policy_data *pd;
struct bfq_stat *stat;
if (!pos_blkg->online)
continue;
- stat = (void *)blkg_to_pd(pos_blkg, &blkcg_policy_bfq) + off;
+ pd = blkg_to_pd(pos_blkg, &blkcg_policy_bfq);
+ if (!pd)
+ continue;
+ stat = (void *)pd + off;
sum += bfq_stat_read(stat) + atomic64_read(&stat->aux_cnt);
}
rcu_read_unlock();
diff --git a/block/blk-cgroup-rwstat.c b/block/blk-cgroup-rwstat.c
index a55fb0c535582..aae910713814f 100644
--- a/block/blk-cgroup-rwstat.c
+++ b/block/blk-cgroup-rwstat.c
@@ -101,24 +101,27 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
struct cgroup_subsys_state *pos_css;
unsigned int i;
- lockdep_assert_held(&blkg->q->queue_lock);
+ WARN_ON_ONCE(!rcu_read_lock_held());
memset(sum, 0, sizeof(*sum));
- rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
struct blkg_rwstat *rwstat;
if (!pos_blkg->online)
continue;
- if (pol)
- rwstat = (void *)blkg_to_pd(pos_blkg, pol) + off;
- else
+ if (pol) {
+ struct blkg_policy_data *pd = blkg_to_pd(pos_blkg, pol);
+
+ if (!pd)
+ continue;
+ rwstat = (void *)pd + off;
+ } else {
rwstat = (void *)pos_blkg + off;
+ }
for (i = 0; i < BLKG_RWSTAT_NR; i++)
sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i);
}
- rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index c991c263cb5ab..d6355338f2900 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -699,9 +699,9 @@ const char *blkg_dev_name(struct blkcg_gq *blkg)
*
* This function invokes @prfill on each blkg of @blkcg if pd for the
* policy specified by @pol exists. @prfill is invoked with @sf, the
- * policy data and @data and the matching queue lock held. If @show_total
- * is %true, the sum of the return values from @prfill is printed with
- * "Total" label at the end.
+ * policy data and @data under RCU read lock. If @show_total is %true, the
+ * sum of the return values from @prfill is printed with "Total" label at the
+ * end.
*
* This is to be used to construct print functions for
* cftype->read_seq_string method.
@@ -717,10 +717,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
rcu_read_lock();
hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
- spin_lock_irq(&blkg->q->queue_lock);
- if (blkcg_policy_enabled(blkg->q, pol))
- total += prfill(sf, blkg->pd[pol->plid], data);
- spin_unlock_irq(&blkg->q->queue_lock);
+ struct blkg_policy_data *pd;
+
+ if (!blkcg_policy_enabled(blkg->q, pol))
+ continue;
+
+ pd = blkg_to_pd(blkg, pol);
+ if (pd)
+ total += prfill(sf, pd, data);
}
rcu_read_unlock();
@@ -1604,7 +1608,7 @@ retry:
pd->blkg = blkg;
pd->plid = pol->plid;
- blkg->pd[pol->plid] = pd;
+ WRITE_ONCE(blkg->pd[pol->plid], pd);
if (pol->pd_init_fn)
pol->pd_init_fn(pd);
@@ -1643,7 +1647,7 @@ enomem:
pol->pd_offline_fn(pd);
pd->online = false;
pol->pd_free_fn(pd);
- blkg->pd[pol->plid] = NULL;
+ WRITE_ONCE(blkg->pd[pol->plid], NULL);
}
spin_unlock(&blkcg->lock);
}
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index cc603ded6ded4..615390f751aa2 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -284,9 +284,9 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg,
* Return pointer to private data associated with the @blkg-@pol pair.
*/
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
- struct blkcg_policy *pol)
+ const struct blkcg_policy *pol)
{
- return blkg ? blkg->pd[pol->plid] : NULL;
+ return blkg ? READ_ONCE(blkg->pd[pol->plid]) : NULL;
}
static inline struct blkcg_policy_data *blkcg_to_cpd(struct blkcg *blkcg,
@@ -493,7 +493,7 @@ static inline void blkcg_deactivate_policy(struct gendisk *disk,
const struct blkcg_policy *pol) { }
static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg,
- struct blkcg_policy *pol) { return NULL; }
+ const struct blkcg_policy *pol) { return NULL; }
static inline struct blkcg_gq *pd_to_blkg(struct blkg_policy_data *pd) { return NULL; }
static inline void blkg_get(struct blkcg_gq *blkg) { }
static inline void blkg_put(struct blkcg_gq *blkg) { }
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 27d2dcaa65f02..8b2aeba2e1e39 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3221,7 +3221,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
if (!dname)
return 0;
- spin_lock(&ioc->lock);
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
ioc->params.qos[QOS_RPPM] / 10000,
@@ -3234,7 +3234,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
ioc->params.qos[QOS_MIN] % 10000 / 100,
ioc->params.qos[QOS_MAX] / 10000,
ioc->params.qos[QOS_MAX] % 10000 / 100);
- spin_unlock(&ioc->lock);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
@@ -3430,14 +3430,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
if (!dname)
return 0;
- spin_lock(&ioc->lock);
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s ctrl=%s model=linear "
"rbps=%llu rseqiops=%llu rrandiops=%llu "
"wbps=%llu wseqiops=%llu wrandiops=%llu\n",
dname, ioc->user_cost_model ? "user" : "auto",
u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
- spin_unlock(&ioc->lock);
+ spin_unlock_irq(&ioc->lock);
return 0;
}