blk-mq: move cpuhp callback registering out of q->sysfs_lock

Registering and unregistering cpuhp callback requires global cpu hotplug lock,
which is used everywhere. Meantime q->sysfs_lock is used in block layer
almost everywhere.

It is easy to trigger lockdep warning[1] by connecting the two locks.

Fix the warning by moving blk-mq's cpuhp callback registering out of
q->sysfs_lock. Add one dedicated global lock for covering registering &
unregistering hctx's cpuhp, and it is safe to do so because hctx is
guaranteed to be live if our request_queue is live.

[1] https://lore.kernel.org/lkml/Z04pz3AlvI4o0Mr8@agluck-desk3/

Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Newman <peternewman@google.com>
Cc: Babu Moger <babu.moger@amd.com>
Reported-by: Luck Tony <tony.luck@intel.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20241206111611.978870-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Ming Lei 2024-12-06 19:16:07 +08:00 committed by Jens Axboe
parent 4bf485a7db
commit 22465bbac5

View File

@ -43,6 +43,7 @@
static DEFINE_PER_CPU(struct llist_head, blk_cpu_done); static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd); static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
static DEFINE_MUTEX(blk_mq_cpuhp_lock);
static void blk_mq_insert_request(struct request *rq, blk_insert_t flags); static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
static void blk_mq_request_bypass_insert(struct request *rq, static void blk_mq_request_bypass_insert(struct request *rq,
@ -3739,13 +3740,91 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
return 0; return 0;
} }
static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) static void __blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
{ {
if (!(hctx->flags & BLK_MQ_F_STACKING)) lockdep_assert_held(&blk_mq_cpuhp_lock);
if (!(hctx->flags & BLK_MQ_F_STACKING) &&
!hlist_unhashed(&hctx->cpuhp_online)) {
cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE, cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
&hctx->cpuhp_online); &hctx->cpuhp_online);
cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, INIT_HLIST_NODE(&hctx->cpuhp_online);
&hctx->cpuhp_dead); }
if (!hlist_unhashed(&hctx->cpuhp_dead)) {
cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
&hctx->cpuhp_dead);
INIT_HLIST_NODE(&hctx->cpuhp_dead);
}
}
static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
{
mutex_lock(&blk_mq_cpuhp_lock);
__blk_mq_remove_cpuhp(hctx);
mutex_unlock(&blk_mq_cpuhp_lock);
}
static void __blk_mq_add_cpuhp(struct blk_mq_hw_ctx *hctx)
{
lockdep_assert_held(&blk_mq_cpuhp_lock);
if (!(hctx->flags & BLK_MQ_F_STACKING) &&
hlist_unhashed(&hctx->cpuhp_online))
cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
&hctx->cpuhp_online);
if (hlist_unhashed(&hctx->cpuhp_dead))
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD,
&hctx->cpuhp_dead);
}
static void __blk_mq_remove_cpuhp_list(struct list_head *head)
{
struct blk_mq_hw_ctx *hctx;
lockdep_assert_held(&blk_mq_cpuhp_lock);
list_for_each_entry(hctx, head, hctx_list)
__blk_mq_remove_cpuhp(hctx);
}
/*
* Unregister cpuhp callbacks from exited hw queues
*
* Safe to call if this `request_queue` is live
*/
static void blk_mq_remove_hw_queues_cpuhp(struct request_queue *q)
{
LIST_HEAD(hctx_list);
spin_lock(&q->unused_hctx_lock);
list_splice_init(&q->unused_hctx_list, &hctx_list);
spin_unlock(&q->unused_hctx_lock);
mutex_lock(&blk_mq_cpuhp_lock);
__blk_mq_remove_cpuhp_list(&hctx_list);
mutex_unlock(&blk_mq_cpuhp_lock);
spin_lock(&q->unused_hctx_lock);
list_splice(&hctx_list, &q->unused_hctx_list);
spin_unlock(&q->unused_hctx_lock);
}
/*
* Register cpuhp callbacks from all hw queues
*
* Safe to call if this `request_queue` is live
*/
static void blk_mq_add_hw_queues_cpuhp(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
unsigned long i;
mutex_lock(&blk_mq_cpuhp_lock);
queue_for_each_hw_ctx(q, hctx, i)
__blk_mq_add_cpuhp(hctx);
mutex_unlock(&blk_mq_cpuhp_lock);
} }
/* /*
@ -3796,8 +3875,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
if (set->ops->exit_hctx) if (set->ops->exit_hctx)
set->ops->exit_hctx(hctx, hctx_idx); set->ops->exit_hctx(hctx, hctx_idx);
blk_mq_remove_cpuhp(hctx);
xa_erase(&q->hctx_table, hctx_idx); xa_erase(&q->hctx_table, hctx_idx);
spin_lock(&q->unused_hctx_lock); spin_lock(&q->unused_hctx_lock);
@ -3814,6 +3891,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
queue_for_each_hw_ctx(q, hctx, i) { queue_for_each_hw_ctx(q, hctx, i) {
if (i == nr_queue) if (i == nr_queue)
break; break;
blk_mq_remove_cpuhp(hctx);
blk_mq_exit_hctx(q, set, hctx, i); blk_mq_exit_hctx(q, set, hctx, i);
} }
} }
@ -3837,11 +3915,6 @@ static int blk_mq_init_hctx(struct request_queue *q,
if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL)) if (xa_insert(&q->hctx_table, hctx_idx, hctx, GFP_KERNEL))
goto exit_flush_rq; goto exit_flush_rq;
if (!(hctx->flags & BLK_MQ_F_STACKING))
cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
&hctx->cpuhp_online);
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
return 0; return 0;
exit_flush_rq: exit_flush_rq:
@ -3876,6 +3949,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn); INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
spin_lock_init(&hctx->lock); spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch); INIT_LIST_HEAD(&hctx->dispatch);
INIT_HLIST_NODE(&hctx->cpuhp_dead);
INIT_HLIST_NODE(&hctx->cpuhp_online);
hctx->queue = q; hctx->queue = q;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED; hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
@ -4414,6 +4489,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j) xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j); blk_mq_exit_hctx(q, set, hctx, j);
mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_lock);
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
/* register cpuhp for new initialized hctxs */
blk_mq_add_hw_queues_cpuhp(q);
} }
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,