mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-06 05:02:31 +00:00
Merge branch 'for-4.9/block-smp' of git://git.kernel.dk/linux-block
Pull blk-mq CPU hotplug update from Jens Axboe: "This is the conversion of blk-mq to the new hotplug state machine" * 'for-4.9/block-smp' of git://git.kernel.dk/linux-block: blk-mq: fixup "Convert to new hotplug state machine" blk-mq: Convert to new hotplug state machine blk-mq/cpu-notif: Convert to new hotplug state machine
This commit is contained in:
commit
24532f7681
@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \
|
|||||||
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
|
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
|
||||||
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
|
blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
|
||||||
blk-lib.o blk-mq.o blk-mq-tag.o \
|
blk-lib.o blk-mq.o blk-mq-tag.o \
|
||||||
blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \
|
blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \
|
||||||
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
|
genhd.o scsi_ioctl.o partition-generic.o ioprio.o \
|
||||||
badblocks.o partitions/
|
badblocks.o partitions/
|
||||||
|
|
||||||
|
@ -1,67 +0,0 @@
|
|||||||
/*
|
|
||||||
* CPU notifier helper code for blk-mq
|
|
||||||
*
|
|
||||||
* Copyright (C) 2013-2014 Jens Axboe
|
|
||||||
*/
|
|
||||||
#include <linux/kernel.h>
|
|
||||||
#include <linux/module.h>
|
|
||||||
#include <linux/init.h>
|
|
||||||
#include <linux/blkdev.h>
|
|
||||||
#include <linux/list.h>
|
|
||||||
#include <linux/llist.h>
|
|
||||||
#include <linux/smp.h>
|
|
||||||
#include <linux/cpu.h>
|
|
||||||
|
|
||||||
#include <linux/blk-mq.h>
|
|
||||||
#include "blk-mq.h"
|
|
||||||
|
|
||||||
static LIST_HEAD(blk_mq_cpu_notify_list);
|
|
||||||
static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock);
|
|
||||||
|
|
||||||
static int blk_mq_main_cpu_notify(struct notifier_block *self,
|
|
||||||
unsigned long action, void *hcpu)
|
|
||||||
{
|
|
||||||
unsigned int cpu = (unsigned long) hcpu;
|
|
||||||
struct blk_mq_cpu_notifier *notify;
|
|
||||||
int ret = NOTIFY_OK;
|
|
||||||
|
|
||||||
raw_spin_lock(&blk_mq_cpu_notify_lock);
|
|
||||||
|
|
||||||
list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) {
|
|
||||||
ret = notify->notify(notify->data, action, cpu);
|
|
||||||
if (ret != NOTIFY_OK)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
raw_spin_unlock(&blk_mq_cpu_notify_lock);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
|
|
||||||
{
|
|
||||||
BUG_ON(!notifier->notify);
|
|
||||||
|
|
||||||
raw_spin_lock(&blk_mq_cpu_notify_lock);
|
|
||||||
list_add_tail(&notifier->list, &blk_mq_cpu_notify_list);
|
|
||||||
raw_spin_unlock(&blk_mq_cpu_notify_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier)
|
|
||||||
{
|
|
||||||
raw_spin_lock(&blk_mq_cpu_notify_lock);
|
|
||||||
list_del(&notifier->list);
|
|
||||||
raw_spin_unlock(&blk_mq_cpu_notify_lock);
|
|
||||||
}
|
|
||||||
|
|
||||||
void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
|
|
||||||
int (*fn)(void *, unsigned long, unsigned int),
|
|
||||||
void *data)
|
|
||||||
{
|
|
||||||
notifier->notify = fn;
|
|
||||||
notifier->data = data;
|
|
||||||
}
|
|
||||||
|
|
||||||
void __init blk_mq_cpu_init(void)
|
|
||||||
{
|
|
||||||
hotcpu_notifier(blk_mq_main_cpu_notify, 0);
|
|
||||||
}
|
|
123
block/blk-mq.c
123
block/blk-mq.c
@ -1563,11 +1563,13 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set,
|
|||||||
* software queue to the hw queue dispatch list, and ensure that it
|
* software queue to the hw queue dispatch list, and ensure that it
|
||||||
* gets run.
|
* gets run.
|
||||||
*/
|
*/
|
||||||
static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
|
static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
|
||||||
{
|
{
|
||||||
|
struct blk_mq_hw_ctx *hctx;
|
||||||
struct blk_mq_ctx *ctx;
|
struct blk_mq_ctx *ctx;
|
||||||
LIST_HEAD(tmp);
|
LIST_HEAD(tmp);
|
||||||
|
|
||||||
|
hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
|
||||||
ctx = __blk_mq_get_ctx(hctx->queue, cpu);
|
ctx = __blk_mq_get_ctx(hctx->queue, cpu);
|
||||||
|
|
||||||
spin_lock(&ctx->lock);
|
spin_lock(&ctx->lock);
|
||||||
@ -1578,30 +1580,20 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
|
|||||||
spin_unlock(&ctx->lock);
|
spin_unlock(&ctx->lock);
|
||||||
|
|
||||||
if (list_empty(&tmp))
|
if (list_empty(&tmp))
|
||||||
return NOTIFY_OK;
|
return 0;
|
||||||
|
|
||||||
spin_lock(&hctx->lock);
|
spin_lock(&hctx->lock);
|
||||||
list_splice_tail_init(&tmp, &hctx->dispatch);
|
list_splice_tail_init(&tmp, &hctx->dispatch);
|
||||||
spin_unlock(&hctx->lock);
|
spin_unlock(&hctx->lock);
|
||||||
|
|
||||||
blk_mq_run_hw_queue(hctx, true);
|
blk_mq_run_hw_queue(hctx, true);
|
||||||
return NOTIFY_OK;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int blk_mq_hctx_notify(void *data, unsigned long action,
|
static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
|
||||||
unsigned int cpu)
|
|
||||||
{
|
{
|
||||||
struct blk_mq_hw_ctx *hctx = data;
|
cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
|
||||||
|
&hctx->cpuhp_dead);
|
||||||
if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
|
|
||||||
return blk_mq_hctx_cpu_offline(hctx, cpu);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In case of CPU online, tags may be reallocated
|
|
||||||
* in blk_mq_map_swqueue() after mapping is updated.
|
|
||||||
*/
|
|
||||||
|
|
||||||
return NOTIFY_OK;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* hctx->ctxs will be freed in queue's release handler */
|
/* hctx->ctxs will be freed in queue's release handler */
|
||||||
@ -1621,7 +1613,7 @@ static void blk_mq_exit_hctx(struct request_queue *q,
|
|||||||
if (set->ops->exit_hctx)
|
if (set->ops->exit_hctx)
|
||||||
set->ops->exit_hctx(hctx, hctx_idx);
|
set->ops->exit_hctx(hctx, hctx_idx);
|
||||||
|
|
||||||
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
|
blk_mq_remove_cpuhp(hctx);
|
||||||
blk_free_flush_queue(hctx->fq);
|
blk_free_flush_queue(hctx->fq);
|
||||||
sbitmap_free(&hctx->ctx_map);
|
sbitmap_free(&hctx->ctx_map);
|
||||||
}
|
}
|
||||||
@ -1668,9 +1660,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
|||||||
hctx->queue_num = hctx_idx;
|
hctx->queue_num = hctx_idx;
|
||||||
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
|
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
|
||||||
|
|
||||||
blk_mq_init_cpu_notifier(&hctx->cpu_notifier,
|
cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
|
||||||
blk_mq_hctx_notify, hctx);
|
|
||||||
blk_mq_register_cpu_notifier(&hctx->cpu_notifier);
|
|
||||||
|
|
||||||
hctx->tags = set->tags[hctx_idx];
|
hctx->tags = set->tags[hctx_idx];
|
||||||
|
|
||||||
@ -1715,8 +1705,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
|
|||||||
free_ctxs:
|
free_ctxs:
|
||||||
kfree(hctx->ctxs);
|
kfree(hctx->ctxs);
|
||||||
unregister_cpu_notifier:
|
unregister_cpu_notifier:
|
||||||
blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
|
blk_mq_remove_cpuhp(hctx);
|
||||||
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2089,50 +2078,18 @@ static void blk_mq_queue_reinit(struct request_queue *q,
|
|||||||
blk_mq_sysfs_register(q);
|
blk_mq_sysfs_register(q);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
|
/*
|
||||||
unsigned long action, void *hcpu)
|
* New online cpumask which is going to be set in this hotplug event.
|
||||||
|
* Declare this cpumasks as global as cpu-hotplug operation is invoked
|
||||||
|
* one-by-one and dynamically allocating this could result in a failure.
|
||||||
|
*/
|
||||||
|
static struct cpumask cpuhp_online_new;
|
||||||
|
|
||||||
|
static void blk_mq_queue_reinit_work(void)
|
||||||
{
|
{
|
||||||
struct request_queue *q;
|
struct request_queue *q;
|
||||||
int cpu = (unsigned long)hcpu;
|
|
||||||
/*
|
|
||||||
* New online cpumask which is going to be set in this hotplug event.
|
|
||||||
* Declare this cpumasks as global as cpu-hotplug operation is invoked
|
|
||||||
* one-by-one and dynamically allocating this could result in a failure.
|
|
||||||
*/
|
|
||||||
static struct cpumask online_new;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Before hotadded cpu starts handling requests, new mappings must
|
|
||||||
* be established. Otherwise, these requests in hw queue might
|
|
||||||
* never be dispatched.
|
|
||||||
*
|
|
||||||
* For example, there is a single hw queue (hctx) and two CPU queues
|
|
||||||
* (ctx0 for CPU0, and ctx1 for CPU1).
|
|
||||||
*
|
|
||||||
* Now CPU1 is just onlined and a request is inserted into
|
|
||||||
* ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is
|
|
||||||
* still zero.
|
|
||||||
*
|
|
||||||
* And then while running hw queue, flush_busy_ctxs() finds bit0 is
|
|
||||||
* set in pending bitmap and tries to retrieve requests in
|
|
||||||
* hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0,
|
|
||||||
* so the request in ctx1->rq_list is ignored.
|
|
||||||
*/
|
|
||||||
switch (action & ~CPU_TASKS_FROZEN) {
|
|
||||||
case CPU_DEAD:
|
|
||||||
case CPU_UP_CANCELED:
|
|
||||||
cpumask_copy(&online_new, cpu_online_mask);
|
|
||||||
break;
|
|
||||||
case CPU_UP_PREPARE:
|
|
||||||
cpumask_copy(&online_new, cpu_online_mask);
|
|
||||||
cpumask_set_cpu(cpu, &online_new);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
return NOTIFY_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
mutex_lock(&all_q_mutex);
|
mutex_lock(&all_q_mutex);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need to freeze and reinit all existing queues. Freezing
|
* We need to freeze and reinit all existing queues. Freezing
|
||||||
* involves synchronous wait for an RCU grace period and doing it
|
* involves synchronous wait for an RCU grace period and doing it
|
||||||
@ -2153,13 +2110,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb,
|
|||||||
}
|
}
|
||||||
|
|
||||||
list_for_each_entry(q, &all_q_list, all_q_node)
|
list_for_each_entry(q, &all_q_list, all_q_node)
|
||||||
blk_mq_queue_reinit(q, &online_new);
|
blk_mq_queue_reinit(q, &cpuhp_online_new);
|
||||||
|
|
||||||
list_for_each_entry(q, &all_q_list, all_q_node)
|
list_for_each_entry(q, &all_q_list, all_q_node)
|
||||||
blk_mq_unfreeze_queue(q);
|
blk_mq_unfreeze_queue(q);
|
||||||
|
|
||||||
mutex_unlock(&all_q_mutex);
|
mutex_unlock(&all_q_mutex);
|
||||||
return NOTIFY_OK;
|
}
|
||||||
|
|
||||||
|
static int blk_mq_queue_reinit_dead(unsigned int cpu)
|
||||||
|
{
|
||||||
|
cpumask_copy(&cpuhp_online_new, cpu_online_mask);
|
||||||
|
blk_mq_queue_reinit_work();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Before hotadded cpu starts handling requests, new mappings must be
|
||||||
|
* established. Otherwise, these requests in hw queue might never be
|
||||||
|
* dispatched.
|
||||||
|
*
|
||||||
|
* For example, there is a single hw queue (hctx) and two CPU queues (ctx0
|
||||||
|
* for CPU0, and ctx1 for CPU1).
|
||||||
|
*
|
||||||
|
* Now CPU1 is just onlined and a request is inserted into ctx1->rq_list
|
||||||
|
* and set bit0 in pending bitmap as ctx1->index_hw is still zero.
|
||||||
|
*
|
||||||
|
* And then while running hw queue, flush_busy_ctxs() finds bit0 is set in
|
||||||
|
* pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list.
|
||||||
|
* But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list
|
||||||
|
* is ignored.
|
||||||
|
*/
|
||||||
|
static int blk_mq_queue_reinit_prepare(unsigned int cpu)
|
||||||
|
{
|
||||||
|
cpumask_copy(&cpuhp_online_new, cpu_online_mask);
|
||||||
|
cpumask_set_cpu(cpu, &cpuhp_online_new);
|
||||||
|
blk_mq_queue_reinit_work();
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
|
static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
|
||||||
@ -2378,10 +2365,12 @@ void blk_mq_enable_hotplug(void)
|
|||||||
|
|
||||||
static int __init blk_mq_init(void)
|
static int __init blk_mq_init(void)
|
||||||
{
|
{
|
||||||
blk_mq_cpu_init();
|
cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL,
|
||||||
|
blk_mq_hctx_notify_dead);
|
||||||
hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
|
|
||||||
|
|
||||||
|
cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare",
|
||||||
|
blk_mq_queue_reinit_prepare,
|
||||||
|
blk_mq_queue_reinit_dead);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
subsys_initcall(blk_mq_init);
|
subsys_initcall(blk_mq_init);
|
||||||
|
@ -32,13 +32,6 @@ void blk_mq_wake_waiters(struct request_queue *q);
|
|||||||
/*
|
/*
|
||||||
* CPU hotplug helpers
|
* CPU hotplug helpers
|
||||||
*/
|
*/
|
||||||
struct blk_mq_cpu_notifier;
|
|
||||||
void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier,
|
|
||||||
int (*fn)(void *, unsigned long, unsigned int),
|
|
||||||
void *data);
|
|
||||||
void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
|
|
||||||
void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier);
|
|
||||||
void blk_mq_cpu_init(void);
|
|
||||||
void blk_mq_enable_hotplug(void);
|
void blk_mq_enable_hotplug(void);
|
||||||
void blk_mq_disable_hotplug(void);
|
void blk_mq_disable_hotplug(void);
|
||||||
|
|
||||||
|
@ -7,12 +7,6 @@
|
|||||||
struct blk_mq_tags;
|
struct blk_mq_tags;
|
||||||
struct blk_flush_queue;
|
struct blk_flush_queue;
|
||||||
|
|
||||||
struct blk_mq_cpu_notifier {
|
|
||||||
struct list_head list;
|
|
||||||
void *data;
|
|
||||||
int (*notify)(void *data, unsigned long action, unsigned int cpu);
|
|
||||||
};
|
|
||||||
|
|
||||||
struct blk_mq_hw_ctx {
|
struct blk_mq_hw_ctx {
|
||||||
struct {
|
struct {
|
||||||
spinlock_t lock;
|
spinlock_t lock;
|
||||||
@ -53,7 +47,7 @@ struct blk_mq_hw_ctx {
|
|||||||
|
|
||||||
struct delayed_work delay_work;
|
struct delayed_work delay_work;
|
||||||
|
|
||||||
struct blk_mq_cpu_notifier cpu_notifier;
|
struct hlist_node cpuhp_dead;
|
||||||
struct kobject kobj;
|
struct kobject kobj;
|
||||||
|
|
||||||
unsigned long poll_considered;
|
unsigned long poll_considered;
|
||||||
|
Loading…
Reference in New Issue
Block a user