raid5: make_request does less prepare wait

On a NUMA machine, prepare_to_wait/finish_wait in make_request exposes a
lot of contention for sequential workloads (or workloads with big request
sizes). For such workloads, each bio includes several stripes. So we
can just do prepare_to_wait/finish_wait once for the whole bio instead
of for every stripe.  This reduces the lock contention completely for such
workloads. Random workloads might have similar lock contention too,
but I haven't seen it yet, maybe because my storage is still not fast
enough.

Signed-off-by: Shaohua Li <shli@fusionio.com>
Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
Shaohua Li 2014-04-09 11:25:47 +08:00 committed by NeilBrown
parent e2f23b606b
commit 27c0f68f07

View File

@ -4552,6 +4552,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
struct stripe_head *sh; struct stripe_head *sh;
const int rw = bio_data_dir(bi); const int rw = bio_data_dir(bi);
int remaining; int remaining;
DEFINE_WAIT(w);
bool do_prepare;
if (unlikely(bi->bi_rw & REQ_FLUSH)) { if (unlikely(bi->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bi); md_flush_request(mddev, bi);
@ -4575,15 +4577,18 @@ static void make_request(struct mddev *mddev, struct bio * bi)
bi->bi_next = NULL; bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */ bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE);
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) { for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int previous; int previous;
int seq; int seq;
do_prepare = false;
retry: retry:
seq = read_seqcount_begin(&conf->gen_lock); seq = read_seqcount_begin(&conf->gen_lock);
previous = 0; previous = 0;
prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); if (do_prepare)
prepare_to_wait(&conf->wait_for_overlap, &w,
TASK_UNINTERRUPTIBLE);
if (unlikely(conf->reshape_progress != MaxSector)) { if (unlikely(conf->reshape_progress != MaxSector)) {
/* spinlock is needed as reshape_progress may be /* spinlock is needed as reshape_progress may be
* 64bit on a 32bit platform, and so it might be * 64bit on a 32bit platform, and so it might be
@ -4604,6 +4609,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
: logical_sector >= conf->reshape_safe) { : logical_sector >= conf->reshape_safe) {
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
schedule(); schedule();
do_prepare = true;
goto retry; goto retry;
} }
} }
@ -4640,6 +4646,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if (must_retry) { if (must_retry) {
release_stripe(sh); release_stripe(sh);
schedule(); schedule();
do_prepare = true;
goto retry; goto retry;
} }
} }
@ -4663,8 +4670,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
prepare_to_wait(&conf->wait_for_overlap, prepare_to_wait(&conf->wait_for_overlap,
&w, TASK_INTERRUPTIBLE); &w, TASK_INTERRUPTIBLE);
if (logical_sector >= mddev->suspend_lo && if (logical_sector >= mddev->suspend_lo &&
logical_sector < mddev->suspend_hi) logical_sector < mddev->suspend_hi) {
schedule(); schedule();
do_prepare = true;
}
goto retry; goto retry;
} }
@ -4677,9 +4686,9 @@ static void make_request(struct mddev *mddev, struct bio * bi)
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
release_stripe(sh); release_stripe(sh);
schedule(); schedule();
do_prepare = true;
goto retry; goto retry;
} }
finish_wait(&conf->wait_for_overlap, &w);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
clear_bit(STRIPE_DELAYED, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state);
if ((bi->bi_rw & REQ_SYNC) && if ((bi->bi_rw & REQ_SYNC) &&
@ -4689,10 +4698,10 @@ static void make_request(struct mddev *mddev, struct bio * bi)
} else { } else {
/* cannot get stripe for read-ahead, just give-up */ /* cannot get stripe for read-ahead, just give-up */
clear_bit(BIO_UPTODATE, &bi->bi_flags); clear_bit(BIO_UPTODATE, &bi->bi_flags);
finish_wait(&conf->wait_for_overlap, &w);
break; break;
} }
} }
finish_wait(&conf->wait_for_overlap, &w);
remaining = raid5_dec_bi_active_stripes(bi); remaining = raid5_dec_bi_active_stripes(bi);
if (remaining == 0) { if (remaining == 0) {