3 fixes for md in 3.3-rc

2 relate to the recently added drive replacement.
 
 One causes read error in RAID10 to sometimes be retried indefinitely.
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.18 (GNU/Linux)
 
 iQIVAwUAT1VI1znsnt1WYoG5AQK47Q//d51y5QCpABFNUcgIM626zJXlBWFUSmzU
 wFOGXh5emN6/TWguzkiZwrvcspDmXMzz1zmJtGWixYb2jBpn2MHEN4uNz3Vq68w+
 IYk/dJg/CG4+lzX+6IjiHOb3+TASRx94QZHJASx68vypqniAyikshqcbUeZBMTB0
 Fu+sKqsOGYmwQfe6/vtRPVXY7DYK2dFDBRMFpmOl+o4Y2XxmmWzMw4Dg1RIEdtFS
 Jo9GwLHTnlw2xoc0XooufeT0Q2KOpqi9T8L6Nj0ORwpgsFqgtZ/kIOoGU6qOpSri
 ofLTrobVKMpjFtmiYVOp9TaBlPnd/TNX3E4WPLGNsAwYuRUFjq8evmJKjG+pOdeB
 3ArxRKRJCaI2jnVhH+NpT7i/tpkEg/8a/BoOAihX+hM/8QkmsWluaRBOGMhpuuuc
 1baPVTusi/zijO9cM8RGIXaQj5UG4s3LUpCIOIYdDyxsfmAH5KN1F2EPrU4NMME2
 96THSshIZLkgAg5ICwtva0qoHlBlEclAlVAzEomT7R9KwHojEB1xUiyMmaIdMFoy
 JjGFAMp2E5+KBKZ1eYEHjthPWCb+nZ3eYHUh0DOnEt4kASCXnn45GJREQkpkNIR/
 HhDTS8vI743unKnbCtYFMxiw/9OXZbMkdoZhobg7lxcpoQlWJ+5ziOtACl0h0Kv8
 +ET+Kp3W8K4=
 =93ms
 -----END PGP SIGNATURE-----

Merge tag 'md-3.3-fixes' of git://neil.brown.name/md

Pull md fixes from Neil Brown:
 "Three fixes for md in 3.3-rc: Two relate to the recently added drive
  replacement.  One fixes the problem where a read error in RAID10 would
  sometimes be retried indefinitely."

* tag 'md-3.3-fixes' of git://neil.brown.name/md:
  md/raid10: fix assembling of arrays with replacement devices.
  md/raid10: fix handling of error on last working device in array.
  md/raid1: fix buglet in md_raid1_contested.
This commit is contained in:
Linus Torvalds 2012-03-05 16:01:25 -08:00
commit a2e5f13ce8
2 changed files with 28 additions and 12 deletions

View File

@ -624,7 +624,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
return 1; return 1;
rcu_read_lock(); rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) { for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev); struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) { if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q = bdev_get_queue(rdev->bdev); struct request_queue *q = bdev_get_queue(rdev->bdev);

View File

@ -67,6 +67,7 @@ static int max_queued_requests = 1024;
static void allow_barrier(struct r10conf *conf); static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf); static void lower_barrier(struct r10conf *conf);
static int enough(struct r10conf *conf, int ignore);
static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
{ {
@ -347,6 +348,19 @@ static void raid10_end_read_request(struct bio *bio, int error)
* wait for the 'master' bio. * wait for the 'master' bio.
*/ */
set_bit(R10BIO_Uptodate, &r10_bio->state); set_bit(R10BIO_Uptodate, &r10_bio->state);
} else {
/* If all other devices that store this block have
* failed, we want to return the error upwards rather
* than fail the last device. Here we redefine
* "uptodate" to mean "Don't want to retry"
*/
unsigned long flags;
spin_lock_irqsave(&conf->device_lock, flags);
if (!enough(conf, rdev->raid_disk))
uptodate = 1;
spin_unlock_irqrestore(&conf->device_lock, flags);
}
if (uptodate) {
raid_end_bio_io(r10_bio); raid_end_bio_io(r10_bio);
rdev_dec_pending(rdev, conf->mddev); rdev_dec_pending(rdev, conf->mddev);
} else { } else {
@ -2052,6 +2066,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
"md/raid10:%s: %s: Failing raid device\n", "md/raid10:%s: %s: Failing raid device\n",
mdname(mddev), b); mdname(mddev), b);
md_error(mddev, conf->mirrors[d].rdev); md_error(mddev, conf->mirrors[d].rdev);
r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
return; return;
} }
@ -2105,8 +2120,11 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
rdev, rdev,
r10_bio->devs[r10_bio->read_slot].addr r10_bio->devs[r10_bio->read_slot].addr
+ sect, + sect,
s, 0)) s, 0)) {
md_error(mddev, rdev); md_error(mddev, rdev);
r10_bio->devs[r10_bio->read_slot].bio
= IO_BLOCKED;
}
break; break;
} }
@ -2299,17 +2317,20 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
* This is all done synchronously while the array is * This is all done synchronously while the array is
* frozen. * frozen.
*/ */
bio = r10_bio->devs[slot].bio;
bdevname(bio->bi_bdev, b);
bio_put(bio);
r10_bio->devs[slot].bio = NULL;
if (mddev->ro == 0) { if (mddev->ro == 0) {
freeze_array(conf); freeze_array(conf);
fix_read_error(conf, mddev, r10_bio); fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf); unfreeze_array(conf);
} } else
r10_bio->devs[slot].bio = IO_BLOCKED;
rdev_dec_pending(rdev, mddev); rdev_dec_pending(rdev, mddev);
bio = r10_bio->devs[slot].bio;
bdevname(bio->bi_bdev, b);
r10_bio->devs[slot].bio =
mddev->ro ? IO_BLOCKED : NULL;
read_more: read_more:
rdev = read_balance(conf, r10_bio, &max_sectors); rdev = read_balance(conf, r10_bio, &max_sectors);
if (rdev == NULL) { if (rdev == NULL) {
@ -2318,13 +2339,10 @@ read_more:
mdname(mddev), b, mdname(mddev), b,
(unsigned long long)r10_bio->sector); (unsigned long long)r10_bio->sector);
raid_end_bio_io(r10_bio); raid_end_bio_io(r10_bio);
bio_put(bio);
return; return;
} }
do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC);
if (bio)
bio_put(bio);
slot = r10_bio->read_slot; slot = r10_bio->read_slot;
printk_ratelimited( printk_ratelimited(
KERN_ERR KERN_ERR
@ -2360,7 +2378,6 @@ read_more:
mbio->bi_phys_segments++; mbio->bi_phys_segments++;
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
generic_make_request(bio); generic_make_request(bio);
bio = NULL;
r10_bio = mempool_alloc(conf->r10bio_pool, r10_bio = mempool_alloc(conf->r10bio_pool,
GFP_NOIO); GFP_NOIO);
@ -3243,7 +3260,6 @@ static int run(struct mddev *mddev)
disk->rdev = rdev; disk->rdev = rdev;
} }
disk->rdev = rdev;
disk_stack_limits(mddev->gendisk, rdev->bdev, disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9); rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk /* as we don't honour merge_bvec_fn, we must never risk