mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-07 13:43:51 +00:00
block: add partition resize function to blkpg ioctl
Add a new operation code (BLKPG_RESIZE_PARTITION) to the BLKPG ioctl that allows altering the size of an existing partition, even if it is currently in use. This patch protects hd_struct->nr_sects with a sequence counter because one might extend a partition while IO is happening to it, and the update of nr_sects can be non-atomic on 32-bit machines with a 64-bit sector_t. This can lead to issues like reading an inconsistent size of a partition. Sequence counters have been used so that readers don't have to take the bdev mutex lock, as we call sector_in_part() very frequently. Now all access to hd_struct->nr_sects should happen using the sequence counter read/update helper functions part_nr_sects_read()/part_nr_sects_write(). There is one exception though: set_capacity()/get_capacity(). I think theoretically a race could exist there too, but this patch does not modify set_capacity()/get_capacity() due to the sheer number of call sites, and I am afraid that the change might break something. I have left that as a TODO item. We can handle it later if need be. This patch does not introduce any new races as such w.r.t. set_capacity()/get_capacity(). v2: Add CONFIG_LBDAF test to UP preempt case as suggested by Phillip. Signed-off-by: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Phillip Susi <psusi@ubuntu.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
4638a83e86
commit
c83f6bf98d
@ -154,7 +154,7 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter)
|
|||||||
part = rcu_dereference(ptbl->part[piter->idx]);
|
part = rcu_dereference(ptbl->part[piter->idx]);
|
||||||
if (!part)
|
if (!part)
|
||||||
continue;
|
continue;
|
||||||
if (!part->nr_sects &&
|
if (!part_nr_sects_read(part) &&
|
||||||
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
|
!(piter->flags & DISK_PITER_INCL_EMPTY) &&
|
||||||
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
|
!(piter->flags & DISK_PITER_INCL_EMPTY_PART0 &&
|
||||||
piter->idx == 0))
|
piter->idx == 0))
|
||||||
@ -191,7 +191,7 @@ EXPORT_SYMBOL_GPL(disk_part_iter_exit);
|
|||||||
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
|
static inline int sector_in_part(struct hd_struct *part, sector_t sector)
|
||||||
{
|
{
|
||||||
return part->start_sect <= sector &&
|
return part->start_sect <= sector &&
|
||||||
sector < part->start_sect + part->nr_sects;
|
sector < part->start_sect + part_nr_sects_read(part);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -769,8 +769,8 @@ void __init printk_all_partitions(void)
|
|||||||
|
|
||||||
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
|
printk("%s%s %10llu %s %s", is_part0 ? "" : " ",
|
||||||
bdevt_str(part_devt(part), devt_buf),
|
bdevt_str(part_devt(part), devt_buf),
|
||||||
(unsigned long long)part->nr_sects >> 1,
|
(unsigned long long)part_nr_sects_read(part) >> 1
|
||||||
disk_name(disk, part->partno, name_buf),
|
, disk_name(disk, part->partno, name_buf),
|
||||||
uuid_buf);
|
uuid_buf);
|
||||||
if (is_part0) {
|
if (is_part0) {
|
||||||
if (disk->driverfs_dev != NULL &&
|
if (disk->driverfs_dev != NULL &&
|
||||||
@ -862,7 +862,7 @@ static int show_partition(struct seq_file *seqf, void *v)
|
|||||||
while ((part = disk_part_iter_next(&piter)))
|
while ((part = disk_part_iter_next(&piter)))
|
||||||
seq_printf(seqf, "%4d %7d %10llu %s\n",
|
seq_printf(seqf, "%4d %7d %10llu %s\n",
|
||||||
MAJOR(part_devt(part)), MINOR(part_devt(part)),
|
MAJOR(part_devt(part)), MINOR(part_devt(part)),
|
||||||
(unsigned long long)part->nr_sects >> 1,
|
(unsigned long long)part_nr_sects_read(part) >> 1,
|
||||||
disk_name(sgp, part->partno, buf));
|
disk_name(sgp, part->partno, buf));
|
||||||
disk_part_iter_exit(&piter);
|
disk_part_iter_exit(&piter);
|
||||||
|
|
||||||
@ -1268,6 +1268,16 @@ struct gendisk *alloc_disk_node(int minors, int node_id)
|
|||||||
}
|
}
|
||||||
disk->part_tbl->part[0] = &disk->part0;
|
disk->part_tbl->part[0] = &disk->part0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* set_capacity() and get_capacity() currently don't use
|
||||||
|
* seqcounter to read/update the part0->nr_sects. Still init
|
||||||
|
* the counter as we can read the sectors in IO submission
|
||||||
|
* path using sequence counters.
|
||||||
|
*
|
||||||
|
* TODO: Ideally set_capacity() and get_capacity() should be
|
||||||
|
* converted to make use of bd_mutex and sequence counters.
|
||||||
|
*/
|
||||||
|
seqcount_init(&disk->part0.nr_sects_seq);
|
||||||
hd_ref_init(&disk->part0);
|
hd_ref_init(&disk->part0);
|
||||||
|
|
||||||
disk->minors = minors;
|
disk->minors = minors;
|
||||||
|
@ -13,7 +13,7 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||||||
{
|
{
|
||||||
struct block_device *bdevp;
|
struct block_device *bdevp;
|
||||||
struct gendisk *disk;
|
struct gendisk *disk;
|
||||||
struct hd_struct *part;
|
struct hd_struct *part, *lpart;
|
||||||
struct blkpg_ioctl_arg a;
|
struct blkpg_ioctl_arg a;
|
||||||
struct blkpg_partition p;
|
struct blkpg_partition p;
|
||||||
struct disk_part_iter piter;
|
struct disk_part_iter piter;
|
||||||
@ -36,8 +36,8 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||||||
case BLKPG_ADD_PARTITION:
|
case BLKPG_ADD_PARTITION:
|
||||||
start = p.start >> 9;
|
start = p.start >> 9;
|
||||||
length = p.length >> 9;
|
length = p.length >> 9;
|
||||||
/* check for fit in a hd_struct */
|
/* check for fit in a hd_struct */
|
||||||
if (sizeof(sector_t) == sizeof(long) &&
|
if (sizeof(sector_t) == sizeof(long) &&
|
||||||
sizeof(long long) > sizeof(long)) {
|
sizeof(long long) > sizeof(long)) {
|
||||||
long pstart = start, plength = length;
|
long pstart = start, plength = length;
|
||||||
if (pstart != start || plength != length
|
if (pstart != start || plength != length
|
||||||
@ -91,6 +91,59 @@ static int blkpg_ioctl(struct block_device *bdev, struct blkpg_ioctl_arg __user
|
|||||||
mutex_unlock(&bdevp->bd_mutex);
|
mutex_unlock(&bdevp->bd_mutex);
|
||||||
bdput(bdevp);
|
bdput(bdevp);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
case BLKPG_RESIZE_PARTITION:
|
||||||
|
start = p.start >> 9;
|
||||||
|
/* p.length is the new partition length in bytes; convert to 512-byte sectors */
|
||||||
|
length = p.length >> 9;
|
||||||
|
/* check for fit in a hd_struct */
|
||||||
|
if (sizeof(sector_t) == sizeof(long) &&
|
||||||
|
sizeof(long long) > sizeof(long)) {
|
||||||
|
long pstart = start, plength = length;
|
||||||
|
if (pstart != start || plength != length
|
||||||
|
|| pstart < 0 || plength < 0)
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
part = disk_get_part(disk, partno);
|
||||||
|
if (!part)
|
||||||
|
return -ENXIO;
|
||||||
|
bdevp = bdget(part_devt(part));
|
||||||
|
if (!bdevp) {
|
||||||
|
disk_put_part(part);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
mutex_lock(&bdevp->bd_mutex);
|
||||||
|
mutex_lock_nested(&bdev->bd_mutex, 1);
|
||||||
|
if (start != part->start_sect) {
|
||||||
|
mutex_unlock(&bdevp->bd_mutex);
|
||||||
|
mutex_unlock(&bdev->bd_mutex);
|
||||||
|
bdput(bdevp);
|
||||||
|
disk_put_part(part);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
/* overlap? */
|
||||||
|
disk_part_iter_init(&piter, disk,
|
||||||
|
DISK_PITER_INCL_EMPTY);
|
||||||
|
while ((lpart = disk_part_iter_next(&piter))) {
|
||||||
|
if (lpart->partno != partno &&
|
||||||
|
!(start + length <= lpart->start_sect ||
|
||||||
|
start >= lpart->start_sect + lpart->nr_sects)
|
||||||
|
) {
|
||||||
|
disk_part_iter_exit(&piter);
|
||||||
|
mutex_unlock(&bdevp->bd_mutex);
|
||||||
|
mutex_unlock(&bdev->bd_mutex);
|
||||||
|
bdput(bdevp);
|
||||||
|
disk_put_part(part);
|
||||||
|
return -EBUSY;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
disk_part_iter_exit(&piter);
|
||||||
|
part_nr_sects_write(part, (sector_t)length);
|
||||||
|
i_size_write(bdevp->bd_inode, p.length);
|
||||||
|
mutex_unlock(&bdevp->bd_mutex);
|
||||||
|
mutex_unlock(&bdev->bd_mutex);
|
||||||
|
bdput(bdevp);
|
||||||
|
disk_put_part(part);
|
||||||
return 0;
|
return 0;
|
||||||
default:
|
default:
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -84,7 +84,7 @@ ssize_t part_size_show(struct device *dev,
|
|||||||
struct device_attribute *attr, char *buf)
|
struct device_attribute *attr, char *buf)
|
||||||
{
|
{
|
||||||
struct hd_struct *p = dev_to_part(dev);
|
struct hd_struct *p = dev_to_part(dev);
|
||||||
return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects);
|
return sprintf(buf, "%llu\n",(unsigned long long)part_nr_sects_read(p));
|
||||||
}
|
}
|
||||||
|
|
||||||
static ssize_t part_ro_show(struct device *dev,
|
static ssize_t part_ro_show(struct device *dev,
|
||||||
@ -294,6 +294,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno,
|
|||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto out_free;
|
goto out_free;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
seqcount_init(&p->nr_sects_seq);
|
||||||
pdev = part_to_dev(p);
|
pdev = part_to_dev(p);
|
||||||
|
|
||||||
p->start_sect = start;
|
p->start_sect = start;
|
||||||
|
@ -40,6 +40,7 @@ struct blkpg_ioctl_arg {
|
|||||||
/* The subfunctions (for the op field) */
|
/* The subfunctions (for the op field) */
|
||||||
#define BLKPG_ADD_PARTITION 1
|
#define BLKPG_ADD_PARTITION 1
|
||||||
#define BLKPG_DEL_PARTITION 2
|
#define BLKPG_DEL_PARTITION 2
|
||||||
|
#define BLKPG_RESIZE_PARTITION 3
|
||||||
|
|
||||||
/* Sizes of name fields. Unused at present. */
|
/* Sizes of name fields. Unused at present. */
|
||||||
#define BLKPG_DEVNAMELTH 64
|
#define BLKPG_DEVNAMELTH 64
|
||||||
|
@ -98,7 +98,13 @@ struct partition_meta_info {
|
|||||||
|
|
||||||
struct hd_struct {
|
struct hd_struct {
|
||||||
sector_t start_sect;
|
sector_t start_sect;
|
||||||
|
/*
|
||||||
|
* nr_sects is protected by sequence counter. One might extend a
|
||||||
|
* partition while IO is happening to it and update of nr_sects
|
||||||
|
* can be non-atomic on 32bit machines with 64bit sector_t.
|
||||||
|
*/
|
||||||
sector_t nr_sects;
|
sector_t nr_sects;
|
||||||
|
seqcount_t nr_sects_seq;
|
||||||
sector_t alignment_offset;
|
sector_t alignment_offset;
|
||||||
unsigned int discard_alignment;
|
unsigned int discard_alignment;
|
||||||
struct device __dev;
|
struct device __dev;
|
||||||
@ -648,6 +654,57 @@ static inline void hd_struct_put(struct hd_struct *part)
|
|||||||
__delete_partition(part);
|
__delete_partition(part);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Any access of part->nr_sects which is not protected by partition
|
||||||
|
* bd_mutex or gendisk bdev bd_mutex, should be done using this
|
||||||
|
* accessor function.
|
||||||
|
*
|
||||||
|
* Code written along the lines of i_size_read() and i_size_write().
|
||||||
|
* CONFIG_PREEMPT case optimizes the case of UP kernel with preemption
|
||||||
|
* on.
|
||||||
|
*/
|
||||||
|
static inline sector_t part_nr_sects_read(struct hd_struct *part)
|
||||||
|
{
|
||||||
|
#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
|
||||||
|
sector_t nr_sects;
|
||||||
|
unsigned seq;
|
||||||
|
do {
|
||||||
|
seq = read_seqcount_begin(&part->nr_sects_seq);
|
||||||
|
nr_sects = part->nr_sects;
|
||||||
|
} while (read_seqcount_retry(&part->nr_sects_seq, seq));
|
||||||
|
return nr_sects;
|
||||||
|
#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
|
||||||
|
sector_t nr_sects;
|
||||||
|
|
||||||
|
preempt_disable();
|
||||||
|
nr_sects = part->nr_sects;
|
||||||
|
preempt_enable();
|
||||||
|
return nr_sects;
|
||||||
|
#else
|
||||||
|
return part->nr_sects;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Should be called with mutex lock held (typically bd_mutex) of partition
|
||||||
|
* to provide mutual exclusion among writers; otherwise seqcount might be
|
||||||
|
* left in wrong state leaving the readers spinning infinitely.
|
||||||
|
*/
|
||||||
|
static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
|
||||||
|
{
|
||||||
|
#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP)
|
||||||
|
write_seqcount_begin(&part->nr_sects_seq);
|
||||||
|
part->nr_sects = size;
|
||||||
|
write_seqcount_end(&part->nr_sects_seq);
|
||||||
|
#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT)
|
||||||
|
preempt_disable();
|
||||||
|
part->nr_sects = size;
|
||||||
|
preempt_enable();
|
||||||
|
#else
|
||||||
|
part->nr_sects = size;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
#else /* CONFIG_BLOCK */
|
#else /* CONFIG_BLOCK */
|
||||||
|
|
||||||
static inline void printk_all_partitions(void) { }
|
static inline void printk_all_partitions(void) { }
|
||||||
|
Loading…
Reference in New Issue
Block a user