ext4: add support for online resizing with bigalloc

This patch adds support for online resizing of bigalloc file systems by
implementing the EXT4_IOC_RESIZE_FS ioctl for them. The old resize
interfaces (adding block groups and extending the last block group) are
left untouched. Tests were performed with cluster sizes of 1, 2, 4 and 8
blocks (of size 4k) per cluster. I will add these tests to xfstests.

Signed-off-by: Harshad Shirwadkar <harshads@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
This commit is contained in:
harshads 2017-10-29 09:38:46 -04:00 committed by Theodore Ts'o
parent d98bf8cd11
commit d77147ff44
4 changed files with 84 additions and 58 deletions

View File

@ -545,8 +545,8 @@ struct ext4_new_group_data {
__u64 inode_table; __u64 inode_table;
__u32 blocks_count; __u32 blocks_count;
__u16 reserved_blocks; __u16 reserved_blocks;
__u16 unused; __u16 mdata_blocks;
__u32 free_blocks_count; __u32 free_clusters_count;
}; };
/* Indexes used to index group tables in ext4_new_group_data */ /* Indexes used to index group tables in ext4_new_group_data */

View File

@ -871,12 +871,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
int err = 0, err2 = 0; int err = 0, err2 = 0;
ext4_group_t o_group = EXT4_SB(sb)->s_groups_count; ext4_group_t o_group = EXT4_SB(sb)->s_groups_count;
if (ext4_has_feature_bigalloc(sb)) {
ext4_msg(sb, KERN_ERR,
"Online resizing not (yet) supported with bigalloc");
return -EOPNOTSUPP;
}
if (copy_from_user(&n_blocks_count, (__u64 __user *)arg, if (copy_from_user(&n_blocks_count, (__u64 __user *)arg,
sizeof(__u64))) { sizeof(__u64))) {
return -EFAULT; return -EFAULT;

View File

@ -4994,8 +4994,11 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
struct ext4_group_desc *desc; struct ext4_group_desc *desc;
struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_buddy e4b; struct ext4_buddy e4b;
int err = 0, ret, blk_free_count; int err = 0, ret, free_clusters_count;
ext4_grpblk_t blocks_freed; ext4_grpblk_t clusters_freed;
ext4_fsblk_t first_cluster = EXT4_B2C(sbi, block);
ext4_fsblk_t last_cluster = EXT4_B2C(sbi, block + count - 1);
unsigned long cluster_count = last_cluster - first_cluster + 1;
ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1); ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
@ -5007,8 +5010,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* Check to see if we are freeing blocks across a group * Check to see if we are freeing blocks across a group
* boundary. * boundary.
*/ */
if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { if (bit + cluster_count > EXT4_CLUSTERS_PER_GROUP(sb)) {
ext4_warning(sb, "too much blocks added to group %u", ext4_warning(sb, "too many blocks added to group %u",
block_group); block_group);
err = -EINVAL; err = -EINVAL;
goto error_return; goto error_return;
@ -5054,14 +5057,14 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
if (err) if (err)
goto error_return; goto error_return;
for (i = 0, blocks_freed = 0; i < count; i++) { for (i = 0, clusters_freed = 0; i < cluster_count; i++) {
BUFFER_TRACE(bitmap_bh, "clear bit"); BUFFER_TRACE(bitmap_bh, "clear bit");
if (!mb_test_bit(bit + i, bitmap_bh->b_data)) { if (!mb_test_bit(bit + i, bitmap_bh->b_data)) {
ext4_error(sb, "bit already cleared for block %llu", ext4_error(sb, "bit already cleared for block %llu",
(ext4_fsblk_t)(block + i)); (ext4_fsblk_t)(block + i));
BUFFER_TRACE(bitmap_bh, "bit already cleared"); BUFFER_TRACE(bitmap_bh, "bit already cleared");
} else { } else {
blocks_freed++; clusters_freed++;
} }
} }
@ -5075,19 +5078,20 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
* them with group lock_held * them with group lock_held
*/ */
ext4_lock_group(sb, block_group); ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count); mb_clear_bits(bitmap_bh->b_data, bit, cluster_count);
mb_free_blocks(NULL, &e4b, bit, count); mb_free_blocks(NULL, &e4b, bit, cluster_count);
blk_free_count = blocks_freed + ext4_free_group_clusters(sb, desc); free_clusters_count = clusters_freed +
ext4_free_group_clusters_set(sb, desc, blk_free_count); ext4_free_group_clusters(sb, desc);
ext4_free_group_clusters_set(sb, desc, free_clusters_count);
ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh); ext4_block_bitmap_csum_set(sb, block_group, desc, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, desc); ext4_group_desc_csum_set(sb, block_group, desc);
ext4_unlock_group(sb, block_group); ext4_unlock_group(sb, block_group);
percpu_counter_add(&sbi->s_freeclusters_counter, percpu_counter_add(&sbi->s_freeclusters_counter,
EXT4_NUM_B2C(sbi, blocks_freed)); clusters_freed);
if (sbi->s_log_groups_per_flex) { if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group); ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed), atomic64_add(clusters_freed,
&sbi->s_flex_groups[flex_group].free_clusters); &sbi->s_flex_groups[flex_group].free_clusters);
} }

View File

@ -106,7 +106,7 @@ static int verify_group_input(struct super_block *sb,
overhead = ext4_group_overhead_blocks(sb, group); overhead = ext4_group_overhead_blocks(sb, group);
metaend = start + overhead; metaend = start + overhead;
input->free_blocks_count = free_blocks_count = input->free_clusters_count = free_blocks_count =
input->blocks_count - 2 - overhead - sbi->s_itb_per_group; input->blocks_count - 2 - overhead - sbi->s_itb_per_group;
if (test_opt(sb, DEBUG)) if (test_opt(sb, DEBUG))
@ -257,6 +257,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
ext4_group_t last_group; ext4_group_t last_group;
unsigned overhead; unsigned overhead;
__u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0; __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
int i;
BUG_ON(flex_gd->count == 0 || group_data == NULL); BUG_ON(flex_gd->count == 0 || group_data == NULL);
@ -293,7 +294,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
group_data[bb_index].block_bitmap = start_blk++; group_data[bb_index].block_bitmap = start_blk++;
group = ext4_get_group_number(sb, start_blk - 1); group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group; group -= group_data[0].group;
group_data[group].free_blocks_count--; group_data[group].mdata_blocks++;
flex_gd->bg_flags[group] &= uninit_mask; flex_gd->bg_flags[group] &= uninit_mask;
} }
@ -304,7 +305,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
group_data[ib_index].inode_bitmap = start_blk++; group_data[ib_index].inode_bitmap = start_blk++;
group = ext4_get_group_number(sb, start_blk - 1); group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group; group -= group_data[0].group;
group_data[group].free_blocks_count--; group_data[group].mdata_blocks++;
flex_gd->bg_flags[group] &= uninit_mask; flex_gd->bg_flags[group] &= uninit_mask;
} }
@ -323,15 +324,22 @@ static int ext4_alloc_group_tables(struct super_block *sb,
if (start_blk + itb > next_group_start) { if (start_blk + itb > next_group_start) {
flex_gd->bg_flags[group + 1] &= uninit_mask; flex_gd->bg_flags[group + 1] &= uninit_mask;
overhead = start_blk + itb - next_group_start; overhead = start_blk + itb - next_group_start;
group_data[group + 1].free_blocks_count -= overhead; group_data[group + 1].mdata_blocks += overhead;
itb -= overhead; itb -= overhead;
} }
group_data[group].free_blocks_count -= itb; group_data[group].mdata_blocks += itb;
flex_gd->bg_flags[group] &= uninit_mask; flex_gd->bg_flags[group] &= uninit_mask;
start_blk += EXT4_SB(sb)->s_itb_per_group; start_blk += EXT4_SB(sb)->s_itb_per_group;
} }
/* Update free clusters count to exclude metadata blocks */
for (i = 0; i < flex_gd->count; i++) {
group_data[i].free_clusters_count -=
EXT4_NUM_B2C(EXT4_SB(sb),
group_data[i].mdata_blocks);
}
if (test_opt(sb, DEBUG)) { if (test_opt(sb, DEBUG)) {
int i; int i;
group = group_data[0].group; group = group_data[0].group;
@ -341,12 +349,13 @@ static int ext4_alloc_group_tables(struct super_block *sb,
flexbg_size); flexbg_size);
for (i = 0; i < flex_gd->count; i++) { for (i = 0; i < flex_gd->count; i++) {
printk(KERN_DEBUG "adding %s group %u: %u " ext4_debug(
"blocks (%d free)\n", "adding %s group %u: %u blocks (%d free, %d mdata blocks)\n",
ext4_bg_has_super(sb, group + i) ? "normal" : ext4_bg_has_super(sb, group + i) ? "normal" :
"no-super", group + i, "no-super", group + i,
group_data[i].blocks_count, group_data[i].blocks_count,
group_data[i].free_blocks_count); group_data[i].free_clusters_count,
group_data[i].mdata_blocks);
} }
} }
return 0; return 0;
@ -398,7 +407,7 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
} }
/* /*
* set_flexbg_block_bitmap() mark @count blocks starting from @block used. * set_flexbg_block_bitmap() mark clusters [@first_cluster, @last_cluster] used.
* *
* Helper function for ext4_setup_new_group_blocks() which set . * Helper function for ext4_setup_new_group_blocks() which set .
* *
@ -408,22 +417,26 @@ static int extend_or_restart_transaction(handle_t *handle, int thresh)
*/ */
static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle, static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
struct ext4_new_flex_group_data *flex_gd, struct ext4_new_flex_group_data *flex_gd,
ext4_fsblk_t block, ext4_group_t count) ext4_fsblk_t first_cluster, ext4_fsblk_t last_cluster)
{ {
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_group_t count = last_cluster - first_cluster + 1;
ext4_group_t count2; ext4_group_t count2;
ext4_debug("mark blocks [%llu/%u] used\n", block, count); ext4_debug("mark clusters [%llu-%llu] used\n", first_cluster,
for (count2 = count; count > 0; count -= count2, block += count2) { last_cluster);
for (count2 = count; count > 0;
count -= count2, first_cluster += count2) {
ext4_fsblk_t start; ext4_fsblk_t start;
struct buffer_head *bh; struct buffer_head *bh;
ext4_group_t group; ext4_group_t group;
int err; int err;
group = ext4_get_group_number(sb, block); group = ext4_get_group_number(sb, EXT4_C2B(sbi, first_cluster));
start = ext4_group_first_block_no(sb, group); start = EXT4_B2C(sbi, ext4_group_first_block_no(sb, group));
group -= flex_gd->groups[0].group; group -= flex_gd->groups[0].group;
count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start); count2 = EXT4_CLUSTERS_PER_GROUP(sb) - (first_cluster - start);
if (count2 > count) if (count2 > count)
count2 = count; count2 = count;
@ -444,9 +457,9 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
err = ext4_journal_get_write_access(handle, bh); err = ext4_journal_get_write_access(handle, bh);
if (err) if (err)
return err; return err;
ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block, ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n",
block - start, count2); first_cluster, first_cluster - start, count2);
ext4_set_bits(bh->b_data, block - start, count2); ext4_set_bits(bh->b_data, first_cluster - start, count2);
err = ext4_handle_dirty_metadata(handle, NULL, bh); err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (unlikely(err)) if (unlikely(err))
@ -595,9 +608,10 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
if (overhead != 0) { if (overhead != 0) {
ext4_debug("mark backup superblock %#04llx (+0)\n", ext4_debug("mark backup superblock %#04llx (+0)\n",
start); start);
ext4_set_bits(bh->b_data, 0, overhead); ext4_set_bits(bh->b_data, 0,
EXT4_NUM_B2C(sbi, overhead));
} }
ext4_mark_bitmap_end(group_data[i].blocks_count, ext4_mark_bitmap_end(EXT4_B2C(sbi, group_data[i].blocks_count),
sb->s_blocksize * 8, bh->b_data); sb->s_blocksize * 8, bh->b_data);
err = ext4_handle_dirty_metadata(handle, NULL, bh); err = ext4_handle_dirty_metadata(handle, NULL, bh);
if (err) if (err)
@ -642,7 +656,11 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
continue; continue;
} }
err = set_flexbg_block_bitmap(sb, handle, err = set_flexbg_block_bitmap(sb, handle,
flex_gd, start, count); flex_gd,
EXT4_B2C(sbi, start),
EXT4_B2C(sbi,
start + count
- 1));
if (err) if (err)
goto out; goto out;
count = group_table_count[j]; count = group_table_count[j];
@ -652,7 +670,11 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
if (count) { if (count) {
err = set_flexbg_block_bitmap(sb, handle, err = set_flexbg_block_bitmap(sb, handle,
flex_gd, start, count); flex_gd,
EXT4_B2C(sbi, start),
EXT4_B2C(sbi,
start + count
- 1));
if (err) if (err)
goto out; goto out;
} }
@ -840,7 +862,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
ext4_std_error(sb, err); ext4_std_error(sb, err);
goto exit_inode; goto exit_inode;
} }
inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >>
(9 - EXT4_SB(sb)->s_cluster_bits);
ext4_mark_iloc_dirty(handle, inode, &iloc); ext4_mark_iloc_dirty(handle, inode, &iloc);
memset(gdb_bh->b_data, 0, sb->s_blocksize); memset(gdb_bh->b_data, 0, sb->s_blocksize);
err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh); err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
@ -935,6 +958,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
{ {
struct super_block *sb = inode->i_sb; struct super_block *sb = inode->i_sb;
int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks);
int cluster_bits = EXT4_SB(sb)->s_cluster_bits;
struct buffer_head **primary; struct buffer_head **primary;
struct buffer_head *dind; struct buffer_head *dind;
struct ext4_iloc iloc; struct ext4_iloc iloc;
@ -1010,7 +1034,8 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
if (!err) if (!err)
err = err2; err = err2;
} }
inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9;
inode->i_blocks += reserved_gdb * sb->s_blocksize >> (9 - cluster_bits);
ext4_mark_iloc_dirty(handle, inode, &iloc); ext4_mark_iloc_dirty(handle, inode, &iloc);
exit_bh: exit_bh:
@ -1244,7 +1269,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
ext4_group_t group; ext4_group_t group;
__u16 *bg_flags = flex_gd->bg_flags; __u16 *bg_flags = flex_gd->bg_flags;
int i, gdb_off, gdb_num, err = 0; int i, gdb_off, gdb_num, err = 0;
for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) { for (i = 0; i < flex_gd->count; i++, group_data++, bg_flags++) {
group = group_data->group; group = group_data->group;
@ -1271,7 +1296,7 @@ static int ext4_setup_new_descs(handle_t *handle, struct super_block *sb,
ext4_inode_table_set(sb, gdp, group_data->inode_table); ext4_inode_table_set(sb, gdp, group_data->inode_table);
ext4_free_group_clusters_set(sb, gdp, ext4_free_group_clusters_set(sb, gdp,
EXT4_NUM_B2C(sbi, group_data->free_blocks_count)); group_data->free_clusters_count);
ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb)); ext4_free_inodes_set(sb, gdp, EXT4_INODES_PER_GROUP(sb));
if (ext4_has_group_desc_csum(sb)) if (ext4_has_group_desc_csum(sb))
ext4_itable_unused_set(sb, gdp, ext4_itable_unused_set(sb, gdp,
@ -1327,7 +1352,7 @@ static void ext4_update_super(struct super_block *sb,
*/ */
for (i = 0; i < flex_gd->count; i++) { for (i = 0; i < flex_gd->count; i++) {
blocks_count += group_data[i].blocks_count; blocks_count += group_data[i].blocks_count;
free_blocks += group_data[i].free_blocks_count; free_blocks += EXT4_C2B(sbi, group_data[i].free_clusters_count);
} }
reserved_blocks = ext4_r_blocks_count(es) * 100; reserved_blocks = ext4_r_blocks_count(es) * 100;
@ -1499,17 +1524,18 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
ext4_fsblk_t n_blocks_count, ext4_fsblk_t n_blocks_count,
unsigned long flexbg_size) unsigned long flexbg_size)
{ {
struct ext4_super_block *es = EXT4_SB(sb)->s_es; struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
struct ext4_new_group_data *group_data = flex_gd->groups; struct ext4_new_group_data *group_data = flex_gd->groups;
ext4_fsblk_t o_blocks_count; ext4_fsblk_t o_blocks_count;
ext4_group_t n_group; ext4_group_t n_group;
ext4_group_t group; ext4_group_t group;
ext4_group_t last_group; ext4_group_t last_group;
ext4_grpblk_t last; ext4_grpblk_t last;
ext4_grpblk_t blocks_per_group; ext4_grpblk_t clusters_per_group;
unsigned long i; unsigned long i;
blocks_per_group = EXT4_BLOCKS_PER_GROUP(sb); clusters_per_group = EXT4_CLUSTERS_PER_GROUP(sb);
o_blocks_count = ext4_blocks_count(es); o_blocks_count = ext4_blocks_count(es);
@ -1530,9 +1556,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
int overhead; int overhead;
group_data[i].group = group + i; group_data[i].group = group + i;
group_data[i].blocks_count = blocks_per_group; group_data[i].blocks_count = EXT4_BLOCKS_PER_GROUP(sb);
overhead = ext4_group_overhead_blocks(sb, group + i); overhead = ext4_group_overhead_blocks(sb, group + i);
group_data[i].free_blocks_count = blocks_per_group - overhead; group_data[i].mdata_blocks = overhead;
group_data[i].free_clusters_count = EXT4_CLUSTERS_PER_GROUP(sb);
if (ext4_has_group_desc_csum(sb)) { if (ext4_has_group_desc_csum(sb)) {
flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT | flex_gd->bg_flags[i] = EXT4_BG_BLOCK_UNINIT |
EXT4_BG_INODE_UNINIT; EXT4_BG_INODE_UNINIT;
@ -1546,10 +1573,10 @@ static int ext4_setup_next_flex_gd(struct super_block *sb,
/* We need to initialize block bitmap of last group. */ /* We need to initialize block bitmap of last group. */
flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT; flex_gd->bg_flags[i - 1] &= ~EXT4_BG_BLOCK_UNINIT;
if ((last_group == n_group) && (last != blocks_per_group - 1)) { if ((last_group == n_group) && (last != clusters_per_group - 1)) {
group_data[i - 1].blocks_count = last + 1; group_data[i - 1].blocks_count = EXT4_C2B(sbi, last + 1);
group_data[i - 1].free_blocks_count -= blocks_per_group- group_data[i - 1].free_clusters_count -= clusters_per_group -
last - 1; last - 1;
} }
return 1; return 1;
@ -1796,7 +1823,8 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
} }
/* Do a quick sanity check of the resize inode */ /* Do a quick sanity check of the resize inode */
if (inode->i_blocks != 1 << (inode->i_blkbits - 9)) if (inode->i_blocks != 1 << (inode->i_blkbits -
(9 - sbi->s_cluster_bits)))
goto invalid_resize_inode; goto invalid_resize_inode;
for (i = 0; i < EXT4_N_BLOCKS; i++) { for (i = 0; i < EXT4_N_BLOCKS; i++) {
if (i == EXT4_DIND_BLOCK) { if (i == EXT4_DIND_BLOCK) {
@ -1959,7 +1987,7 @@ int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count)
if (n_group == o_group) if (n_group == o_group)
add = n_blocks_count - o_blocks_count; add = n_blocks_count - o_blocks_count;
else else
add = EXT4_BLOCKS_PER_GROUP(sb) - (offset + 1); add = EXT4_C2B(sbi, EXT4_CLUSTERS_PER_GROUP(sb) - (offset + 1));
if (add > 0) { if (add > 0) {
err = ext4_group_extend_no_check(sb, o_blocks_count, add); err = ext4_group_extend_no_check(sb, o_blocks_count, add);
if (err) if (err)