diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 90e70e21e479..7ae758685c7b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1419,6 +1419,16 @@ struct btrfs_block_group_cache { */ atomic_t reservations; + /* + * Incremented while holding the spinlock *lock* by a task checking if + * it can perform a nocow write (incremented if the value for the *ro* + * field is 0). Decremented by such tasks once they create an ordered + * extent or before that if some error happens before reaching that step. + * This is to prevent races between block group relocation and nocow + * writes through direct IO. + */ + atomic_t nocow_writers; + /* Lock for free space tree operations. */ struct mutex free_space_lock; @@ -3513,6 +3523,9 @@ int btrfs_check_space_for_delayed_refs(struct btrfs_trans_handle *trans, void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info, const u64 start); void btrfs_wait_block_group_reservations(struct btrfs_block_group_cache *bg); +bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr); +void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr); +void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_root *root, unsigned long count); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 09aad7b447f5..dcf89bfa990d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3824,6 +3824,59 @@ int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) return readonly; } +bool btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) +{ + struct btrfs_block_group_cache *bg; + bool ret = true; + + bg = btrfs_lookup_block_group(fs_info, bytenr); + if (!bg) + return false; + + spin_lock(&bg->lock); + if (bg->ro) + ret = false; + else + atomic_inc(&bg->nocow_writers); + spin_unlock(&bg->lock); + + /* no put on block group, done by btrfs_dec_nocow_writers */ + if (!ret) + btrfs_put_block_group(bg); + + return ret; + +} + +void btrfs_dec_nocow_writers(struct btrfs_fs_info *fs_info, u64 bytenr) +{ + struct btrfs_block_group_cache *bg; + + bg = btrfs_lookup_block_group(fs_info, bytenr); + ASSERT(bg); + if (atomic_dec_and_test(&bg->nocow_writers)) + wake_up_atomic_t(&bg->nocow_writers); + /* + * Once for our lookup and once for the lookup done by a previous call + * to btrfs_inc_nocow_writers() + */ + btrfs_put_block_group(bg); + btrfs_put_block_group(bg); +} + +static int btrfs_wait_nocow_writers_atomic_t(atomic_t *a) +{ + schedule(); + return 0; +} + +void btrfs_wait_nocow_writers(struct btrfs_block_group_cache *bg) +{ + wait_on_atomic_t(&bg->nocow_writers, + btrfs_wait_nocow_writers_atomic_t, + TASK_UNINTERRUPTIBLE); +} + static const char *alloc_name(u64 flags) { switch (flags) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 45d0dafbbf40..ee9be4199e7c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1382,6 +1382,9 @@ static noinline int run_delalloc_nocow(struct inode *inode, */ if (csum_exist_in_range(root, disk_bytenr, num_bytes)) goto out_check; + if (!btrfs_inc_nocow_writers(root->fs_info, + disk_bytenr)) + goto out_check; nocow = 1; } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) { extent_end = found_key.offset + @@ -1396,6 +1399,9 @@ static noinline int run_delalloc_nocow(struct inode *inode, path->slots[0]++; if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); + if (nocow) + btrfs_dec_nocow_writers(root->fs_info, + disk_bytenr); goto next_slot; } if (!nocow) { @@ -1416,6 +1422,9 @@ static noinline int run_delalloc_nocow(struct inode *inode, if (ret) { if (!nolock && nocow) btrfs_end_write_no_snapshoting(root); + if (nocow) + btrfs_dec_nocow_writers(root->fs_info, + disk_bytenr); goto error; } cow_start = (u64)-1; @@ -1458,6 +1467,8 @@ static noinline int run_delalloc_nocow(struct inode *inode, ret = btrfs_add_ordered_extent(inode, cur_offset, disk_bytenr, num_bytes, num_bytes, type); + if (nocow) + btrfs_dec_nocow_writers(root->fs_info, disk_bytenr); BUG_ON(ret); /* -ENOMEM */ if (root->root_key.objectid == @@ -7657,7 +7668,8 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, block_start = em->block_start + (start - em->start); if (can_nocow_extent(inode, start, &len, &orig_start, - &orig_block_len, &ram_bytes) == 1) { + &orig_block_len, &ram_bytes) == 1 && + btrfs_inc_nocow_writers(root->fs_info, block_start)) { /* * Create the ordered extent before the extent map. This @@ -7672,6 +7684,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, */ ret = btrfs_add_ordered_extent_dio(inode, start, block_start, len, len, type); + btrfs_dec_nocow_writers(root->fs_info, block_start); if (ret) { free_extent_map(em); goto unlock_err; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e78f8e44bd9a..054d9a80e77e 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4255,6 +4255,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) rc->block_group->key.objectid, rc->block_group->flags); btrfs_wait_block_group_reservations(rc->block_group); + btrfs_wait_nocow_writers(rc->block_group); btrfs_wait_ordered_roots(fs_info, -1, rc->block_group->key.objectid, rc->block_group->key.offset);