mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-16 10:17:32 +00:00
btrfs: stop locking the source extent range during reflink
Nowadays before starting a reflink operation we do this: 1) Take the VFS lock of the inodes in exclusive mode (a rw semaphore); 2) Take the mmap lock of the inodes (struct btrfs_inode::i_mmap_lock); 3) Flush all delalloc in the source and target ranges; 4) Wait for all ordered extents in the source and target ranges to complete; 5) Lock the source and destination ranges in the inodes' io trees. In step 5 we lock the source range because: 1) We needed to serialize against mmap writes, but that is not needed anymore because nowadays we do that through the inode's i_mmap_lock (step 2). This happens since commit 8c99516a8cdd ("btrfs: exclude mmaps while doing remap"); 2) To serialize against a concurrent relocation and avoid generating a delayed ref for an extent that was just dropped by relocation, see commit d8b552424210 ("Btrfs: fix race between reflink/dedupe and relocation"). Locking the source range however blocks any concurrent reads for that range and makes test case generic/733 fail. So instead of locking the source range during reflinks, make relocation read lock the inode's i_mmap_lock, so that it serializes with a concurrent reflink while still able to run concurrently with mmap writes and allow concurrent reads too. Reviewed-by: Boris Burkov <boris@bur.io> Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
4a43d735a6
commit
5d6f0e9890
@ -616,35 +616,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
|
||||
struct inode *inode2, u64 loff2, u64 len)
|
||||
{
|
||||
unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1, NULL);
|
||||
unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1, NULL);
|
||||
}
|
||||
|
||||
static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
|
||||
struct inode *inode2, u64 loff2, u64 len)
|
||||
{
|
||||
u64 range1_end = loff1 + len - 1;
|
||||
u64 range2_end = loff2 + len - 1;
|
||||
|
||||
if (inode1 < inode2) {
|
||||
swap(inode1, inode2);
|
||||
swap(loff1, loff2);
|
||||
swap(range1_end, range2_end);
|
||||
} else if (inode1 == inode2 && loff2 < loff1) {
|
||||
swap(loff1, loff2);
|
||||
swap(range1_end, range2_end);
|
||||
}
|
||||
|
||||
lock_extent(&BTRFS_I(inode1)->io_tree, loff1, range1_end, NULL);
|
||||
lock_extent(&BTRFS_I(inode2)->io_tree, loff2, range2_end, NULL);
|
||||
|
||||
btrfs_assert_inode_range_clean(BTRFS_I(inode1), loff1, range1_end);
|
||||
btrfs_assert_inode_range_clean(BTRFS_I(inode2), loff2, range2_end);
|
||||
}
|
||||
|
||||
static void btrfs_double_mmap_lock(struct inode *inode1, struct inode *inode2)
|
||||
{
|
||||
if (inode1 < inode2)
|
||||
@ -662,17 +633,21 @@ static void btrfs_double_mmap_unlock(struct inode *inode1, struct inode *inode2)
|
||||
static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
|
||||
struct inode *dst, u64 dst_loff)
|
||||
{
|
||||
const u64 end = dst_loff + len - 1;
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(src)->root->fs_info;
|
||||
const u64 bs = fs_info->sectorsize;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Lock destination range to serialize with concurrent readahead() and
|
||||
* source range to serialize with relocation.
|
||||
* Lock destination range to serialize with concurrent readahead(), and
|
||||
* we are safe from concurrency with relocation of source extents
|
||||
* because we have already locked the inode's i_mmap_lock in exclusive
|
||||
* mode.
|
||||
*/
|
||||
btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
|
||||
lock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
|
||||
ret = btrfs_clone(src, dst, loff, len, ALIGN(len, bs), dst_loff, 1);
|
||||
btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);
|
||||
unlock_extent(&BTRFS_I(dst)->io_tree, dst_loff, end, &cached_state);
|
||||
|
||||
btrfs_btree_balance_dirty(fs_info);
|
||||
|
||||
@ -724,6 +699,7 @@ out:
|
||||
static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
u64 off, u64 olen, u64 destoff)
|
||||
{
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct inode *src = file_inode(file_src);
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
@ -731,6 +707,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
int wb_ret;
|
||||
u64 len = olen;
|
||||
u64 bs = fs_info->sectorsize;
|
||||
u64 end;
|
||||
|
||||
/*
|
||||
* VFS's generic_remap_file_range_prep() protects us from cloning the
|
||||
@ -763,12 +740,15 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock destination range to serialize with concurrent readahead() and
|
||||
* source range to serialize with relocation.
|
||||
* Lock destination range to serialize with concurrent readahead(), and
|
||||
* we are safe from concurrency with relocation of source extents
|
||||
* because we have already locked the inode's i_mmap_lock in exclusive
|
||||
* mode.
|
||||
*/
|
||||
btrfs_double_extent_lock(src, off, inode, destoff, len);
|
||||
end = destoff + len - 1;
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
|
||||
ret = btrfs_clone(src, inode, off, olen, len, destoff, 0);
|
||||
btrfs_double_extent_unlock(src, off, inode, destoff, len);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, destoff, end, &cached_state);
|
||||
|
||||
/*
|
||||
* We may have copied an inline extent into a page of the destination
|
||||
|
@ -1127,16 +1127,22 @@ int replace_file_extents(struct btrfs_trans_handle *trans,
|
||||
fs_info->sectorsize));
|
||||
WARN_ON(!IS_ALIGNED(end, fs_info->sectorsize));
|
||||
end--;
|
||||
/* Take mmap lock to serialize with reflinks. */
|
||||
if (!down_read_trylock(&BTRFS_I(inode)->i_mmap_lock))
|
||||
continue;
|
||||
ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
|
||||
key.offset, end,
|
||||
&cached_state);
|
||||
if (!ret)
|
||||
if (!ret) {
|
||||
up_read(&BTRFS_I(inode)->i_mmap_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode),
|
||||
key.offset, end, true);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree,
|
||||
key.offset, end, &cached_state);
|
||||
up_read(&BTRFS_I(inode)->i_mmap_lock);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user