mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2025-01-09 07:23:14 +00:00
btrfs: subpage: avoid potential deadlock with compression and delalloc
[BUG] With experimental subpage compression enabled, a simple fsstress can lead to self deadlock on page 720896: mkfs.btrfs -f -s 4k $dev > /dev/null mount $dev -o compress $mnt $fsstress -p 1 -n 100 -w -d $mnt -v -s 1625511156 [CAUSE] If we have a file layout looks like below: 0 32K 64K 96K 128K |//| |///////////////| 4K Then we run delalloc range for the inode, it will: - Call find_lock_delalloc_range() with @delalloc_start = 0 Then we got a delalloc range [0, 4K). This range will be COWed. - Call find_lock_delalloc_range() again with @delalloc_start = 4K Since find_lock_delalloc_range() never cares whether the range is still inside page range [0, 64K), it will return range [64K, 128K). This range meets the condition for subpage compression, will go through async COW path. And async COW path will return @page_started. But that @page_started is now for range [64K, 128K), not for range [0, 64K). - writepage_dellloc() returned 1 for page [0, 64K) Thus page [0, 64K) will not be unlocked, nor its page dirty status will be cleared. Next time when we try to lock page [0, 64K) we will deadlock, as there is no one to release page [0, 64K). This problem will never happen for regular page size as one page only contains one sector. After the first find_lock_delalloc_range() call, the @delalloc_end will go beyond @page_end no matter if we found a delalloc range or not Thus this bug only happens for subpage, as now we need multiple runs to exhaust the delalloc range of a page. [FIX] Fix the problem by ensuring the delalloc range we ran at least started inside @locked_page. So that we will never get incorrect @page_started. And to prevent such problem from happening again: - Make find_lock_delalloc_range() return false if the found range is beyond @end value passed in. Since @end will be utilized now, add an ASSERT() to ensure we pass correct @end into find_lock_delalloc_range(). This also means, for selftests we needs to populate @end before calling find_lock_delalloc_range(). - New ASSERT() in find_lock_delalloc_range() Now we will make sure the @start/@end passed in at least covers part of the page. - New ASSERT() in run_delalloc_range() To make sure the range at least starts inside @locked page. - Use @delalloc_start as proper cursor, while @delalloc_end is always reset to @page_end. Signed-off-by: Qu Wenruo <wqu@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
164674a76b
commit
2749f7ef36
@ -1975,10 +1975,18 @@ static noinline int lock_delalloc_pages(struct inode *inode,
|
||||
|
||||
/*
|
||||
* Find and lock a contiguous range of bytes in the file marked as delalloc, no
|
||||
* more than @max_bytes. @Start and @end are used to return the range,
|
||||
* more than @max_bytes.
|
||||
*
|
||||
* Return: true if we find something
|
||||
* false if nothing was in the tree
|
||||
* @start: The original start bytenr to search.
|
||||
* Will store the extent range start bytenr.
|
||||
* @end: The original end bytenr of the search range
|
||||
* Will store the extent range end bytenr.
|
||||
*
|
||||
* Return true if we find a delalloc range which starts inside the original
|
||||
* range, and @start/@end will store the delalloc range start/end.
|
||||
*
|
||||
* Return false if we can't find any delalloc range which starts inside the
|
||||
* original range, and @start/@end will be the non-delalloc range start/end.
|
||||
*/
|
||||
EXPORT_FOR_TESTS
|
||||
noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
@ -1986,6 +1994,8 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
u64 *end)
|
||||
{
|
||||
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
|
||||
const u64 orig_start = *start;
|
||||
const u64 orig_end = *end;
|
||||
u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
|
||||
u64 delalloc_start;
|
||||
u64 delalloc_end;
|
||||
@ -1994,15 +2004,23 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
|
||||
int ret;
|
||||
int loops = 0;
|
||||
|
||||
/* Caller should pass a valid @end to indicate the search range end */
|
||||
ASSERT(orig_end > orig_start);
|
||||
|
||||
/* The range should at least cover part of the page */
|
||||
ASSERT(!(orig_start >= page_offset(locked_page) + PAGE_SIZE ||
|
||||
orig_end <= page_offset(locked_page)));
|
||||
again:
|
||||
/* step one, find a bunch of delalloc bytes starting at start */
|
||||
delalloc_start = *start;
|
||||
delalloc_end = 0;
|
||||
found = btrfs_find_delalloc_range(tree, &delalloc_start, &delalloc_end,
|
||||
max_bytes, &cached_state);
|
||||
if (!found || delalloc_end <= *start) {
|
||||
if (!found || delalloc_end <= *start || delalloc_start > orig_end) {
|
||||
*start = delalloc_start;
|
||||
*end = delalloc_end;
|
||||
|
||||
/* @delalloc_end can be -1, never go beyond @orig_end */
|
||||
*end = min(delalloc_end, orig_end);
|
||||
free_extent_state(cached_state);
|
||||
return false;
|
||||
}
|
||||
@ -3771,16 +3789,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
|
||||
struct page *page, struct writeback_control *wbc,
|
||||
unsigned long *nr_written)
|
||||
{
|
||||
u64 page_end = page_offset(page) + PAGE_SIZE - 1;
|
||||
bool found;
|
||||
const u64 page_end = page_offset(page) + PAGE_SIZE - 1;
|
||||
u64 delalloc_start = page_offset(page);
|
||||
u64 delalloc_to_write = 0;
|
||||
u64 delalloc_end = 0;
|
||||
int ret;
|
||||
int page_started = 0;
|
||||
|
||||
while (delalloc_start < page_end) {
|
||||
u64 delalloc_end = page_end;
|
||||
bool found;
|
||||
|
||||
while (delalloc_end < page_end) {
|
||||
found = find_lock_delalloc_range(&inode->vfs_inode, page,
|
||||
&delalloc_start,
|
||||
&delalloc_end);
|
||||
|
@ -1955,6 +1955,13 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
|
||||
int ret;
|
||||
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
|
||||
|
||||
/*
|
||||
* The range must cover part of the @locked_page, or the returned
|
||||
* @page_started can confuse the caller.
|
||||
*/
|
||||
ASSERT(!(end <= page_offset(locked_page) ||
|
||||
start >= page_offset(locked_page) + PAGE_SIZE));
|
||||
|
||||
if (should_nocow(inode, start, end)) {
|
||||
/*
|
||||
* Normally on a zoned device we're only doing COW writes, but
|
||||
|
@ -112,7 +112,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
*/
|
||||
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
|
||||
start = 0;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
@ -143,7 +143,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
}
|
||||
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
@ -177,14 +177,14 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
goto out_bits;
|
||||
}
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (found) {
|
||||
test_err("found range when we shouldn't have");
|
||||
goto out_bits;
|
||||
}
|
||||
if (end != (u64)-1) {
|
||||
if (end != test_start + PAGE_SIZE - 1) {
|
||||
test_err("did not return the proper end offset");
|
||||
goto out_bits;
|
||||
}
|
||||
@ -198,7 +198,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
*/
|
||||
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
found = find_lock_delalloc_range(inode, locked_page, &start,
|
||||
&end);
|
||||
if (!found) {
|
||||
@ -233,7 +233,7 @@ static int test_find_delalloc(u32 sectorsize)
|
||||
/* We unlocked it in the previous test */
|
||||
lock_page(locked_page);
|
||||
start = test_start;
|
||||
end = 0;
|
||||
end = start + PAGE_SIZE - 1;
|
||||
/*
|
||||
* Currently if we fail to find dirty pages in the delalloc range we
|
||||
* will adjust max_bytes down to PAGE_SIZE and then re-search. If
|
||||
|
Loading…
Reference in New Issue
Block a user