mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-10 07:00:48 +00:00
Merge branch 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "This is an assorted set I've been queuing up: Jeff Mahoney tracked down a tricky one where we ended up starting IO on the wrong mapping for special files in btrfs_evict_inode. A few people reported this one on the list. Filipe found (and provided a test for) a difficult bug in reading compressed extents, and Josef fixed up some quota record keeping with snapshot deletion. Chandan killed off an accounting bug during DIO that led to WARN_ONs as we freed inodes" * 'for-linus-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: keep dropped roots in cache until transaction commit Btrfs: Direct I/O: Fix space accounting btrfs: skip waiting on ordered range for special files Btrfs: fix read corruption of compressed and shared extents Btrfs: remove unnecessary locking of cleaner_mutex to avoid deadlock Btrfs: don't initialize a space info as full to prevent ENOSPC
This commit is contained in:
commit
03e8f64486
@ -44,8 +44,6 @@
|
||||
#define BTRFS_INODE_IN_DELALLOC_LIST 9
|
||||
#define BTRFS_INODE_READDIO_NEED_LOCK 10
|
||||
#define BTRFS_INODE_HAS_PROPS 11
|
||||
/* DIO is ready to submit */
|
||||
#define BTRFS_INODE_DIO_READY 12
|
||||
/*
|
||||
* The following 3 bits are meant only for the btree inode.
|
||||
* When any of them is set, it means an error happened while writing an
|
||||
|
@ -3765,9 +3765,7 @@ void close_ctree(struct btrfs_root *root)
|
||||
* block groups queued for removal, the deletion will be
|
||||
* skipped when we quit the cleaner thread.
|
||||
*/
|
||||
mutex_lock(&root->fs_info->cleaner_mutex);
|
||||
btrfs_delete_unused_bgs(root->fs_info);
|
||||
mutex_unlock(&root->fs_info->cleaner_mutex);
|
||||
|
||||
ret = btrfs_commit_super(root);
|
||||
if (ret)
|
||||
|
@ -3742,10 +3742,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
|
||||
found->bytes_reserved = 0;
|
||||
found->bytes_readonly = 0;
|
||||
found->bytes_may_use = 0;
|
||||
if (total_bytes > 0)
|
||||
found->full = 0;
|
||||
else
|
||||
found->full = 1;
|
||||
found->full = 0;
|
||||
found->force_alloc = CHUNK_ALLOC_NO_FORCE;
|
||||
found->chunk_alloc = 0;
|
||||
found->flush = 0;
|
||||
@ -8668,7 +8665,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state)) {
|
||||
btrfs_drop_and_free_fs_root(tree_root->fs_info, root);
|
||||
btrfs_add_dropped_root(trans, root);
|
||||
} else {
|
||||
free_extent_buffer(root->node);
|
||||
free_extent_buffer(root->commit_root);
|
||||
|
@ -2798,7 +2798,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
|
||||
bio_end_io_t end_io_func,
|
||||
int mirror_num,
|
||||
unsigned long prev_bio_flags,
|
||||
unsigned long bio_flags)
|
||||
unsigned long bio_flags,
|
||||
bool force_bio_submit)
|
||||
{
|
||||
int ret = 0;
|
||||
struct bio *bio;
|
||||
@ -2814,6 +2815,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
|
||||
contig = bio_end_sector(bio) == sector;
|
||||
|
||||
if (prev_bio_flags != bio_flags || !contig ||
|
||||
force_bio_submit ||
|
||||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
|
||||
bio_add_page(bio, page, page_size, offset) < page_size) {
|
||||
ret = submit_one_bio(rw, bio, mirror_num,
|
||||
@ -2910,7 +2912,8 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
get_extent_t *get_extent,
|
||||
struct extent_map **em_cached,
|
||||
struct bio **bio, int mirror_num,
|
||||
unsigned long *bio_flags, int rw)
|
||||
unsigned long *bio_flags, int rw,
|
||||
u64 *prev_em_start)
|
||||
{
|
||||
struct inode *inode = page->mapping->host;
|
||||
u64 start = page_offset(page);
|
||||
@ -2958,6 +2961,7 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
}
|
||||
while (cur <= end) {
|
||||
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
|
||||
bool force_bio_submit = false;
|
||||
|
||||
if (cur >= last_byte) {
|
||||
char *userpage;
|
||||
@ -3008,6 +3012,49 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
block_start = em->block_start;
|
||||
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
|
||||
block_start = EXTENT_MAP_HOLE;
|
||||
|
||||
/*
|
||||
* If we have a file range that points to a compressed extent
|
||||
* and it's followed by a consecutive file range that points to
|
||||
* the same compressed extent (possibly with a different
|
||||
* offset and/or length, so it either points to the whole extent
|
||||
* or only part of it), we must make sure we do not submit a
|
||||
* single bio to populate the pages for the 2 ranges because
|
||||
* this makes the compressed extent read zero out the pages
|
||||
* belonging to the 2nd range. Imagine the following scenario:
|
||||
*
|
||||
* File layout
|
||||
* [0 - 8K] [8K - 24K]
|
||||
* | |
|
||||
* | |
|
||||
* points to extent X, points to extent X,
|
||||
* offset 4K, length of 8K offset 0, length 16K
|
||||
*
|
||||
* [extent X, compressed length = 4K uncompressed length = 16K]
|
||||
*
|
||||
* If the bio to read the compressed extent covers both ranges,
|
||||
* it will decompress extent X into the pages belonging to the
|
||||
* first range and then it will stop, zeroing out the remaining
|
||||
* pages that belong to the other range that points to extent X.
|
||||
* So here we make sure we submit 2 bios, one for the first
|
||||
* range and another one for the second range. Both will target
|
||||
* the same physical extent from disk, but we can't currently
|
||||
* make the compressed bio endio callback populate the pages
|
||||
* for both ranges because each compressed bio is tightly
|
||||
* coupled with a single extent map, and each range can have
|
||||
* an extent map with a different offset value relative to the
|
||||
* uncompressed data of our extent and different lengths. This
|
||||
* is a corner case so we prioritize correctness over
|
||||
* optimal behavior (we accept submitting 2 bios for the same extent).
|
||||
*/
|
||||
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
|
||||
prev_em_start && *prev_em_start != (u64)-1 &&
|
||||
*prev_em_start != em->orig_start)
|
||||
force_bio_submit = true;
|
||||
|
||||
if (prev_em_start)
|
||||
*prev_em_start = em->orig_start;
|
||||
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
|
||||
@ -3057,7 +3104,8 @@ static int __do_readpage(struct extent_io_tree *tree,
|
||||
bdev, bio, pnr,
|
||||
end_bio_extent_readpage, mirror_num,
|
||||
*bio_flags,
|
||||
this_bio_flag);
|
||||
this_bio_flag,
|
||||
force_bio_submit);
|
||||
if (!ret) {
|
||||
nr++;
|
||||
*bio_flags = this_bio_flag;
|
||||
@ -3089,6 +3137,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
|
||||
struct inode *inode;
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
int index;
|
||||
u64 prev_em_start = (u64)-1;
|
||||
|
||||
inode = pages[0]->mapping->host;
|
||||
while (1) {
|
||||
@ -3104,7 +3153,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
|
||||
|
||||
for (index = 0; index < nr_pages; index++) {
|
||||
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
|
||||
mirror_num, bio_flags, rw);
|
||||
mirror_num, bio_flags, rw, &prev_em_start);
|
||||
page_cache_release(pages[index]);
|
||||
}
|
||||
}
|
||||
@ -3172,7 +3221,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
|
||||
}
|
||||
|
||||
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
|
||||
bio_flags, rw);
|
||||
bio_flags, rw, NULL);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -3198,7 +3247,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
|
||||
int ret;
|
||||
|
||||
ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
|
||||
&bio_flags, READ);
|
||||
&bio_flags, READ, NULL);
|
||||
if (bio)
|
||||
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
|
||||
return ret;
|
||||
@ -3451,7 +3500,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
|
||||
sector, iosize, pg_offset,
|
||||
bdev, &epd->bio, max_nr,
|
||||
end_bio_extent_writepage,
|
||||
0, 0, 0);
|
||||
0, 0, 0, false);
|
||||
if (ret)
|
||||
SetPageError(page);
|
||||
}
|
||||
@ -3754,7 +3803,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
|
||||
ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
|
||||
PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
|
||||
-1, end_bio_extent_buffer_writepage,
|
||||
0, epd->bio_flags, bio_flags);
|
||||
0, epd->bio_flags, bio_flags, false);
|
||||
epd->bio_flags = bio_flags;
|
||||
if (ret) {
|
||||
set_btree_ioerr(p);
|
||||
|
@ -5084,7 +5084,8 @@ void btrfs_evict_inode(struct inode *inode)
|
||||
goto no_delete;
|
||||
}
|
||||
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
|
||||
btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
||||
if (!special_file(inode->i_mode))
|
||||
btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
||||
|
||||
btrfs_free_io_failure_record(inode, 0, (u64)-1);
|
||||
|
||||
@ -7408,6 +7409,10 @@ static struct extent_map *create_pinned_em(struct inode *inode, u64 start,
|
||||
return em;
|
||||
}
|
||||
|
||||
struct btrfs_dio_data {
|
||||
u64 outstanding_extents;
|
||||
u64 reserve;
|
||||
};
|
||||
|
||||
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh_result, int create)
|
||||
@ -7415,10 +7420,10 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
struct extent_map *em;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct btrfs_dio_data *dio_data = NULL;
|
||||
u64 start = iblock << inode->i_blkbits;
|
||||
u64 lockstart, lockend;
|
||||
u64 len = bh_result->b_size;
|
||||
u64 *outstanding_extents = NULL;
|
||||
int unlock_bits = EXTENT_LOCKED;
|
||||
int ret = 0;
|
||||
|
||||
@ -7436,7 +7441,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
|
||||
* that anything that needs to check if there's a transaction doesn't get
|
||||
* confused.
|
||||
*/
|
||||
outstanding_extents = current->journal_info;
|
||||
dio_data = current->journal_info;
|
||||
current->journal_info = NULL;
|
||||
}
|
||||
|
||||
@ -7568,17 +7573,18 @@ unlock:
|
||||
* within our reservation, otherwise we need to adjust our inode
|
||||
* counter appropriately.
|
||||
*/
|
||||
if (*outstanding_extents) {
|
||||
(*outstanding_extents)--;
|
||||
if (dio_data->outstanding_extents) {
|
||||
(dio_data->outstanding_extents)--;
|
||||
} else {
|
||||
spin_lock(&BTRFS_I(inode)->lock);
|
||||
BTRFS_I(inode)->outstanding_extents++;
|
||||
spin_unlock(&BTRFS_I(inode)->lock);
|
||||
}
|
||||
|
||||
current->journal_info = outstanding_extents;
|
||||
btrfs_free_reserved_data_space(inode, len);
|
||||
set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags);
|
||||
WARN_ON(dio_data->reserve < len);
|
||||
dio_data->reserve -= len;
|
||||
current->journal_info = dio_data;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -7601,8 +7607,8 @@ unlock:
|
||||
unlock_err:
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
|
||||
unlock_bits, 1, 0, &cached_state, GFP_NOFS);
|
||||
if (outstanding_extents)
|
||||
current->journal_info = outstanding_extents;
|
||||
if (dio_data)
|
||||
current->journal_info = dio_data;
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -8329,7 +8335,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file->f_mapping->host;
|
||||
u64 outstanding_extents = 0;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_dio_data dio_data = { 0 };
|
||||
size_t count = 0;
|
||||
int flags = 0;
|
||||
bool wakeup = true;
|
||||
@ -8367,7 +8374,7 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
ret = btrfs_delalloc_reserve_space(inode, count);
|
||||
if (ret)
|
||||
goto out;
|
||||
outstanding_extents = div64_u64(count +
|
||||
dio_data.outstanding_extents = div64_u64(count +
|
||||
BTRFS_MAX_EXTENT_SIZE - 1,
|
||||
BTRFS_MAX_EXTENT_SIZE);
|
||||
|
||||
@ -8376,7 +8383,8 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
* do the accounting properly if we go over the number we
|
||||
* originally calculated. Abuse current->journal_info for this.
|
||||
*/
|
||||
current->journal_info = &outstanding_extents;
|
||||
dio_data.reserve = round_up(count, root->sectorsize);
|
||||
current->journal_info = &dio_data;
|
||||
} else if (test_bit(BTRFS_INODE_READDIO_NEED_LOCK,
|
||||
&BTRFS_I(inode)->runtime_flags)) {
|
||||
inode_dio_end(inode);
|
||||
@ -8391,16 +8399,9 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
|
||||
if (iov_iter_rw(iter) == WRITE) {
|
||||
current->journal_info = NULL;
|
||||
if (ret < 0 && ret != -EIOCBQUEUED) {
|
||||
/*
|
||||
* If the error comes from submitting stage,
|
||||
* btrfs_get_blocks_direct() has freed data space,
|
||||
* and metadata space will be handled by
|
||||
* finish_ordered_fn, don't do that again to make
|
||||
* sure bytes_may_use is correct.
|
||||
*/
|
||||
if (!test_and_clear_bit(BTRFS_INODE_DIO_READY,
|
||||
&BTRFS_I(inode)->runtime_flags))
|
||||
btrfs_delalloc_release_space(inode, count);
|
||||
if (dio_data.reserve)
|
||||
btrfs_delalloc_release_space(inode,
|
||||
dio_data.reserve);
|
||||
} else if (ret >= 0 && (size_t)ret < count)
|
||||
btrfs_delalloc_release_space(inode,
|
||||
count - (size_t)ret);
|
||||
|
@ -1658,9 +1658,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
|
||||
* groups on disk until we're mounted read-write again
|
||||
* unless we clean them up here.
|
||||
*/
|
||||
mutex_lock(&root->fs_info->cleaner_mutex);
|
||||
btrfs_delete_unused_bgs(fs_info);
|
||||
mutex_unlock(&root->fs_info->cleaner_mutex);
|
||||
|
||||
btrfs_dev_replace_suspend_for_unmount(fs_info);
|
||||
btrfs_scrub_cancel(fs_info);
|
||||
|
@ -117,6 +117,18 @@ static noinline void switch_commit_roots(struct btrfs_transaction *trans,
|
||||
btrfs_unpin_free_ino(root);
|
||||
clear_btree_io_tree(&root->dirty_log_pages);
|
||||
}
|
||||
|
||||
/* We can free old roots now. */
|
||||
spin_lock(&trans->dropped_roots_lock);
|
||||
while (!list_empty(&trans->dropped_roots)) {
|
||||
root = list_first_entry(&trans->dropped_roots,
|
||||
struct btrfs_root, root_list);
|
||||
list_del_init(&root->root_list);
|
||||
spin_unlock(&trans->dropped_roots_lock);
|
||||
btrfs_drop_and_free_fs_root(fs_info, root);
|
||||
spin_lock(&trans->dropped_roots_lock);
|
||||
}
|
||||
spin_unlock(&trans->dropped_roots_lock);
|
||||
up_write(&fs_info->commit_root_sem);
|
||||
}
|
||||
|
||||
@ -255,11 +267,13 @@ loop:
|
||||
INIT_LIST_HEAD(&cur_trans->pending_ordered);
|
||||
INIT_LIST_HEAD(&cur_trans->dirty_bgs);
|
||||
INIT_LIST_HEAD(&cur_trans->io_bgs);
|
||||
INIT_LIST_HEAD(&cur_trans->dropped_roots);
|
||||
mutex_init(&cur_trans->cache_write_mutex);
|
||||
cur_trans->num_dirty_bgs = 0;
|
||||
spin_lock_init(&cur_trans->dirty_bgs_lock);
|
||||
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
|
||||
spin_lock_init(&cur_trans->deleted_bgs_lock);
|
||||
spin_lock_init(&cur_trans->dropped_roots_lock);
|
||||
list_add_tail(&cur_trans->list, &fs_info->trans_list);
|
||||
extent_io_tree_init(&cur_trans->dirty_pages,
|
||||
fs_info->btree_inode->i_mapping);
|
||||
@ -336,6 +350,24 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
|
||||
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_transaction *cur_trans = trans->transaction;
|
||||
|
||||
/* Add ourselves to the transaction dropped list */
|
||||
spin_lock(&cur_trans->dropped_roots_lock);
|
||||
list_add_tail(&root->root_list, &cur_trans->dropped_roots);
|
||||
spin_unlock(&cur_trans->dropped_roots_lock);
|
||||
|
||||
/* Make sure we don't try to update the root at commit time */
|
||||
spin_lock(&root->fs_info->fs_roots_radix_lock);
|
||||
radix_tree_tag_clear(&root->fs_info->fs_roots_radix,
|
||||
(unsigned long)root->root_key.objectid,
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&root->fs_info->fs_roots_radix_lock);
|
||||
}
|
||||
|
||||
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root)
|
||||
{
|
||||
|
@ -65,6 +65,7 @@ struct btrfs_transaction {
|
||||
struct list_head switch_commits;
|
||||
struct list_head dirty_bgs;
|
||||
struct list_head io_bgs;
|
||||
struct list_head dropped_roots;
|
||||
u64 num_dirty_bgs;
|
||||
|
||||
/*
|
||||
@ -76,6 +77,7 @@ struct btrfs_transaction {
|
||||
spinlock_t dirty_bgs_lock;
|
||||
struct list_head deleted_bgs;
|
||||
spinlock_t deleted_bgs_lock;
|
||||
spinlock_t dropped_roots_lock;
|
||||
struct btrfs_delayed_ref_root delayed_refs;
|
||||
int aborted;
|
||||
int dirty_bg_run;
|
||||
@ -216,5 +218,6 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info);
|
||||
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
|
||||
void btrfs_put_transaction(struct btrfs_transaction *transaction);
|
||||
void btrfs_apply_pending_changes(struct btrfs_fs_info *fs_info);
|
||||
|
||||
void btrfs_add_dropped_root(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root);
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user