for-5.18-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmI44SgACgkQxWXV+ddt
 WDtzyg//YgMKr05jRsU3I/pIQ9znuKZmmllThwF63ZRG4PvKz2QfzvKdrMuzNjru
 5kHbG59iJqtLmU/aVsdp8mL6mmg5U3Ym2bIRsrW5m4HTtTowKdirvL/lQ3/tWm8j
 CSDJhUdCL2SwFjpru+4cxOeHLXNSfsk4BoCu8nsLitL+oXv/EPo/dkmu6nPjiMY3
 RjsIDBeDEf7J20KOuP/qJuN2YOAT7TeISPD3Ow4aDsmndWQ8n6KehEmAZb7QuqZQ
 SYubZ2wTb9HuPH/qpiTIA7innBIr+JkYtUYlz2xxixM2BUWNfqD6oKHw9RgOY5Sg
 CULFssw0i7cgGKsvuPJw1zdM002uG4wwXKigGiyljTVWvxneyr4mNDWiGad+LyFJ
 XWhnABPidkLs/1zbUkJ23DVub5VlfZsypkFDJAUXI0nGu3VrhjDfTYMa8eCe2L/F
 YuGG6CrAC+5K/arKAWTVj7hOb+52UzBTEBJz60LJJ6dS9eQoBy857V6pfo7w7ukZ
 t/tqA6q75O4tk/G3Ix3V1CjuAH3kJE6qXrvBxhpu8aZNjofopneLyGqS5oahpcE8
 8edtT+ZZhNuU9sLSEJCJATVxXRDdNzpQ8CHgOR5HOUbmM/vwKNzHPfRQzDnImznw
 UaUlFaaHwK17M6Y/6CnMecz26U2nVSJ7pyh39mb784XYe2a1efE=
 =YARd
 -----END PGP SIGNATURE-----

Merge tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This contains feature updates, performance improvements, preparatory
  and core work and some related VFS updates:

  Features:

   - encoded read/write ioctls, which allow user space to read or write
     raw data directly to extents (compressed for now, encrypted in the
     future); this will be used by send/receive v2, where it saves
     processing time (see the usage sketch after this message)

   - zoned mode now works with metadata DUP (the mkfs.btrfs default)

   - error message header updates:
      - print error state: transaction abort, other error, log tree
        errors
      - print transient filesystem state: remount, device replace,
        ignored checksum verifications

   - tree-checker: verify the transaction id of the to-be-written dirty
     extent buffer

  Performance improvements for fsync:

   - directory logging speedups (up to -90% run time)

   - avoid logging all directory changes during renames (up to -60% run
     time)

   - avoid inode logging during rename and link when possible (up to
     -60% run time)

   - prepare extents to be logged before locking a log tree path
     (throughput +7%)

   - stop copying old file extents when doing a full fsync()

   - improved logging of old extents after truncate

  Core, fixes:

   - improved stale device identification by dev_t and not just path
     (for devices that are behind other layers like device mapper)

   - continued extent tree v2 preparatory work
      - disable features that won't work yet
      - add wrappers and abstractions for new tree roots

   - improved error handling

   - add super block write annotations around background block group
     reclaim

   - fix device scanning messages potentially accessing stale pointer

   - cleanups and refactoring

  VFS:

   - allow reflinks/deduplication from two different mounts of the same
     filesystem (see the sketch after this message)

   - export and add helpers for read/write range verification, for the
     encoded ioctls"
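
To make the encoded read feature concrete, here is a minimal user-space
sketch of the new BTRFS_IOC_ENCODED_READ ioctl. It assumes the 5.18 uapi
definition of struct btrfs_ioctl_encoded_io_args in <linux/btrfs.h>,
requires CAP_SYS_ADMIN, and trims error handling; the return-value
semantics described in the comments are my reading of this series, not a
guaranteed contract.

/* Hedged sketch: read one extent "as is" (still compressed) from a btrfs file. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	static char buf[128 * 1024];	/* BTRFS_MAX_COMPRESSED */
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct btrfs_ioctl_encoded_io_args args;
	ssize_t ret;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.iov = &iov;
	args.iovcnt = 1;
	args.offset = 0;	/* file offset to start reading at */

	/*
	 * On success the ioctl returns the number of encoded (on-disk)
	 * bytes copied into buf; args.len, args.unencoded_len and
	 * args.compression describe how to interpret those raw bytes.
	 */
	ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
	if (ret < 0)
		perror("BTRFS_IOC_ENCODED_READ");
	else
		printf("encoded %zd bytes, unencoded_len %llu, compression %u\n",
		       ret, (unsigned long long)args.unencoded_len,
		       args.compression);
	close(fd);
	return 0;
}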

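The cross-vfsmount change in the VFS part can be illustrated with a short
sketch. The paths /mnt/a and /mnt/b are assumptions: two mounts (for
example bind mounts or different subvolume mounts) of the same btrfs
filesystem. Before this series, FICLONE failed with EXDEV here even
though both files share a superblock.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	int src = open("/mnt/a/data", O_RDONLY);
	int dst = open("/mnt/b/copy", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (src < 0 || dst < 0)
		return 1;
	if (ioctl(dst, FICLONE, src))	/* shares extents, no data copy */
		perror("FICLONE");
	close(src);
	close(dst);
	return 0;
}
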
* tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (98 commits)
  btrfs: zoned: put block group after final usage
  btrfs: don't access possibly stale fs_info data in device_list_add
  btrfs: add lockdep_assert_held to need_preemptive_reclaim
  btrfs: verify the transid of the to-be-written dirty extent buffer
  btrfs: unify the error handling of btrfs_read_buffer()
  btrfs: unify the error handling pattern for read_tree_block()
  btrfs: factor out do_free_extent_accounting helper
  btrfs: remove last_ref from the extent freeing code
  btrfs: add an alloc_reserved_extent helper
  btrfs: remove BUG_ON(ret) in alloc_reserved_tree_block
  btrfs: add and use helper for unlinking inode during log replay
  btrfs: extend locking to all space_info members accesses
  btrfs: zoned: mark relocation as writing
  fs: allow cross-vfsmount reflink/dedupe
  btrfs: remove the cross file system checks from remap
  btrfs: pass btrfs_fs_info to btrfs_recover_relocation
  btrfs: pass btrfs_fs_info for deleting snapshots and cleaner
  btrfs: add filesystems state details to error messages
  btrfs: deal with unexpected extent type during reflinking
  btrfs: fix unexpected error path when reflinking an inline extent
  ...
Commit: 5191290407
Author: Linus Torvalds
Date:   2022-03-22 10:51:40 -07:00

50 changed files with 3113 additions and 1335 deletions

--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
     if (IS_ERR(eb)) {
         free_pref(ref);
         return PTR_ERR(eb);
-    } else if (!extent_buffer_uptodate(eb)) {
+    }
+    if (!extent_buffer_uptodate(eb)) {
         free_pref(ref);
         free_extent_buffer(eb);
         return -EIO;
     }
     if (lock)
         btrfs_tree_read_lock(eb);
     if (btrfs_header_level(eb) == 0)
@@ -1335,7 +1337,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
         if (IS_ERR(eb)) {
             ret = PTR_ERR(eb);
             goto out;
-        } else if (!extent_buffer_uptodate(eb)) {
+        }
+        if (!extent_buffer_uptodate(eb)) {
             free_extent_buffer(eb);
             ret = -EIO;
             goto out;

--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
     if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
         return;
 
-    if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
+    sb_start_write(fs_info->sb);
+
+    if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
+        sb_end_write(fs_info->sb);
         return;
+    }
 
     /*
      * Long running balances can keep us blocked here for eternity, so
@@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
      */
     if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
         btrfs_exclop_finish(fs_info);
+        sb_end_write(fs_info->sb);
         return;
     }
@@ -1605,6 +1610,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
     spin_unlock(&fs_info->unused_bgs_lock);
     mutex_unlock(&fs_info->reclaim_bgs_lock);
     btrfs_exclop_finish(fs_info);
+    sb_end_write(fs_info->sb);
 }
 
 void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
@@ -2006,6 +2012,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
     cache->length = key->offset;
     cache->used = btrfs_stack_block_group_used(bgi);
     cache->flags = btrfs_stack_block_group_flags(bgi);
+    cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
 
     set_free_space_tree_thresholds(cache);
@@ -2288,7 +2295,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
     spin_lock(&block_group->lock);
     btrfs_set_stack_block_group_used(&bgi, block_group->used);
     btrfs_set_stack_block_group_chunk_objectid(&bgi,
-                BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+                block_group->global_root_id);
     btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
     key.objectid = block_group->start;
     key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2444,6 +2451,27 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
     btrfs_trans_release_chunk_metadata(trans);
 }
 
+/*
+ * For extent tree v2 we use the block_group_item->chunk_offset to point at our
+ * global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
+ */
+static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
+{
+    u64 div = SZ_1G;
+    u64 index;
+
+    if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+        return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+    /* If we have a smaller fs index based on 128MiB. */
+    if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
+        div = SZ_128M;
+
+    offset = div64_u64(offset, div);
+    div64_u64_rem(offset, fs_info->nr_global_roots, &index);
+    return index;
+}
+
 struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
                                                  u64 bytes_used, u64 type,
                                                  u64 chunk_offset, u64 size)
@@ -2464,6 +2492,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
     cache->flags = type;
     cache->last_byte_to_unpin = (u64)-1;
     cache->cached = BTRFS_CACHE_FINISHED;
+    cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
+
     if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
         cache->needs_free_space = 1;
@@ -2693,7 +2723,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
     bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
     btrfs_set_stack_block_group_used(&bgi, cache->used);
     btrfs_set_stack_block_group_chunk_objectid(&bgi,
-                BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+                cache->global_root_id);
     btrfs_set_stack_block_group_flags(&bgi, cache->flags);
     write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
     btrfs_mark_buffer_dirty(leaf);
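
A hedged model of the calculate_global_root_id() mapping above: block
groups are striped across the global roots in 1GiB strides of logical
address space (128MiB strides for filesystems of 10GiB or less). The
helper below is a user-space restatement for illustration only; the
names are mine, and it assumes nr_global_roots is already known.

#include <stdio.h>
#include <stdint.h>

static uint64_t global_root_id(uint64_t offset, uint64_t total_bytes,
                               uint64_t nr_global_roots)
{
	/* stride: 128MiB for small filesystems, 1GiB otherwise */
	uint64_t div = (total_bytes <= 10ULL << 30) ? 128ULL << 20 : 1ULL << 30;

	return (offset / div) % nr_global_roots;
}

int main(void)
{
	/* A 100GiB fs with 4 global roots: the block group starting at
	 * 17GiB lands in global root (17 % 4) == 1. */
	printf("%llu\n", (unsigned long long)
	       global_root_id(17ULL << 30, 100ULL << 30, 4));
	return 0;
}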

--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -68,6 +68,7 @@ struct btrfs_block_group {
     u64 bytes_super;
     u64 flags;
     u64 cache_generation;
+    u64 global_root_id;
 
     /*
      * If the free space extent count exceeds this number, convert the block
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -13,6 +13,13 @@
 #include "ordered-data.h"
 #include "delayed-inode.h"
 
+/*
+ * Since we search a directory based on f_pos (struct dir_context::pos) we have
+ * to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
+ * everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
+ */
+#define BTRFS_DIR_START_INDEX 2
+
 /*
  * ordered_data_close is set by truncate when a file that used
  * to have good data has been truncated to zero. When it is set
@@ -173,8 +180,9 @@ struct btrfs_inode {
     u64 disk_i_size;
 
     /*
-     * if this is a directory then index_cnt is the counter for the index
-     * number for new files that are created
+     * If this is a directory then index_cnt is the counter for the index
+     * number for new files that are created. For an empty directory, this
+     * must be initialized to BTRFS_DIR_START_INDEX.
      */
     u64 index_cnt;
@@ -333,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
     spin_unlock(&inode->lock);
 }
 
+/*
+ * Should be called while holding the inode's VFS lock in exclusive mode or in a
+ * context where no one else can access the inode concurrently (during inode
+ * creation or when loading an inode from disk).
+ */
+static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
+{
+    set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+    /*
+     * The inode may have been part of a reflink operation in the last
+     * transaction that modified it, and then a fsync has reset the
+     * last_reflink_trans to avoid subsequent fsyncs in the same
+     * transaction to do unnecessary work. So update last_reflink_trans
+     * to the last_trans value (we have to be pessimistic and assume a
+     * reflink happened).
+     *
+     * The ->last_trans is protected by the inode's spinlock and we can
+     * have a concurrent ordered extent completion update it. Also set
+     * last_reflink_trans to ->last_trans only if the former is less than
+     * the later, because we can be called in a context where
+     * last_reflink_trans was set to the current transaction generation
+     * while ->last_trans was not yet updated in the current transaction,
+     * and therefore has a lower value.
+     */
+    spin_lock(&inode->lock);
+    if (inode->last_reflink_trans < inode->last_trans)
+        inode->last_reflink_trans = inode->last_trans;
+    spin_unlock(&inode->lock);
+}
+
 static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
     bool ret = false;

--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
     bi_size += bvec->bv_len;
 
     if (bio->bi_status)
-        cb->errors = 1;
+        cb->status = bio->bi_status;
 
     ASSERT(bi_size && bi_size <= cb->compressed_len);
     last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
@@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
     return last_io;
 }
 
-static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
+static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
     unsigned int index;
     struct page *page;
@@ -247,19 +247,18 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
     }
 
     /* Do io completion on the original bio */
-    if (cb->errors) {
-        bio_io_error(cb->orig_bio);
+    if (cb->status != BLK_STS_OK) {
+        cb->orig_bio->bi_status = cb->status;
+        bio_endio(cb->orig_bio);
     } else {
         struct bio_vec *bvec;
         struct bvec_iter_all iter_all;
 
-        ASSERT(bio);
-        ASSERT(!bio->bi_status);
         /*
          * We have verified the checksum already, set page checked so
         * the end_io handlers know about it
         */
-        ASSERT(!bio_flagged(bio, BIO_CLONED));
+        ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
         bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
             u64 bvec_start = page_offset(bvec->bv_page) +
                              bvec->bv_offset;
@@ -308,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
      * Some IO in this cb have failed, just skip checksum as there
      * is no way it could be correct.
      */
-    if (cb->errors == 1)
+    if (cb->status != BLK_STS_OK)
         goto csum_failed;
 
     inode = cb->inode;
@@ -324,8 +323,8 @@ static void end_compressed_bio_read(struct bio *bio)
 csum_failed:
     if (ret)
-        cb->errors = 1;
-    finish_compressed_bio_read(cb, bio);
+        cb->status = errno_to_blk_status(ret);
+    finish_compressed_bio_read(cb);
 out:
     bio_put(bio);
 }
@@ -342,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
     unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
     struct page *pages[16];
     unsigned long nr_pages = end_index - index + 1;
+    const int errno = blk_status_to_errno(cb->status);
     int i;
     int ret;
 
-    if (cb->errors)
-        mapping_set_error(inode->i_mapping, -EIO);
+    if (errno)
+        mapping_set_error(inode->i_mapping, errno);
 
     while (nr_pages > 0) {
         ret = find_get_pages_contig(inode->i_mapping, index,
@@ -358,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
             continue;
         }
         for (i = 0; i < ret; i++) {
-            if (cb->errors)
+            if (errno)
                 SetPageError(pages[i]);
             btrfs_page_clamp_clear_writeback(fs_info, pages[i],
                                              cb->start, cb->len);
@@ -381,9 +381,10 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
      */
     btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
                                          cb->start, cb->start + cb->len - 1,
-                                         !cb->errors);
+                                         cb->status == BLK_STS_OK);
 
-    end_compressed_writeback(inode, cb);
+    if (cb->writeback)
+        end_compressed_writeback(inode, cb);
     /* Note, our inode could be gone now */
 
     /*
@@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                            struct page **compressed_pages,
                                            unsigned int nr_pages,
                                            unsigned int write_flags,
-                                           struct cgroup_subsys_state *blkcg_css)
+                                           struct cgroup_subsys_state *blkcg_css,
+                                           bool writeback)
 {
     struct btrfs_fs_info *fs_info = inode->root->fs_info;
     struct bio *bio = NULL;
@@ -524,13 +526,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
     if (!cb)
         return BLK_STS_RESOURCE;
     refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
-    cb->errors = 0;
+    cb->status = BLK_STS_OK;
     cb->inode = &inode->vfs_inode;
     cb->start = start;
     cb->len = len;
     cb->mirror_num = 0;
     cb->compressed_pages = compressed_pages;
     cb->compressed_len = compressed_len;
+    cb->writeback = writeback;
     cb->orig_bio = NULL;
     cb->nr_pages = nr_pages;
@@ -591,7 +594,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
         if (submit) {
             if (!skip_sum) {
-                ret = btrfs_csum_one_bio(inode, bio, start, 1);
+                ret = btrfs_csum_one_bio(inode, bio, start, true);
                 if (ret)
                     goto finish_cb;
             }
@@ -808,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     u64 em_len;
     u64 em_start;
     struct extent_map *em;
-    blk_status_t ret = BLK_STS_RESOURCE;
+    blk_status_t ret;
     int faili = 0;
     u8 *sums;
@@ -821,17 +824,21 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     read_lock(&em_tree->lock);
     em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
     read_unlock(&em_tree->lock);
-    if (!em)
-        return BLK_STS_IOERR;
+    if (!em) {
+        ret = BLK_STS_IOERR;
+        goto out;
+    }
 
     ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
     compressed_len = em->block_len;
     cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
-    if (!cb)
+    if (!cb) {
+        ret = BLK_STS_RESOURCE;
         goto out;
+    }
 
     refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
-    cb->errors = 0;
+    cb->status = BLK_STS_OK;
     cb->inode = inode;
     cb->mirror_num = mirror_num;
     sums = cb->sums;
@@ -851,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
     cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
                                    GFP_NOFS);
-    if (!cb->compressed_pages)
+    if (!cb->compressed_pages) {
+        ret = BLK_STS_RESOURCE;
         goto fail1;
+    }
 
     for (pg_index = 0; pg_index < nr_pages; pg_index++) {
         cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
@@ -938,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
             comp_bio = NULL;
         }
     }
-    return 0;
+    return BLK_STS_OK;
 
 fail2:
     while (faili >= 0) {
@@ -951,6 +960,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     kfree(cb);
 out:
     free_extent_map(em);
+    bio->bi_status = ret;
+    bio_endio(bio);
     return ret;
 finish_cb:
     if (comp_bio) {
@@ -970,7 +981,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
      */
     ASSERT(refcount_read(&cb->pending_sectors));
     /* Now we are the only one referring @cb, can finish it safely. */
-    finish_compressed_bio_read(cb, NULL);
+    finish_compressed_bio_read(cb);
     return ret;
 }
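
The switch from a u8 errors flag to blk_status_t is mechanical, but the
completion accounting around it is worth a second look. Below is a toy
user-space model of how dec_and_test_compressed_bio() decides when a
compressed_bio is finished, using C11 atomics in place of the kernel's
refcount_t; it is illustrative only, and all names are mine.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint pending_sectors;

/* pending_sectors is pre-loaded with compressed_len >> sectorsize_bits;
 * every completed bio subtracts its own sector count, and only the
 * caller whose subtraction reaches zero finishes the compressed_bio. */
static int dec_and_test(unsigned int sectors)
{
	return atomic_fetch_sub(&pending_sectors, sectors) == sectors;
}

int main(void)
{
	atomic_init(&pending_sectors, 128 * 1024 >> 12); /* 128K, 4K sectors */

	printf("%d\n", dec_and_test(16));	/* 0: work remains */
	printf("%d\n", dec_and_test(16));	/* 1: last completer */
	return 0;
}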

--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -22,6 +22,8 @@ struct btrfs_inode;
 /* Maximum length of compressed data stored on disk */
 #define BTRFS_MAX_COMPRESSED (SZ_128K)
+static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
+
 /* Maximum size of data before compression */
 #define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
@@ -52,8 +54,11 @@ struct compressed_bio {
     /* The compression algorithm for this bio */
     u8 compress_type;
 
+    /* Whether this is a write for writeback. */
+    bool writeback;
+
     /* IO errors */
-    u8 errors;
+    blk_status_t status;
     int mirror_num;
 
     /* for reads, this is the bio we are copying the data into */
@@ -95,7 +100,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                            struct page **compressed_pages,
                                            unsigned int nr_pages,
                                            unsigned int write_flags,
-                                           struct cgroup_subsys_state *blkcg_css);
+                                           struct cgroup_subsys_state *blkcg_css,
+                                           bool writeback);
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                           int mirror_num, unsigned long bio_flags);

--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -846,9 +846,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
                          btrfs_header_owner(parent),
                          btrfs_node_ptr_generation(parent, slot),
                          level - 1, &first_key);
-    if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+    if (IS_ERR(eb))
+        return eb;
+    if (!extent_buffer_uptodate(eb)) {
         free_extent_buffer(eb);
-        eb = ERR_PTR(-EIO);
+        return ERR_PTR(-EIO);
     }
 
     return eb;
@@ -1436,13 +1438,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
         /* now we're allowed to do a blocking uptodate check */
         ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
-        if (!ret) {
-            *eb_ret = tmp;
-            return 0;
+        if (ret) {
+            free_extent_buffer(tmp);
+            btrfs_release_path(p);
+            return -EIO;
         }
-        free_extent_buffer(tmp);
-        btrfs_release_path(p);
-        return -EIO;
+        *eb_ret = tmp;
+        return 0;
     }
 
     /*
@@ -1460,19 +1462,19 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
     ret = -EAGAIN;
     tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
                           gen, parent_level - 1, &first_key);
-    if (!IS_ERR(tmp)) {
-        /*
-         * If the read above didn't mark this buffer up to date,
-         * it will never end up being up to date. Set ret to EIO now
-         * and give up so that our caller doesn't loop forever
-         * on our EAGAINs.
-         */
-        if (!extent_buffer_uptodate(tmp))
-            ret = -EIO;
-        free_extent_buffer(tmp);
-    } else {
-        ret = PTR_ERR(tmp);
+    if (IS_ERR(tmp)) {
+        btrfs_release_path(p);
+        return PTR_ERR(tmp);
     }
+    /*
+     * If the read above didn't mark this buffer up to date,
+     * it will never end up being up to date. Set ret to EIO now
+     * and give up so that our caller doesn't loop forever
+     * on our EAGAINs.
+     */
+    if (!extent_buffer_uptodate(tmp))
+        ret = -EIO;
+    free_extent_buffer(tmp);
 
     btrfs_release_path(p);
     return ret;
@@ -2990,16 +2992,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
     if (free_space < data_size)
         goto out_unlock;
 
-    /* cow and double check */
     ret = btrfs_cow_block(trans, root, right, upper,
                           slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
     if (ret)
         goto out_unlock;
 
-    free_space = btrfs_leaf_free_space(right);
-    if (free_space < data_size)
-        goto out_unlock;
-
     left_nritems = btrfs_header_nritems(left);
     if (left_nritems == 0)
         goto out_unlock;
@@ -3224,7 +3221,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
         goto out;
     }
 
-    /* cow and double check */
     ret = btrfs_cow_block(trans, root, left,
                           path->nodes[1], slot - 1, &left,
                           BTRFS_NESTING_LEFT_COW);
@@ -3235,12 +3231,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
         goto out;
     }
 
-    free_space = btrfs_leaf_free_space(left);
-    if (free_space < data_size) {
-        ret = 1;
-        goto out;
-    }
-
     if (check_sibling_keys(left, right)) {
         ret = -EUCLEAN;
         goto out;
@@ -4170,24 +4160,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 {
     struct btrfs_fs_info *fs_info = root->fs_info;
     struct extent_buffer *leaf;
-    u32 last_off;
-    u32 dsize = 0;
     int ret = 0;
     int wret;
-    int i;
     u32 nritems;
 
     leaf = path->nodes[0];
-    last_off = btrfs_item_offset(leaf, slot + nr - 1);
-
-    for (i = 0; i < nr; i++)
-        dsize += btrfs_item_size(leaf, slot + i);
-
     nritems = btrfs_header_nritems(leaf);
 
     if (slot + nr != nritems) {
-        int data_end = leaf_data_end(leaf);
+        const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
+        const int data_end = leaf_data_end(leaf);
         struct btrfs_map_token token;
+        u32 dsize = 0;
+        int i;
+
+        for (i = 0; i < nr; i++)
+            dsize += btrfs_item_size(leaf, slot + i);
 
         memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
                               data_end + dsize,
@@ -4227,24 +4215,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
             fixup_low_keys(path, &disk_key, 1);
         }
 
-        /* delete the leaf if it is mostly empty */
+        /*
+         * Try to delete the leaf if it is mostly empty. We do this by
+         * trying to move all its items into its left and right neighbours.
+         * If we can't move all the items, then we don't delete it - it's
+         * not ideal, but future insertions might fill the leaf with more
+         * items, or items from other leaves might be moved later into our
+         * leaf due to deletions on those leaves.
+         */
         if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
+            u32 min_push_space;
+
             /* push_leaf_left fixes the path.
              * make sure the path still points to our leaf
              * for possible call to del_ptr below
             */
             slot = path->slots[1];
             atomic_inc(&leaf->refs);
-
-            wret = push_leaf_left(trans, root, path, 1, 1,
-                                  1, (u32)-1);
+            /*
+             * We want to be able to at least push one item to the
+             * left neighbour leaf, and that's the first item.
+             */
+            min_push_space = sizeof(struct btrfs_item) +
+                             btrfs_item_size(leaf, 0);
+            wret = push_leaf_left(trans, root, path, 0,
+                                  min_push_space, 1, (u32)-1);
             if (wret < 0 && wret != -ENOSPC)
                 ret = wret;
 
             if (path->nodes[0] == leaf &&
                 btrfs_header_nritems(leaf)) {
-                wret = push_leaf_right(trans, root, path, 1,
-                                       1, 1, 0);
+                /*
+                 * If we were not able to push all items from our
+                 * leaf to its left neighbour, then attempt to
+                 * either push all the remaining items to the
+                 * right neighbour or none. There's no advantage
+                 * in pushing only some items, instead of all, as
+                 * it's pointless to end up with a leaf having
+                 * too few items while the neighbours can be full
+                 * or nearly full.
+                 */
                nritems = btrfs_header_nritems(leaf);
+                min_push_space = leaf_space_used(leaf, 0, nritems);
+                wret = push_leaf_right(trans, root, path, 0,
+                                       min_push_space, 1, 0);
                 if (wret < 0 && wret != -ENOSPC)
                     ret = wret;
             }

--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
 struct btrfs_ordered_sum;
 struct btrfs_ref;
 struct btrfs_bio;
+struct btrfs_ioctl_encoded_io_args;
 
 #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@@ -148,6 +149,8 @@ enum {
     /* Indicates there was an error cleaning up a log tree. */
     BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+
+    BTRFS_FS_STATE_COUNT
 };
 
 #define BTRFS_BACKREF_REV_MAX 256
@@ -274,8 +277,14 @@ struct btrfs_super_block {
     /* the UUID written into btree blocks */
     u8 metadata_uuid[BTRFS_FSID_SIZE];
 
+    /* Extent tree v2 */
+    __le64 block_group_root;
+    __le64 block_group_root_generation;
+    u8 block_group_root_level;
+
     /* future expansion */
-    __le64 reserved[28];
+    u8 reserved8[7];
+    __le64 reserved[25];
     u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
     struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
@@ -300,6 +309,26 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
 
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Extent tree v2 supported only with CONFIG_BTRFS_DEBUG
+ */
+#define BTRFS_FEATURE_INCOMPAT_SUPP            \
+    (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |    \
+     BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |   \
+     BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |     \
+     BTRFS_FEATURE_INCOMPAT_BIG_METADATA |     \
+     BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |     \
+     BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |    \
+     BTRFS_FEATURE_INCOMPAT_RAID56 |           \
+     BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |    \
+     BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |  \
+     BTRFS_FEATURE_INCOMPAT_NO_HOLES |         \
+     BTRFS_FEATURE_INCOMPAT_METADATA_UUID |    \
+     BTRFS_FEATURE_INCOMPAT_RAID1C34 |         \
+     BTRFS_FEATURE_INCOMPAT_ZONED |            \
+     BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
+#else
 #define BTRFS_FEATURE_INCOMPAT_SUPP            \
     (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |    \
      BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |   \
@@ -314,6 +343,7 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
      BTRFS_FEATURE_INCOMPAT_METADATA_UUID |    \
      BTRFS_FEATURE_INCOMPAT_RAID1C34 |         \
      BTRFS_FEATURE_INCOMPAT_ZONED)
+#endif
 
 #define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
     (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
@@ -636,6 +666,7 @@ struct btrfs_fs_info {
     struct btrfs_root *quota_root;
     struct btrfs_root *uuid_root;
     struct btrfs_root *data_reloc_root;
+    struct btrfs_root *block_group_root;
 
     /* the log root tree is a directory of all the other log roots */
     struct btrfs_root *log_root_tree;
@@ -1030,6 +1061,8 @@ struct btrfs_fs_info {
     spinlock_t relocation_bg_lock;
     u64 data_reloc_bg;
 
+    u64 nr_global_roots;
+
     spinlock_t zone_active_bgs_lock;
     struct list_head zone_active_bgs;
@@ -1609,25 +1642,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
 static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
                                    const type *s) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     return btrfs_get_##bits(eb, s, offsetof(type, member)); \
 } \
 static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
                                     u##bits val) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     btrfs_set_##bits(eb, s, offsetof(type, member), val); \
 } \
 static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
                                          const type *s) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     return btrfs_get_token_##bits(token, s, offsetof(type, member));\
 } \
 static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
                                           type *s, u##bits val) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
 }
@@ -1658,8 +1691,8 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
 static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
                                            struct btrfs_dev_item *s)
 {
-    BUILD_BUG_ON(sizeof(u64) !=
-                 sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+    static_assert(sizeof(u64) ==
+                  sizeof(((struct btrfs_dev_item *)0))->total_bytes);
     return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
                                         total_bytes));
 }
@@ -1667,8 +1700,8 @@ static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
                                                 struct btrfs_dev_item *s,
                                                 u64 val)
 {
-    BUILD_BUG_ON(sizeof(u64) !=
-                 sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+    static_assert(sizeof(u64) ==
+                  sizeof(((struct btrfs_dev_item *)0))->total_bytes);
     WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
     btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
 }
@@ -2328,6 +2361,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
 BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
                          num_devices, 64);
 
+/*
+ * For extent tree v2 we overload the extent root with the block group root, as
+ * we will have multiple extent roots.
+ */
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
+                         extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
+                         extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
+                         struct btrfs_root_backup, extent_root_level, 8);
+
 /* struct btrfs_balance_item */
 BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
@@ -2462,6 +2506,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
 BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
 BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
                          uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
+                         block_group_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
+                         struct btrfs_super_block,
+                         block_group_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
+                         block_group_root_level, 8);
 
 int btrfs_super_csum_size(const struct btrfs_super_block *s);
 const char *btrfs_super_csum_name(u16 csum_type);
@@ -2839,7 +2890,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                       struct btrfs_block_rsv *rsv);
 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
 
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+                                    u64 disk_num_bytes);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
                                    u64 start, u64 end);
@@ -3155,7 +3207,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            struct btrfs_ordered_sum *sums);
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
-                                u64 file_start, int contig);
+                                u64 offset, bool one_ordered);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                              struct list_head *list, int search_commit);
 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
@@ -3256,6 +3308,11 @@ int btrfs_writepage_cow_fixup(struct page *page);
 void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
                                           struct page *page, u64 start,
                                           u64 end, bool uptodate);
+ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+                           struct btrfs_ioctl_encoded_io_args *encoded);
+ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+                               const struct btrfs_ioctl_encoded_io_args *encoded);
+
 extern const struct dentry_operations btrfs_dentry_operations;
 extern const struct iomap_ops btrfs_dio_iomap_ops;
 extern const struct iomap_dio_ops btrfs_dio_ops;
@@ -3318,6 +3375,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
                                struct btrfs_trans_handle **trans_out);
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                               struct btrfs_inode *inode, u64 start, u64 end);
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+                            const struct btrfs_ioctl_encoded_io_args *encoded);
 int btrfs_release_file(struct inode *inode, struct file *file);
 int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
                       size_t num_pages, loff_t pos, size_t write_bytes,
@@ -3774,7 +3833,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root);
 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
-int btrfs_recover_relocation(struct btrfs_root *root);
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
 int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, struct extent_buffer *buf,
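
A quick sanity check on the struct btrfs_super_block change above: the
three new block group root fields plus the shrunken reserved areas occupy
exactly the 224 bytes that reserved[28] did, so every later field keeps
its on-disk offset. The arithmetic, as a tiny standalone check:

#include <assert.h>

int main(void)
{
	/* old layout: __le64 reserved[28] */
	unsigned old_bytes = 28 * 8;
	/* new layout: two __le64 + one u8, padded by u8[7] + __le64[25] */
	unsigned new_bytes = 8 + 8 + 1 + 7 + 25 * 8;

	assert(old_bytes == new_bytes && new_bytes == 224);
	return 0;
}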

--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -270,11 +270,11 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 }
 
 static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
-                                    u64 num_bytes, u64 *meta_reserve,
-                                    u64 *qgroup_reserve)
+                                    u64 num_bytes, u64 disk_num_bytes,
+                                    u64 *meta_reserve, u64 *qgroup_reserve)
 {
     u64 nr_extents = count_max_extents(num_bytes);
-    u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+    u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
     u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
 
     *meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
@@ -288,7 +288,8 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
     *qgroup_reserve = nr_extents * fs_info->nodesize;
 }
 
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+                                    u64 disk_num_bytes)
 {
     struct btrfs_root *root = inode->root;
     struct btrfs_fs_info *fs_info = root->fs_info;
@@ -318,6 +319,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
     }
 
     num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
+    disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize);
 
     /*
      * We always want to do it this way, every other way is wrong and ends
@@ -329,8 +331,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
      * everything out and try again, which is bad. This way we just
      * over-reserve slightly, and clean up the mess when we are done.
      */
-    calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
-                            &qgroup_reserve);
+    calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+                            &meta_reserve, &qgroup_reserve);
     ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
     if (ret)
         return ret;
@@ -349,7 +351,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
     spin_lock(&inode->lock);
     nr_extents = count_max_extents(num_bytes);
     btrfs_mod_outstanding_extents(inode, nr_extents);
-    inode->csum_bytes += num_bytes;
+    inode->csum_bytes += disk_num_bytes;
     btrfs_calculate_inode_block_rsv_size(fs_info, inode);
     spin_unlock(&inode->lock);
@@ -454,7 +456,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
     ret = btrfs_check_data_free_space(inode, reserved, start, len);
     if (ret < 0)
         return ret;
-    ret = btrfs_delalloc_reserve_metadata(inode, len);
+    ret = btrfs_delalloc_reserve_metadata(inode, len, len);
     if (ret < 0) {
         btrfs_free_reserved_data_space(inode, *reserved, start, len);
         extent_changeset_free(*reserved);
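
The point of threading disk_num_bytes through the reservation path is
that checksums are created per on-disk sector, so for compressed (and,
later, encoded) writes the csum reservation should follow what actually
lands on disk, not the logical write size. A back-of-the-envelope
illustration, assuming 4KiB sectors; the real leaf-count math lives in
btrfs_csum_bytes_to_leaves().

#include <stdio.h>

int main(void)
{
	unsigned long num_bytes = 1UL << 20;        /* 1MiB buffered write */
	unsigned long disk_num_bytes = 128UL << 10; /* compresses to 128KiB */

	/* one checksum per 4KiB on-disk sector */
	printf("csums if sized by logical bytes: %lu\n", num_bytes >> 12);      /* 256 */
	printf("csums actually needed:           %lu\n", disk_num_bytes >> 12); /* 32 */
	return 0;
}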

--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -243,6 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
                                          struct btrfs_device *srcdev,
                                          struct btrfs_device **device_out)
 {
+    struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
     struct btrfs_device *device;
     struct block_device *bdev;
     struct rcu_string *name;
@@ -271,7 +272,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
     sync_blockdev(bdev);
 
-    list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+    list_for_each_entry(device, &fs_devices->devices, dev_list) {
         if (device->bdev == bdev) {
             btrfs_err(fs_info,
                       "target device is in the filesystem!");
@@ -302,6 +303,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
         goto error;
     }
     rcu_assign_pointer(device->name, name);
+    ret = lookup_bdev(device_path, &device->devt);
+    if (ret)
+        goto error;
 
     set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
     device->generation = 0;
@@ -320,17 +324,17 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
     device->mode = FMODE_EXCL;
     device->dev_stats_valid = 1;
     set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
-    device->fs_devices = fs_info->fs_devices;
+    device->fs_devices = fs_devices;
 
     ret = btrfs_get_dev_zone_info(device, false);
     if (ret)
         goto error;
 
-    mutex_lock(&fs_info->fs_devices->device_list_mutex);
-    list_add(&device->dev_list, &fs_info->fs_devices->devices);
-    fs_info->fs_devices->num_devices++;
-    fs_info->fs_devices->open_devices++;
-    mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+    mutex_lock(&fs_devices->device_list_mutex);
+    list_add(&device->dev_list, &fs_devices->devices);
+    fs_devices->num_devices++;
+    fs_devices->open_devices++;
+    mutex_unlock(&fs_devices->device_list_mutex);
 
     *device_out = device;
     return 0;
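
The lookup_bdev() call added above records the target's dev_t at
replace-start time, matching the series' theme of identifying devices by
number rather than by path (useful when devices sit behind layers like
device mapper). A user-space analogue of that lookup, purely for
illustration, is stat(2) on the device node:

#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

int main(int argc, char **argv)
{
	struct stat st;

	if (argc < 2 || stat(argv[1], &st) || !S_ISBLK(st.st_mode)) {
		fprintf(stderr, "usage: %s <block-device>\n", argv[0]);
		return 1;
	}
	/* resolve a path to the stable device number behind it */
	printf("%s -> dev_t %u:%u\n", argv[1],
	       major(st.st_rdev), minor(st.st_rdev));
	return 0;
}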

--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
     else
         ret = btrfs_check_leaf_full(eb);
 
-    if (ret < 0) {
-        btrfs_print_tree(eb, 0);
+    if (ret < 0)
+        goto error;
+
+    /*
+     * Also check the generation, the eb reached here must be newer than
+     * last committed. Or something seriously wrong happened.
+     */
+    if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
+        ret = -EUCLEAN;
         btrfs_err(fs_info,
-                  "block=%llu write time tree block corruption detected",
-                  eb->start);
-        WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
-        return ret;
+                  "block=%llu bad generation, have %llu expect > %llu",
+                  eb->start, btrfs_header_generation(eb),
+                  fs_info->last_trans_committed);
+        goto error;
     }
     write_extent_buffer(eb, result, 0, fs_info->csum_size);
 
     return 0;
+
+error:
+    btrfs_print_tree(eb, 0);
+    btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
+              eb->start);
+    WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+    return ret;
 }
 
 /* Checksum all dirty extent buffers in one bio_vec */
@@ -1289,12 +1303,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
     return root;
 }
 
+static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+    struct btrfs_block_group *block_group;
+    u64 ret;
+
+    if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+        return 0;
+
+    if (bytenr)
+        block_group = btrfs_lookup_block_group(fs_info, bytenr);
+    else
+        block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
+    ASSERT(block_group);
+    if (!block_group)
+        return 0;
+    ret = block_group->global_root_id;
+    btrfs_put_block_group(block_group);
+    return ret;
+}
+
 struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
     struct btrfs_key key = {
         .objectid = BTRFS_CSUM_TREE_OBJECTID,
         .type = BTRFS_ROOT_ITEM_KEY,
-        .offset = 0,
+        .offset = btrfs_global_root_id(fs_info, bytenr),
     };
 
     return btrfs_global_root(fs_info, &key);
@@ -1305,7 +1340,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
     struct btrfs_key key = {
         .objectid = BTRFS_EXTENT_TREE_OBJECTID,
         .type = BTRFS_ROOT_ITEM_KEY,
-        .offset = 0,
+        .offset = btrfs_global_root_id(fs_info, bytenr),
     };
 
     return btrfs_global_root(fs_info, &key);
@@ -1522,7 +1557,8 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
         ret = PTR_ERR(root->node);
         root->node = NULL;
         goto fail;
-    } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+    }
+    if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
         ret = -EIO;
         goto fail;
     }
@@ -1727,6 +1763,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
     btrfs_put_root(fs_info->uuid_root);
     btrfs_put_root(fs_info->fs_root);
     btrfs_put_root(fs_info->data_reloc_root);
+    btrfs_put_root(fs_info->block_group_root);
     btrfs_check_leaked_roots(fs_info);
     btrfs_extent_buffer_leak_debug_check(fs_info);
     kfree(fs_info->super_copy);
@@ -1925,8 +1962,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 static int cleaner_kthread(void *arg)
 {
-    struct btrfs_root *root = arg;
-    struct btrfs_fs_info *fs_info = root->fs_info;
+    struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)arg;
     int again;
 
     while (1) {
@@ -1959,7 +1995,7 @@ static int cleaner_kthread(void *arg)
         btrfs_run_delayed_iputs(fs_info);
 
-        again = btrfs_clean_one_deleted_snapshot(root);
+        again = btrfs_clean_one_deleted_snapshot(fs_info);
         mutex_unlock(&fs_info->cleaner_mutex);
 
         /*
@@ -2095,8 +2131,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 {
     const int next_backup = info->backup_root_index;
     struct btrfs_root_backup *root_backup;
-    struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
-    struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
     root_backup = info->super_for_commit->super_roots + next_backup;
@@ -2121,11 +2155,30 @@ static void backup_super_roots(struct btrfs_fs_info *info)
     btrfs_set_backup_chunk_root_level(root_backup,
                           btrfs_header_level(info->chunk_root->node));
 
-    btrfs_set_backup_extent_root(root_backup, extent_root->node->start);
-    btrfs_set_backup_extent_root_gen(root_backup,
-                          btrfs_header_generation(extent_root->node));
-    btrfs_set_backup_extent_root_level(root_backup,
-                          btrfs_header_level(extent_root->node));
+    if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
+        btrfs_set_backup_block_group_root(root_backup,
+                          info->block_group_root->node->start);
+        btrfs_set_backup_block_group_root_gen(root_backup,
+                          btrfs_header_generation(info->block_group_root->node));
+        btrfs_set_backup_block_group_root_level(root_backup,
+                          btrfs_header_level(info->block_group_root->node));
+    } else {
+        struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+        struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
+
+        btrfs_set_backup_extent_root(root_backup,
+                          extent_root->node->start);
+        btrfs_set_backup_extent_root_gen(root_backup,
+                          btrfs_header_generation(extent_root->node));
+        btrfs_set_backup_extent_root_level(root_backup,
+                          btrfs_header_level(extent_root->node));
+
+        btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
+        btrfs_set_backup_csum_root_gen(root_backup,
+                          btrfs_header_generation(csum_root->node));
+        btrfs_set_backup_csum_root_level(root_backup,
+                          btrfs_header_level(csum_root->node));
+    }
 
     /*
      * we might commit during log recovery, which happens before we set
@@ -2146,12 +2199,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
     btrfs_set_backup_dev_root_level(root_backup,
                           btrfs_header_level(info->dev_root->node));
 
-    btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
-    btrfs_set_backup_csum_root_gen(root_backup,
-                          btrfs_header_generation(csum_root->node));
-    btrfs_set_backup_csum_root_level(root_backup,
-                          btrfs_header_level(csum_root->node));
-
     btrfs_set_backup_total_bytes(root_backup,
                           btrfs_super_total_bytes(info->super_copy));
     btrfs_set_backup_bytes_used(root_backup,
@@ -2269,6 +2316,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
     free_root_extent_buffers(info->uuid_root);
     free_root_extent_buffers(info->fs_root);
     free_root_extent_buffers(info->data_reloc_root);
+    free_root_extent_buffers(info->block_group_root);
     if (free_chunk_root)
         free_root_extent_buffers(info->chunk_root);
 }
@@ -2504,11 +2552,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
         log_tree_root->node = NULL;
         btrfs_put_root(log_tree_root);
         return ret;
-    } else if (!extent_buffer_uptodate(log_tree_root->node)) {
+    }
+    if (!extent_buffer_uptodate(log_tree_root->node)) {
         btrfs_err(fs_info, "failed to read log tree");
         btrfs_put_root(log_tree_root);
         return -EIO;
     }
+
     /* returns with log_tree_root freed on success */
     ret = btrfs_recover_log_trees(log_tree_root);
     if (ret) {
@@ -2533,6 +2583,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
 {
     struct btrfs_fs_info *fs_info = tree_root->fs_info;
     struct btrfs_root *root;
+    u64 max_global_id = 0;
     int ret;
     struct btrfs_key key = {
         .objectid = objectid,
@@ -2568,6 +2619,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
             break;
         btrfs_release_path(path);
 
+        /*
+         * Just worry about this for extent tree, it'll be the same for
+         * everybody.
+         */
+        if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+            max_global_id = max(max_global_id, key.offset);
+
         found = true;
         root = read_tree_root_path(tree_root, path, &key);
         if (IS_ERR(root)) {
@@ -2585,6 +2643,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
     }
     btrfs_release_path(path);
 
+    if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+        fs_info->nr_global_roots = max_global_id + 1;
+
     if (!found || ret) {
         if (objectid == BTRFS_CSUM_TREE_OBJECTID)
             set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
@@ -2930,6 +2991,56 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
     return ret;
 }
 
+static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
+{
+    int ret = 0;
+
+    root->node = read_tree_block(root->fs_info, bytenr,
+                                 root->root_key.objectid, gen, level, NULL);
+    if (IS_ERR(root->node)) {
+        ret = PTR_ERR(root->node);
+        root->node = NULL;
+        return ret;
+    }
+    if (!extent_buffer_uptodate(root->node)) {
+        free_extent_buffer(root->node);
+        root->node = NULL;
+        return -EIO;
+    }
+
+    btrfs_set_root_node(&root->root_item, root->node);
+    root->commit_root = btrfs_root_node(root);
+    btrfs_set_root_refs(&root->root_item, 1);
+    return ret;
+}
+
+static int load_important_roots(struct btrfs_fs_info *fs_info)
+{
+    struct btrfs_super_block *sb = fs_info->super_copy;
+    u64 gen, bytenr;
+    int level, ret;
+
+    bytenr = btrfs_super_root(sb);
+    gen = btrfs_super_generation(sb);
+    level = btrfs_super_root_level(sb);
+    ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
+    if (ret) {
+        btrfs_warn(fs_info, "couldn't read tree root");
+        return ret;
+    }
+
+    if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+        return 0;
+
+    bytenr = btrfs_super_block_group_root(sb);
+    gen = btrfs_super_block_group_root_generation(sb);
+    level = btrfs_super_block_group_root_level(sb);
+    ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
+    if (ret)
+        btrfs_warn(fs_info, "couldn't read block group root");
+    return ret;
+}
+
 static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 {
     int backup_index = find_newest_super_backup(fs_info);
@ -2939,10 +3050,17 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
int ret = 0; int ret = 0;
int i; int i;
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
u64 generation; struct btrfs_root *root;
int level;
root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
GFP_KERNEL);
if (!root)
return -ENOMEM;
fs_info->block_group_root = root;
}
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
if (handle_error) { if (handle_error) {
if (!IS_ERR(tree_root->node)) if (!IS_ERR(tree_root->node))
free_extent_buffer(tree_root->node); free_extent_buffer(tree_root->node);
@ -2967,29 +3085,13 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
if (ret < 0) if (ret < 0)
return ret; return ret;
} }
generation = btrfs_super_generation(sb);
level = btrfs_super_root_level(sb);
tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
BTRFS_ROOT_TREE_OBJECTID,
generation, level, NULL);
if (IS_ERR(tree_root->node)) {
handle_error = true;
ret = PTR_ERR(tree_root->node);
tree_root->node = NULL;
btrfs_warn(fs_info, "couldn't read tree root");
continue;
} else if (!extent_buffer_uptodate(tree_root->node)) { ret = load_important_roots(fs_info);
if (ret) {
handle_error = true; handle_error = true;
ret = -EIO;
btrfs_warn(fs_info, "error while reading tree root");
continue; continue;
} }
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
btrfs_set_root_refs(&tree_root->root_item, 1);
/* /*
* No need to hold btrfs_root::objectid_mutex since the fs * No need to hold btrfs_root::objectid_mutex since the fs
* hasn't been fully initialised and we are the only user * hasn't been fully initialised and we are the only user
@ -3009,8 +3111,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
} }
/* All successful */ /* All successful */
fs_info->generation = generation; fs_info->generation = btrfs_header_generation(tree_root->node);
fs_info->last_trans_committed = generation; fs_info->last_trans_committed = fs_info->generation;
fs_info->last_reloc_trans = 0; fs_info->last_reloc_trans = 0;
/* Always begin writing backup roots after the one being used */ /* Always begin writing backup roots after the one being used */
@ -3293,7 +3395,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
up_read(&fs_info->cleanup_work_sem); up_read(&fs_info->cleanup_work_sem);
mutex_lock(&fs_info->cleaner_mutex); mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(fs_info->tree_root); ret = btrfs_recover_relocation(fs_info);
mutex_unlock(&fs_info->cleaner_mutex); mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) { if (ret < 0) {
btrfs_warn(fs_info, "failed to recover relocation: %d", ret); btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
@ -3594,21 +3696,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
generation = btrfs_super_chunk_root_generation(disk_super); generation = btrfs_super_chunk_root_generation(disk_super);
level = btrfs_super_chunk_root_level(disk_super); level = btrfs_super_chunk_root_level(disk_super);
ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
chunk_root->node = read_tree_block(fs_info, generation, level);
btrfs_super_chunk_root(disk_super), if (ret) {
BTRFS_CHUNK_TREE_OBJECTID,
generation, level, NULL);
if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) {
btrfs_err(fs_info, "failed to read chunk root"); btrfs_err(fs_info, "failed to read chunk root");
if (!IS_ERR(chunk_root->node))
free_extent_buffer(chunk_root->node);
chunk_root->node = NULL;
goto fail_tree_roots; goto fail_tree_roots;
} }
btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
chunk_root->commit_root = btrfs_root_node(chunk_root);
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
offsetof(struct btrfs_header, chunk_tree_uuid), offsetof(struct btrfs_header, chunk_tree_uuid),
@ -3728,7 +3821,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs; goto fail_sysfs;
} }
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
"btrfs-cleaner"); "btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread)) if (IS_ERR(fs_info->cleaner_kthread))
goto fail_sysfs; goto fail_sysfs;


@@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
 
 static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
 {
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+               return fs_info->block_group_root;
        return btrfs_extent_root(fs_info, 0);
 }
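For orientation, a minimal sketch of a typical caller of this helper; the block group item update shown here is illustrative, not quoted from the series. The point of the helper is that callers never test EXTENT_TREE_V2 themselves: they ask for the root that holds block group items and get either the dedicated block group tree or global extent root 0.

    /* Hypothetical caller, e.g. updating a block group item. */
    struct btrfs_root *root = btrfs_block_group_root(fs_info);
    struct btrfs_key key = {
            .objectid = block_group->start,
            .type = BTRFS_BLOCK_GROUP_ITEM_KEY,
            .offset = block_group->length,
    };

    /* Same search regardless of which tree actually holds the item. */
    ret = btrfs_search_slot(trans, root, &key, path, 0, 1);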


@@ -598,7 +598,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_root *root,
                                           struct btrfs_path *path,
-                                          int refs_to_drop, int *last_ref)
+                                          int refs_to_drop)
 {
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref1 = NULL;
@@ -631,7 +631,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 
        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
-               *last_ref = 1;
        } else {
                if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
                        btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1072,8 +1071,7 @@ static noinline_for_stack
 void update_inline_extent_backref(struct btrfs_path *path,
                                  struct btrfs_extent_inline_ref *iref,
                                  int refs_to_mod,
-                                 struct btrfs_delayed_extent_op *extent_op,
-                                 int *last_ref)
+                                 struct btrfs_delayed_extent_op *extent_op)
 {
        struct extent_buffer *leaf = path->nodes[0];
        struct btrfs_extent_item *ei;
@@ -1121,7 +1119,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
                else
                        btrfs_set_shared_data_ref_count(leaf, sref, refs);
        } else {
-               *last_ref = 1;
                size = btrfs_extent_inline_ref_size(type);
                item_size = btrfs_item_size(leaf, path->slots[0]);
                ptr = (unsigned long)iref;
@@ -1166,8 +1163,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
                        }
                        return -EUCLEAN;
                }
-               update_inline_extent_backref(path, iref, refs_to_add,
-                                            extent_op, NULL);
+               update_inline_extent_backref(path, iref, refs_to_add, extent_op);
        } else if (ret == -ENOENT) {
                setup_inline_extent_backref(trans->fs_info, path, iref, parent,
                                            root_objectid, owner, offset,
@@ -1181,21 +1177,17 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref *iref,
-                                int refs_to_drop, int is_data, int *last_ref)
+                                int refs_to_drop, int is_data)
 {
        int ret = 0;
 
        BUG_ON(!is_data && refs_to_drop != 1);
-       if (iref) {
-               update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
-                                            last_ref);
-       } else if (is_data) {
-               ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
-                                            last_ref);
-       } else {
-               *last_ref = 1;
+       if (iref)
+               update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+       else if (is_data)
+               ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+       else
                ret = btrfs_del_item(trans, root, path);
-       }
+
        return ret;
 }
@@ -2766,12 +2758,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
                spin_unlock(&cache->lock);
                if (!readonly && return_free_space &&
                    global_rsv->space_info == space_info) {
-                       u64 to_add = len;
-
                        spin_lock(&global_rsv->lock);
                        if (!global_rsv->full) {
-                               to_add = min(len, global_rsv->size -
-                                            global_rsv->reserved);
+                               u64 to_add = min(len, global_rsv->size -
+                                                     global_rsv->reserved);
+
                                global_rsv->reserved += to_add;
                                btrfs_space_info_update_bytes_may_use(fs_info,
                                                space_info, to_add);
@@ -2862,6 +2853,35 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
        return 0;
 }
 
+static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
+                                    u64 bytenr, u64 num_bytes, bool is_data)
+{
+       int ret;
+
+       if (is_data) {
+               struct btrfs_root *csum_root;
+
+               csum_root = btrfs_csum_root(trans->fs_info, bytenr);
+               ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
+               if (ret) {
+                       btrfs_abort_transaction(trans, ret);
+                       return ret;
+               }
+       }
+
+       ret = add_to_free_space_tree(trans, bytenr, num_bytes);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               return ret;
+       }
+
+       ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
+       if (ret)
+               btrfs_abort_transaction(trans, ret);
+
+       return ret;
+}
+
 /*
  * Drop one or more refs of @node.
  *
@@ -2943,7 +2963,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        u64 refs;
        u64 bytenr = node->bytenr;
        u64 num_bytes = node->num_bytes;
-       int last_ref = 0;
        bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
 
        extent_root = btrfs_extent_root(info, bytenr);
@@ -3010,8 +3029,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                        /* Must be SHARED_* item, remove the backref first */
                        ret = remove_extent_backref(trans, extent_root, path,
-                                                   NULL, refs_to_drop, is_data,
-                                                   &last_ref);
+                                                   NULL, refs_to_drop, is_data);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                goto out;
@@ -3136,8 +3154,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                }
                if (found_extent) {
                        ret = remove_extent_backref(trans, extent_root, path,
-                                                   iref, refs_to_drop, is_data,
-                                                   &last_ref);
+                                                   iref, refs_to_drop, is_data);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                goto out;
@@ -3182,7 +3199,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                }
 
-               last_ref = 1;
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                if (ret) {
@@ -3191,28 +3207,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                }
                btrfs_release_path(path);
 
-               if (is_data) {
-                       struct btrfs_root *csum_root;
-                       csum_root = btrfs_csum_root(info, bytenr);
-                       ret = btrfs_del_csums(trans, csum_root, bytenr,
-                                             num_bytes);
-                       if (ret) {
-                               btrfs_abort_transaction(trans, ret);
-                               goto out;
-                       }
-               }
-
-               ret = add_to_free_space_tree(trans, bytenr, num_bytes);
-               if (ret) {
-                       btrfs_abort_transaction(trans, ret);
-                       goto out;
-               }
-
-               ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
-               if (ret) {
-                       btrfs_abort_transaction(trans, ret);
-                       goto out;
-               }
+               ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
        }
        btrfs_release_path(path);
@@ -4605,6 +4600,28 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
        return ret;
 }
 
+static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+                                u64 num_bytes)
+{
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+       int ret;
+
+       ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
+       if (ret)
+               return ret;
+
+       ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
+       if (ret) {
+               ASSERT(!ret);
+               btrfs_err(fs_info, "update block group failed for %llu %llu",
+                         bytenr, num_bytes);
+               return ret;
+       }
+
+       trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
+       return 0;
+}
+
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                      u64 parent, u64 root_objectid,
                                      u64 flags, u64 owner, u64 offset,
@@ -4665,18 +4682,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
-       if (ret)
-               return ret;
-
-       ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
-       if (ret) { /* -ENOENT, logic error */
-               btrfs_err(fs_info, "update block group failed for %llu %llu",
-                       ins->objectid, ins->offset);
-               BUG();
-       }
-       trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
-       return ret;
+       return alloc_reserved_extent(trans, ins->objectid, ins->offset);
 }
 
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
@@ -4694,7 +4700,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_delayed_tree_ref *ref;
        u32 size = sizeof(*extent_item) + sizeof(*iref);
-       u64 num_bytes;
        u64 flags = extent_op->flags_to_set;
        bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
 
@@ -4704,12 +4709,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        if (skinny_metadata) {
                extent_key.offset = ref->level;
                extent_key.type = BTRFS_METADATA_ITEM_KEY;
-               num_bytes = fs_info->nodesize;
        } else {
                extent_key.offset = node->num_bytes;
                extent_key.type = BTRFS_EXTENT_ITEM_KEY;
                size += sizeof(*block_info);
-               num_bytes = node->num_bytes;
        }
 
        path = btrfs_alloc_path();
@@ -4754,22 +4757,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
-       ret = remove_from_free_space_tree(trans, extent_key.objectid,
-                                         num_bytes);
-       if (ret)
-               return ret;
-
-       ret = btrfs_update_block_group(trans, extent_key.objectid,
-                                      fs_info->nodesize, true);
-       if (ret) { /* -ENOENT, logic error */
-               btrfs_err(fs_info, "update block group failed for %llu %llu",
-                       extent_key.objectid, extent_key.offset);
-               BUG();
-       }
-
-       trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
-                                         fs_info->nodesize);
-       return ret;
+       return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
 }
 
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,


@@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode,
         * a good copy of the failed sector and if we succeed, we have setup
         * everything for repair_io_failure to do the rest for us.
         */
+       ASSERT(failed_mirror);
        failrec->failed_mirror = failed_mirror;
        failrec->this_mirror++;
        if (failrec->this_mirror == failed_mirror)
@@ -2639,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode,
        const int icsum = bio_offset >> fs_info->sectorsize_bits;
        struct bio *repair_bio;
        struct btrfs_bio *repair_bbio;
-       blk_status_t status;
 
        btrfs_debug(fs_info,
                   "repair read error: read error at %llu", start);
@@ -2678,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode,
                    "repair read error: submitting new read to mirror %d",
                    failrec->this_mirror);
 
-       status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
-                                failrec->bio_flags);
-       if (status) {
-               free_io_failure(failure_tree, tree, failrec);
-               bio_put(repair_bio);
-       }
-       return blk_status_to_errno(status);
+       /*
+        * At this point we have a bio, so any errors from submit_bio_hook()
+        * will be handled by the endio on the repair_bio, so we can't return an
+        * error here.
+        */
+       submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
+       return BLK_STS_OK;
 }
 
 static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
@@ -3067,6 +3067,14 @@ static void end_bio_extent_readpage(struct bio *bio)
                        goto readpage_ok;
 
                if (is_data_inode(inode)) {
+                       /*
+                        * If we failed to submit the IO at all we'll have a
+                        * mirror_num == 0, in which case we need to just mark
+                        * the page with an error and unlock it and carry on.
+                        */
+                       if (mirror == 0)
+                               goto readpage_ok;
+
                        /*
                         * btrfs_submit_read_repair() will handle all the good
                         * and bad sectors, we just continue to the next bvec.
@@ -3534,7 +3542,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
        }
 
        em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
-       if (em_cached && !IS_ERR_OR_NULL(em)) {
+       if (em_cached && !IS_ERR(em)) {
                BUG_ON(*em_cached);
                refcount_inc(&em->refs);
                *em_cached = em;
@@ -3563,7 +3571,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
        u64 cur_end;
        struct extent_map *em;
        int ret = 0;
-       int nr = 0;
        size_t pg_offset = 0;
        size_t iosize;
        size_t blocksize = inode->i_sb->s_blocksize;
@@ -3608,9 +3615,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                }
                em = __get_extent_map(inode, page, pg_offset, cur,
                                      end - cur + 1, em_cached);
-               if (IS_ERR_OR_NULL(em)) {
+               if (IS_ERR(em)) {
                        unlock_extent(tree, cur, end);
                        end_page_read(page, false, cur, end + 1 - cur);
+                       ret = PTR_ERR(em);
                        break;
                }
                extent_offset = cur - em->start;
@@ -3721,9 +3729,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                                         end_bio_extent_readpage, 0,
                                         this_bio_flag,
                                         force_bio_submit);
-               if (!ret) {
-                       nr++;
-               } else {
+               if (ret) {
                        unlock_extent(tree, cur, cur + iosize - 1);
                        end_page_read(page, false, cur, iosize);
                        goto out;
@@ -3951,7 +3957,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                }
 
                em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
-               if (IS_ERR_OR_NULL(em)) {
+               if (IS_ERR(em)) {
                        btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
                        ret = PTR_ERR_OR_ZERO(em);
                        break;
@@ -4780,11 +4786,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
                return ret;
        }
        if (cache) {
-               /* Impiles write in zoned mode */
-               btrfs_put_block_group(cache);
-               /* Mark the last eb in a block group */
+               /*
+                * Implies write in zoned mode. Mark the last eb in a block group.
+                */
                if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
                        set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+               btrfs_put_block_group(cache);
        }
        ret = write_one_eb(eb, wbc, epd);
        free_extent_buffer(eb);
@@ -5390,7 +5397,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
                        break;
                len = ALIGN(len, sectorsize);
                em = btrfs_get_extent_fiemap(inode, offset, len);
-               if (IS_ERR_OR_NULL(em))
+               if (IS_ERR(em))
                        return em;
 
                /* if this isn't a hole return it */


@@ -492,6 +492,8 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
  */
 void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 {
+       lockdep_assert_held_write(&tree->lock);
+
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
        rb_erase_cached(&em->rb_node, &tree->map);
        if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
@@ -506,6 +508,8 @@ void replace_extent_mapping(struct extent_map_tree *tree,
                            struct extent_map *new,
                            int modified)
 {
+       lockdep_assert_held_write(&tree->lock);
+
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
        ASSERT(extent_map_in_tree(cur));
        if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
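Both asserts document a pre-existing contract rather than change behavior: extent_map_tree::lock is an rwlock and these helpers mutate the rbtree, so the caller must hold it for writing. A minimal sketch of the expected calling pattern (the surrounding caller code here is illustrative):

    write_lock(&em_tree->lock);
    remove_extent_mapping(em_tree, em);     /* lockdep now checks this */
    write_unlock(&em_tree->lock);

    /* Drop the reference that the tree was holding. */
    free_extent_map(em);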


@@ -305,7 +305,7 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info,
        read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
                           ret * csum_size);
 out:
-       if (ret == -ENOENT)
+       if (ret == -ENOENT || ret == -EFBIG)
                ret = 0;
        return ret;
 }
@@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_bio *bbio = NULL;
        struct btrfs_path *path;
        const u32 sectorsize = fs_info->sectorsize;
        const u32 csum_size = fs_info->csum_size;
@@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
        u8 *csum;
        const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
        int count = 0;
+       blk_status_t ret = BLK_STS_OK;
 
        if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
            test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
@@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
                return BLK_STS_RESOURCE;
 
        if (!dst) {
-               struct btrfs_bio *bbio = btrfs_bio(bio);
+               bbio = btrfs_bio(bio);
 
                if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
                        bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
@@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
                count = search_csum_tree(fs_info, path, cur_disk_bytenr,
                                         search_len, csum_dst);
-               if (count <= 0) {
-                       /*
-                        * Either we hit a critical error or we didn't find
-                        * the csum.
-                        * Either way, we put zero into the csums dst, and skip
-                        * to the next sector.
-                        */
+               if (count < 0) {
+                       ret = errno_to_blk_status(count);
+                       if (bbio)
+                               btrfs_bio_free_csum(bbio);
+                       break;
+               }
+
+               /*
+                * We didn't find a csum for this range.  We need to make sure
+                * we complain loudly about this, because we are not NODATASUM.
+                *
+                * However for the DATA_RELOC inode we could potentially be
+                * relocating data extents for a NODATASUM inode, so the inode
+                * itself won't be marked with NODATASUM, but the extent we're
+                * copying is in fact NODATASUM.  If we don't find a csum we
+                * assume this is the case.
+                */
+               if (count == 0) {
                        memset(csum_dst, 0, csum_size);
                        count = 1;
-                       /*
-                        * For data reloc inode, we need to mark the range
-                        * NODATASUM so that balance won't report false csum
-                        * error.
-                        */
+
                        if (BTRFS_I(inode)->root->root_key.objectid ==
                            BTRFS_DATA_RELOC_TREE_OBJECTID) {
                                u64 file_offset;
@@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
        }
 
        btrfs_free_path(path);
-       return BLK_STS_OK;
+       return ret;
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
@@ -612,32 +620,33 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
        return ret;
 }
 
-/*
- * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
+/**
+ * Calculate checksums of the data contained inside a bio
+ *
  * @inode:       Owner of the data inside the bio
  * @bio:         Contains the data to be checksummed
- * @file_start:  offset in file this bio begins to describe
- * @contig:      Boolean. If true/1 means all bio vecs in this bio are
- *               contiguous and they begin at @file_start in the file. False/0
- *               means this bio can contain potentially discontiguous bio vecs
- *               so the logical offset of each should be calculated separately.
+ * @offset:      If (u64)-1, @bio may contain discontiguous bio vecs, so the
+ *               file offsets are determined from the page offsets in the bio.
+ *               Otherwise, this is the starting file offset of the bio vecs in
+ *               @bio, which must be contiguous.
+ * @one_ordered: If true, @bio only refers to one ordered extent.
  */
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
-                      u64 file_start, int contig)
+                               u64 offset, bool one_ordered)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        struct btrfs_ordered_sum *sums;
        struct btrfs_ordered_extent *ordered = NULL;
+       const bool use_page_offsets = (offset == (u64)-1);
        char *data;
        struct bvec_iter iter;
        struct bio_vec bvec;
        int index;
-       int nr_sectors;
+       unsigned int blockcount;
        unsigned long total_bytes = 0;
        unsigned long this_sum_bytes = 0;
        int i;
-       u64 offset;
        unsigned nofs_flag;
 
        nofs_flag = memalloc_nofs_save();
@@ -651,18 +660,13 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
        sums->len = bio->bi_iter.bi_size;
        INIT_LIST_HEAD(&sums->list);
 
-       if (contig)
-               offset = file_start;
-       else
-               offset = 0; /* shut up gcc */
-
        sums->bytenr = bio->bi_iter.bi_sector << 9;
        index = 0;
 
        shash->tfm = fs_info->csum_shash;
 
        bio_for_each_segment(bvec, bio, iter) {
-               if (!contig)
+               if (use_page_offsets)
                        offset = page_offset(bvec.bv_page) + bvec.bv_offset;
 
                if (!ordered) {
@@ -681,13 +685,14 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
                        }
                }
 
-               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
+               blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
                                                 bvec.bv_len + fs_info->sectorsize
                                                 - 1);
 
-               for (i = 0; i < nr_sectors; i++) {
-                       if (offset >= ordered->file_offset + ordered->num_bytes ||
-                           offset < ordered->file_offset) {
+               for (i = 0; i < blockcount; i++) {
+                       if (!one_ordered &&
+                           !in_range(offset, ordered->file_offset,
+                                     ordered->num_bytes)) {
                                unsigned long bytes_left;
 
                                sums->len = this_sum_bytes;
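The in_range() conversion above is purely cosmetic; assuming btrfs's helper from misc.h, in_range(b, first, len) expands to (b) >= (first) && (b) < (first) + (len), so the old and new tests are logical negations of each other. A standalone check of that equivalence (the function names here are mine, not from the diff):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))

    static bool old_outside(uint64_t off, uint64_t start, uint64_t len)
    {
            return off >= start + len || off < start;       /* old spelling */
    }

    static bool new_outside(uint64_t off, uint64_t start, uint64_t len)
    {
            return !in_range(off, start, len);              /* new spelling */
    }

    int main(void)
    {
            /* Spot checks around the boundaries of [100, 100 + 50). */
            for (uint64_t off = 0; off < 200; off++)
                    assert(old_outside(off, 100, 50) == new_outside(off, 100, 50));
            return 0;
    }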
@@ -1211,6 +1216,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
        extent_start = key.offset;
        extent_end = btrfs_file_extent_end(path);
        em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+       em->generation = btrfs_file_extent_generation(leaf, fi);
        if (type == BTRFS_FILE_EXTENT_REG ||
            type == BTRFS_FILE_EXTENT_PREALLOC) {
                em->start = extent_start;


@@ -691,7 +691,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int modify_tree = -1;
        int update_refs;
        int found = 0;
-       int leafs_visited = 0;
        struct btrfs_path *path = args->path;
 
        args->bytes_found = 0;
@@ -729,7 +728,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                        path->slots[0]--;
        }
        ret = 0;
-       leafs_visited++;
 next_slot:
        leaf = path->nodes[0];
        if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -741,7 +739,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                                ret = 0;
                        break;
                }
-               leafs_visited++;
                leaf = path->nodes[0];
                recow = 1;
        }
@@ -987,7 +984,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
         * which case it unlocked our path, so check path->locks[0] matches a
         * write lock.
         */
-       if (!ret && args->replace_extent && leafs_visited == 1 &&
+       if (!ret && args->replace_extent &&
            path->locks[0] == BTRFS_WRITE_LOCK &&
            btrfs_leaf_free_space(leaf) >=
            sizeof(struct btrfs_item) + args->extent_item_size) {
@@ -1722,7 +1719,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                          fs_info->sectorsize);
                WARN_ON(reserve_bytes == 0);
                ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-                                                     reserve_bytes);
+                                                     reserve_bytes,
+                                                     reserve_bytes);
                if (ret) {
                        if (!only_release_metadata)
                                btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -2039,12 +2037,43 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
        return err < 0 ? err : written;
 }
 
-static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
-                                   struct iov_iter *from)
+static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+                       const struct btrfs_ioctl_encoded_io_args *encoded)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       loff_t count;
+       ssize_t ret;
+
+       btrfs_inode_lock(inode, 0);
+       count = encoded->len;
+       ret = generic_write_checks_count(iocb, &count);
+       if (ret == 0 && count != encoded->len) {
+               /*
+                * The write got truncated by generic_write_checks_count(). We
+                * can't do a partial encoded write.
+                */
+               ret = -EFBIG;
+       }
+       if (ret || encoded->len == 0)
+               goto out;
+
+       ret = btrfs_write_check(iocb, from, encoded->len);
+       if (ret < 0)
+               goto out;
+
+       ret = btrfs_do_encoded_write(iocb, from, encoded);
+out:
+       btrfs_inode_unlock(inode, 0);
+       return ret;
+}
+
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+                           const struct btrfs_ioctl_encoded_io_args *encoded)
 {
        struct file *file = iocb->ki_filp;
        struct btrfs_inode *inode = BTRFS_I(file_inode(file));
-       ssize_t num_written = 0;
+       ssize_t num_written, num_sync;
        const bool sync = iocb->ki_flags & IOCB_DSYNC;
 
        /*
@@ -2055,22 +2084,28 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        if (BTRFS_FS_ERROR(inode->root->fs_info))
                return -EROFS;
 
-       if (!(iocb->ki_flags & IOCB_DIRECT) &&
-           (iocb->ki_flags & IOCB_NOWAIT))
+       if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
                return -EOPNOTSUPP;
 
        if (sync)
                atomic_inc(&inode->sync_writers);
 
-       if (iocb->ki_flags & IOCB_DIRECT)
-               num_written = btrfs_direct_write(iocb, from);
-       else
-               num_written = btrfs_buffered_write(iocb, from);
+       if (encoded) {
+               num_written = btrfs_encoded_write(iocb, from, encoded);
+               num_sync = encoded->len;
+       } else if (iocb->ki_flags & IOCB_DIRECT) {
+               num_written = num_sync = btrfs_direct_write(iocb, from);
+       } else {
+               num_written = num_sync = btrfs_buffered_write(iocb, from);
+       }
 
        btrfs_set_inode_last_sub_trans(inode);
 
-       if (num_written > 0)
-               num_written = generic_write_sync(iocb, num_written);
+       if (num_sync > 0) {
+               num_sync = generic_write_sync(iocb, num_sync);
+               if (num_sync < 0)
+                       num_written = num_sync;
+       }
 
        if (sync)
                atomic_dec(&inode->sync_writers);
@@ -2079,6 +2114,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        return num_written;
 }
 
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       return btrfs_do_write_iter(iocb, from, NULL);
+}
+
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
        struct btrfs_file_private *private = filp->private_data;
@@ -2474,7 +2514,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
                hole_em = alloc_extent_map();
                if (!hole_em) {
                        btrfs_drop_extent_cache(inode, offset, end - 1, 0);
-                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+                       btrfs_set_inode_full_sync(inode);
                } else {
                        hole_em->start = offset;
                        hole_em->len = end - offset;
@@ -2495,8 +2535,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
                } while (ret == -EEXIST);
                free_extent_map(hole_em);
                if (ret)
-                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                                       &inode->runtime_flags);
+                       btrfs_set_inode_full_sync(inode);
        }
 
        return 0;
@@ -2850,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
         * maps for the replacement extents (or holes).
         */
        if (extent_info && !extent_info->is_new_extent)
-               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+               btrfs_set_inode_full_sync(inode);
 
        if (ret)
                goto out_trans;


@@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
                .offset = 0,
        };
 
+       if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
+               key.offset = block_group->global_root_id;
        return btrfs_global_root(block_group->fs_info, &key);
 }
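The initializer above is shown truncated by the diff context; for reference, my reading of the full key is below, with the first two fields stated as an assumption based on how btrfs_global_root() is keyed rather than quoted from this hunk:

    struct btrfs_key key = {
            .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
            .type = BTRFS_ROOT_ITEM_KEY,
            .offset = 0,    /* replaced by block_group->global_root_id on v2 */
    };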

File diff suppressed because it is too large


@@ -28,6 +28,7 @@
 #include <linux/iversion.h>
 #include <linux/fileattr.h>
 #include <linux/fsverity.h>
+#include <linux/sched/xacct.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "export.h"
@@ -88,6 +89,24 @@ struct btrfs_ioctl_send_args_32 {
 #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
                               struct btrfs_ioctl_send_args_32)
 
+struct btrfs_ioctl_encoded_io_args_32 {
+       compat_uptr_t iov;
+       compat_ulong_t iovcnt;
+       __s64 offset;
+       __u64 flags;
+       __u64 len;
+       __u64 unencoded_len;
+       __u64 unencoded_offset;
+       __u32 compression;
+       __u32 encryption;
+       __u8 reserved[64];
+};
+
+#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
+                                      struct btrfs_ioctl_encoded_io_args_32)
+#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
+                                       struct btrfs_ioctl_encoded_io_args_32)
 #endif
 
 /* Mask out flags that are inappropriate for the given type of inode. */
@@ -440,10 +459,8 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
        }
 }
 
-static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
+static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
 {
-       struct inode *inode = file_inode(file);
-
        return put_user(inode->i_generation, arg);
 }
@@ -753,6 +770,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        struct btrfs_trans_handle *trans;
        int ret;
 
+       /* We do not support snapshotting right now. */
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_warn(fs_info,
+                          "extent tree v2 doesn't support snapshotting yet");
+               return -EOPNOTSUPP;
+       }
+
        if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
                return -EINVAL;
@@ -1522,6 +1546,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 }
 
 #define CLUSTER_SIZE   (SZ_256K)
+static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 
 /*
  * Defrag one contiguous target range.
@@ -1667,7 +1692,6 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
        LIST_HEAD(target_list);
        int ret;
 
-       BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
        ret = defrag_collect_targets(inode, start, len, extent_thresh,
                                     newer_than, do_compress, false,
                                     &target_list, NULL);
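The BUILD_BUG_ON() to static_assert() swap works because static_assert() is valid at file scope, so the check can live once next to the constant instead of being restated in every function that relies on it; BUILD_BUG_ON() expands to statements and is confined to function bodies. A compile-time sketch of the difference, assuming kernel context (the helper name is illustrative):

    /* New style: asserted once, at the definition site. */
    #define CLUSTER_SIZE    (SZ_256K)
    static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));

    /* Old style: repeated inside each function that depended on it. */
    static void some_defrag_helper(void)
    {
            BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
            /* ... */
    }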
@@ -1810,9 +1834,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
                u64 last_scanned = cur;
                u64 cluster_end;
 
-               /* The cluster size 256K should always be page aligned */
-               BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
-
                if (btrfs_defrag_cancelled(fs_info)) {
                        ret = -EAGAIN;
                        break;
@@ -2229,10 +2250,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
        return ret;
 }
 
-static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
                                                void __user *arg)
 {
-       struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
@@ -2562,12 +2582,11 @@ static noinline int search_ioctl(struct inode *inode,
        return ret;
 }
 
-static noinline int btrfs_ioctl_tree_search(struct file *file,
+static noinline int btrfs_ioctl_tree_search(struct inode *inode,
                                            void __user *argp)
 {
        struct btrfs_ioctl_search_args __user *uargs;
        struct btrfs_ioctl_search_key sk;
-       struct inode *inode;
        int ret;
        size_t buf_size;
 
@@ -2581,7 +2600,6 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 
        buf_size = sizeof(uargs->buf);
 
-       inode = file_inode(file);
        ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
 
        /*
@@ -2596,12 +2614,11 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
        return ret;
 }
 
-static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
                                               void __user *argp)
 {
        struct btrfs_ioctl_search_args_v2 __user *uarg;
        struct btrfs_ioctl_search_args_v2 args;
-       struct inode *inode;
        int ret;
        size_t buf_size;
        const size_t buf_limit = SZ_16M;
@@ -2620,7 +2637,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
        if (buf_size > buf_limit)
                buf_size = buf_limit;
 
-       inode = file_inode(file);
        ret = search_ioctl(inode, &args.key, &buf_size,
                           (char __user *)(&uarg->buf[0]));
        if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
@@ -2871,25 +2887,22 @@ static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
        return ret;
 }
 
-static noinline int btrfs_ioctl_ino_lookup(struct file *file,
+static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
                                           void __user *argp)
 {
        struct btrfs_ioctl_ino_lookup_args *args;
-       struct inode *inode;
        int ret = 0;
 
        args = memdup_user(argp, sizeof(*args));
        if (IS_ERR(args))
                return PTR_ERR(args);
 
-       inode = file_inode(file);
-
        /*
        * Unprivileged query to obtain the containing subvolume root id. The
        * path is reset so it's consistent with btrfs_search_path_in_tree.
        */
        if (args->treeid == 0)
-               args->treeid = BTRFS_I(inode)->root->root_key.objectid;
+               args->treeid = root->root_key.objectid;
 
        if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
                args->name[0] = 0;
@@ -2901,7 +2914,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
                goto out;
        }
 
-       ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
+       ret = btrfs_search_path_in_tree(root->fs_info,
                                        args->treeid, args->objectid,
                                        args->name);
 
@@ -2957,7 +2970,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
 }
 
 /* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
-static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
 {
        struct btrfs_ioctl_get_subvol_info_args *subvol_info;
        struct btrfs_fs_info *fs_info;
@@ -2969,7 +2982,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
        struct extent_buffer *leaf;
        unsigned long item_off;
        unsigned long item_len;
-       struct inode *inode;
        int slot;
        int ret = 0;
 
@@ -2983,7 +2995,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
                return -ENOMEM;
        }
 
-       inode = file_inode(file);
        fs_info = BTRFS_I(inode)->root->fs_info;
 
        /* Get root_item of inode's subvolume */
@@ -3077,15 +3088,14 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
  * Return ROOT_REF information of the subvolume containing this inode
  * except the subvolume name.
  */
-static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
+                                          void __user *argp)
 {
        struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
        struct btrfs_root_ref *rref;
-       struct btrfs_root *root;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
-       struct inode *inode;
        u64 objectid;
        int slot;
        int ret;
@@ -3101,15 +3111,13 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
                return PTR_ERR(rootrefs);
        }
 
-       inode = file_inode(file);
-       root = BTRFS_I(inode)->root->fs_info->tree_root;
-       objectid = BTRFS_I(inode)->root->root_key.objectid;
-
+       objectid = root->root_key.objectid;
        key.objectid = objectid;
        key.type = BTRFS_ROOT_REF_KEY;
        key.offset = rootrefs->min_treeid;
        found = 0;
 
+       root = root->fs_info->tree_root;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                goto out;
@@ -3189,6 +3197,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        int err = 0;
        bool destroy_parent = false;
 
+       /* We don't support snapshots with extent tree v2 yet. */
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info,
+                         "extent tree v2 doesn't support snapshot deletion yet");
+               return -EOPNOTSUPP;
+       }
+
        if (destroy_v2) {
                vol_args2 = memdup_user(arg, sizeof(*vol_args2));
                if (IS_ERR(vol_args2))
@@ -3464,6 +3479,11 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
+               return -EINVAL;
+       }
+
        if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) {
                if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
                        return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -3989,6 +4009,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
+               return -EINVAL;
+       }
+
        sa = memdup_user(arg, sizeof(*sa));
        if (IS_ERR(sa))
                return PTR_ERR(sa);
@@ -4088,6 +4113,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
+               return -EINVAL;
+       }
+
        p = memdup_user(arg, sizeof(*p));
        if (IS_ERR(p))
                return PTR_ERR(p);
@@ -5149,7 +5179,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
        return ret;
 }
 
-static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
+static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
 {
        struct btrfs_ioctl_send_args *arg;
        int ret;
@@ -5179,11 +5209,194 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
                if (IS_ERR(arg))
                        return PTR_ERR(arg);
        }
-       ret = btrfs_ioctl_send(file, arg);
+       ret = btrfs_ioctl_send(inode, arg);
        kfree(arg);
        return ret;
 }
 
+static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
+                                   bool compat)
+{
+       struct btrfs_ioctl_encoded_io_args args = { 0 };
+       size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
+                                            flags);
+       size_t copy_end;
+       struct iovec iovstack[UIO_FASTIOV];
+       struct iovec *iov = iovstack;
+       struct iov_iter iter;
+       loff_t pos;
+       struct kiocb kiocb;
+       ssize_t ret;
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               ret = -EPERM;
+               goto out_acct;
+       }
+
+       if (compat) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+               struct btrfs_ioctl_encoded_io_args_32 args32;
+
+               copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
+                                      flags);
+               if (copy_from_user(&args32, argp, copy_end)) {
+                       ret = -EFAULT;
+                       goto out_acct;
+               }
+               args.iov = compat_ptr(args32.iov);
+               args.iovcnt = args32.iovcnt;
+               args.offset = args32.offset;
+               args.flags = args32.flags;
+#else
+               return -ENOTTY;
+#endif
+       } else {
+               copy_end = copy_end_kernel;
+               if (copy_from_user(&args, argp, copy_end)) {
+                       ret = -EFAULT;
+                       goto out_acct;
+               }
+       }
+       if (args.flags != 0) {
+               ret = -EINVAL;
+               goto out_acct;
+       }
+
+       ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+                          &iov, &iter);
+       if (ret < 0)
+               goto out_acct;
+
+       if (iov_iter_count(&iter) == 0) {
+               ret = 0;
+               goto out_iov;
+       }
+       pos = args.offset;
+       ret = rw_verify_area(READ, file, &pos, args.len);
+       if (ret < 0)
+               goto out_iov;
+
+       init_sync_kiocb(&kiocb, file);
+       kiocb.ki_pos = pos;
+
+       ret = btrfs_encoded_read(&kiocb, &iter, &args);
+       if (ret >= 0) {
+               fsnotify_access(file);
+               if (copy_to_user(argp + copy_end,
+                                (char *)&args + copy_end_kernel,
+                                sizeof(args) - copy_end_kernel))
+                       ret = -EFAULT;
+       }
+
+out_iov:
+       kfree(iov);
+out_acct:
+       if (ret > 0)
+               add_rchar(current, ret);
+       inc_syscr(current);
+       return ret;
+}
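For a sense of how this handler is meant to be driven, here is a hedged user-space sketch of an encoded read, assuming the uapi side of this series (struct btrfs_ioctl_encoded_io_args and BTRFS_IOC_ENCODED_READ from <linux/btrfs.h>); error handling is abbreviated and, as enforced above, the caller needs CAP_SYS_ADMIN:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/uio.h>
    #include <unistd.h>
    #include <linux/btrfs.h>

    static int encoded_read_example(const char *path)
    {
            static char buf[1024 * 1024];   /* must hold one encoded extent */
            struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
            struct btrfs_ioctl_encoded_io_args args;
            ssize_t ret;
            int fd;

            fd = open(path, O_RDONLY);
            if (fd < 0)
                    return -1;

            memset(&args, 0, sizeof(args));
            args.iov = &iov;
            args.iovcnt = 1;
            args.offset = 0;        /* file offset to read from; flags stay 0 */

            /*
             * On success the return value is the number of encoded bytes
             * copied into the iovec; the kernel fills in len, unencoded_len,
             * unencoded_offset, compression and encryption behind 'flags'.
             */
            ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
            if (ret >= 0)
                    printf("%zd encoded bytes, covers %llu, compression %u\n",
                           ret, (unsigned long long)args.len, args.compression);

            close(fd);
            return ret < 0 ? -1 : 0;
    }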
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
{
struct btrfs_ioctl_encoded_io_args args;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out_acct;
}
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out_acct;
}
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
if (copy_from_user(&args32, argp, sizeof(args32))) {
ret = -EFAULT;
goto out_acct;
}
args.iov = compat_ptr(args32.iov);
args.iovcnt = args32.iovcnt;
args.offset = args32.offset;
args.flags = args32.flags;
args.len = args32.len;
args.unencoded_len = args32.unencoded_len;
args.unencoded_offset = args32.unencoded_offset;
args.compression = args32.compression;
args.encryption = args32.encryption;
memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
#else
return -ENOTTY;
#endif
} else {
if (copy_from_user(&args, argp, sizeof(args))) {
ret = -EFAULT;
goto out_acct;
}
}
ret = -EINVAL;
if (args.flags != 0)
goto out_acct;
if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
goto out_acct;
if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
goto out_acct;
if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
goto out_acct;
if (args.unencoded_offset > args.unencoded_len)
goto out_acct;
if (args.len > args.unencoded_len - args.unencoded_offset)
goto out_acct;
ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
file_start_write(file);
if (iov_iter_count(&iter) == 0) {
ret = 0;
goto out_end_write;
}
pos = args.offset;
ret = rw_verify_area(WRITE, file, &pos, args.len);
if (ret < 0)
goto out_end_write;
init_sync_kiocb(&kiocb, file);
ret = kiocb_set_rw_flags(&kiocb, 0);
if (ret)
goto out_end_write;
kiocb.ki_pos = pos;
ret = btrfs_do_write_iter(&kiocb, &iter, &args);
if (ret > 0)
fsnotify_modify(file);
out_end_write:
file_end_write(file);
kfree(iov);
out_acct:
if (ret > 0)
add_wchar(current, ret);
inc_syscw(current);
return ret;
}
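
The two helpers above are the whole user-space surface of encoded I/O. As a quick orientation, here is a minimal, hypothetical user-space sketch of the read side; it assumes a 5.18 uapi header, CAP_SYS_ADMIN, and an illustrative mount path, and is not part of the patch:

	/* encoded-read.c: fetch up to one extent in its encoded (on-disk) form */
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/uio.h>
	#include <linux/btrfs.h>

	int main(void)
	{
		static char buf[128 * 1024];	/* one max-sized compressed extent */
		struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
		struct btrfs_ioctl_encoded_io_args args = { 0 };
		int fd = open("/mnt/file", O_RDONLY);	/* hypothetical path */
		ssize_t ret;

		if (fd < 0)
			return 1;
		args.iov = &iov;
		args.iovcnt = 1;
		args.offset = 0;	/* logical file offset; args.flags must stay 0 */
		ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
		if (ret < 0)
			return 1;
		/* on return the kernel filled in len, unencoded_* and compression */
		printf("%zd encoded bytes, compression type %u\n", ret, args.compression);
		close(fd);
		return 0;
	}

Writing follows the same pattern through BTRFS_IOC_ENCODED_WRITE, except the caller fills in len, unencoded_len, unencoded_offset and compression itself, as the validation above requires.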
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -5194,7 +5407,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 	case FS_IOC_GETVERSION:
-		return btrfs_ioctl_getversion(file, argp);
+		return btrfs_ioctl_getversion(inode, argp);
 	case FS_IOC_GETFSLABEL:
 		return btrfs_ioctl_get_fslabel(fs_info, argp);
 	case FS_IOC_SETFSLABEL:
@@ -5214,7 +5427,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case BTRFS_IOC_SNAP_DESTROY_V2:
 		return btrfs_ioctl_snap_destroy(file, argp, true);
 	case BTRFS_IOC_SUBVOL_GETFLAGS:
-		return btrfs_ioctl_subvol_getflags(file, argp);
+		return btrfs_ioctl_subvol_getflags(inode, argp);
 	case BTRFS_IOC_SUBVOL_SETFLAGS:
 		return btrfs_ioctl_subvol_setflags(file, argp);
 	case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -5238,11 +5451,11 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case BTRFS_IOC_BALANCE:
 		return btrfs_ioctl_balance(file, NULL);
 	case BTRFS_IOC_TREE_SEARCH:
-		return btrfs_ioctl_tree_search(file, argp);
+		return btrfs_ioctl_tree_search(inode, argp);
 	case BTRFS_IOC_TREE_SEARCH_V2:
-		return btrfs_ioctl_tree_search_v2(file, argp);
+		return btrfs_ioctl_tree_search_v2(inode, argp);
 	case BTRFS_IOC_INO_LOOKUP:
-		return btrfs_ioctl_ino_lookup(file, argp);
+		return btrfs_ioctl_ino_lookup(root, argp);
 	case BTRFS_IOC_INO_PATHS:
 		return btrfs_ioctl_ino_to_path(root, argp);
 	case BTRFS_IOC_LOGICAL_INO:
@@ -5289,10 +5502,10 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return btrfs_ioctl_set_received_subvol_32(file, argp);
 #endif
 	case BTRFS_IOC_SEND:
-		return _btrfs_ioctl_send(file, argp, false);
+		return _btrfs_ioctl_send(inode, argp, false);
 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
 	case BTRFS_IOC_SEND_32:
-		return _btrfs_ioctl_send(file, argp, true);
+		return _btrfs_ioctl_send(inode, argp, true);
 #endif
 	case BTRFS_IOC_GET_DEV_STATS:
 		return btrfs_ioctl_get_dev_stats(fs_info, argp);
@@ -5319,15 +5532,25 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case BTRFS_IOC_SET_FEATURES:
 		return btrfs_ioctl_set_features(file, argp);
 	case BTRFS_IOC_GET_SUBVOL_INFO:
-		return btrfs_ioctl_get_subvol_info(file, argp);
+		return btrfs_ioctl_get_subvol_info(inode, argp);
 	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
-		return btrfs_ioctl_get_subvol_rootref(file, argp);
+		return btrfs_ioctl_get_subvol_rootref(root, argp);
 	case BTRFS_IOC_INO_LOOKUP_USER:
 		return btrfs_ioctl_ino_lookup_user(file, argp);
 	case FS_IOC_ENABLE_VERITY:
 		return fsverity_ioctl_enable(file, (const void __user *)argp);
 	case FS_IOC_MEASURE_VERITY:
 		return fsverity_ioctl_measure(file, argp);
+	case BTRFS_IOC_ENCODED_READ:
+		return btrfs_ioctl_encoded_read(file, argp, false);
+	case BTRFS_IOC_ENCODED_WRITE:
+		return btrfs_ioctl_encoded_write(file, argp, false);
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+	case BTRFS_IOC_ENCODED_READ_32:
+		return btrfs_ioctl_encoded_read(file, argp, true);
+	case BTRFS_IOC_ENCODED_WRITE_32:
+		return btrfs_ioctl_encoded_write(file, argp, true);
+#endif
 	}
 	return -ENOTTY;

----------------------------------------------------------------------

@@ -55,6 +55,9 @@
  * 0x1000 | SegHdr N+1| Data payload N+1 ...     |
  */
+#define WORKSPACE_BUF_LENGTH	(lzo1x_worst_compress(PAGE_SIZE))
+#define WORKSPACE_CBUF_LENGTH	(lzo1x_worst_compress(PAGE_SIZE))
+
 struct workspace {
 	void *mem;
 	void *buf;	/* where decompressed data goes */
@@ -83,8 +86,8 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
 		return ERR_PTR(-ENOMEM);

 	workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
-	workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
-	workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
+	workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
+	workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
 	if (!workspace->mem || !workspace->buf || !workspace->cbuf)
 		goto fail;
@@ -380,7 +383,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 		kunmap(cur_page);
 		cur_in += LZO_LEN;

-		if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
+		if (seg_len > WORKSPACE_CBUF_LENGTH) {
 			/*
 			 * seg_len shouldn't be larger than we have allocated
 			 * for workspace->cbuf
@@ -433,7 +436,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	size_t in_len;
 	size_t out_len;
-	size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
+	size_t max_segment_len = WORKSPACE_BUF_LENGTH;
 	int ret = 0;
 	char *kaddr;
 	unsigned long bytes;
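
For context on the sizing, assuming the stock lzo1x_worst_compress() definition from <linux/lzo.h> (the macro below is copied for illustration; the assert is not part of the change):

	#include <assert.h>

	#define lzo1x_worst_compress(x)	((x) + ((x) / 16) + 64 + 3)	/* <linux/lzo.h> */

	/* worst case for a 4 KiB page: 4096 + 256 + 64 + 3 == 4419 bytes */
	static_assert(lzo1x_worst_compress(4096) == 4419, "LZO worst-case sizing");

Both constants expand to the same value today; the separate BUF/CBUF names only document which workspace buffer a given bound applies to.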

----------------------------------------------------------------------

@@ -143,16 +143,28 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 	return ret;
 }

-/*
- * Allocate and add a new ordered_extent into the per-inode tree.
+/**
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode:           Inode that this extent is for.
+ * @file_offset:     Logical offset in file where the extent starts.
+ * @num_bytes:       Logical length of extent in file.
+ * @ram_bytes:       Full length of unencoded data.
+ * @disk_bytenr:     Offset of extent on disk.
+ * @disk_num_bytes:  Size of extent on disk.
+ * @offset:          Offset into unencoded data where file data starts.
+ * @flags:           Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type:   Compression algorithm used for data.
  *
- * The tree is given a single reference on the ordered extent that was
- * inserted.
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted.
+ *
+ * Return: 0 or -ENOMEM.
  */
-static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int type, int dio,
-				      int compress_type)
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			     u64 disk_num_bytes, u64 offset, unsigned flags,
+			     int compress_type)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -161,7 +173,8 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	struct btrfs_ordered_extent *entry;
 	int ret;

-	if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
+	if (flags &
+	    ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
 		/* For nocow write, we can release the qgroup rsv right now */
 		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
 		if (ret < 0)
@@ -181,9 +194,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 		return -ENOMEM;

 	entry->file_offset = file_offset;
-	entry->disk_bytenr = disk_bytenr;
 	entry->num_bytes = num_bytes;
+	entry->ram_bytes = ram_bytes;
+	entry->disk_bytenr = disk_bytenr;
 	entry->disk_num_bytes = disk_num_bytes;
+	entry->offset = offset;
 	entry->bytes_left = num_bytes;
 	entry->inode = igrab(&inode->vfs_inode);
 	entry->compress_type = compress_type;
@@ -191,18 +206,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 		entry->qgroup_rsv = ret;
 	entry->physical = (u64)-1;

-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC ||
-	       type == BTRFS_ORDERED_COMPRESSED);
-	set_bit(type, &entry->flags);
+	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
+	entry->flags = flags;

 	percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
 				 fs_info->delalloc_batch);

-	if (dio)
-		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
-
 	/* one ref for the tree */
 	refcount_set(&entry->refs, 1);
 	init_waitqueue_head(&entry->wait);
@@ -247,41 +256,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	return 0;
 }

-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
-			     int type)
-{
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes, type, 0,
-					  BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
-				 u64 disk_bytenr, u64 num_bytes,
-				 u64 disk_num_bytes, int type)
-{
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes, type, 1,
-					  BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int compress_type)
-{
-	ASSERT(compress_type != BTRFS_COMPRESS_NONE);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes,
-					  BTRFS_ORDERED_COMPRESSED, 0,
-					  compress_type);
-}
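
Since the wrappers are gone, every caller now spells the flags out. A sketch of how the three removed variants map onto the unified helper (variable names illustrative, following the pattern the remaining callers use):

	/* was btrfs_add_ordered_extent(inode, pos, bytenr, len, dlen, type): */
	ret = btrfs_add_ordered_extent(inode, pos, len, len, bytenr, dlen, 0,
				       1 << BTRFS_ORDERED_REGULAR,
				       BTRFS_COMPRESS_NONE);

	/* was the _dio() variant: the dio argument becomes an extra flag bit */
	ret = btrfs_add_ordered_extent(inode, pos, len, len, bytenr, dlen, 0,
				       (1 << BTRFS_ORDERED_REGULAR) |
				       (1 << BTRFS_ORDERED_DIRECT),
				       BTRFS_COMPRESS_NONE);

	/* was the _compress() variant: a type bit plus a real compress_type */
	ret = btrfs_add_ordered_extent(inode, pos, len, ram_len, bytenr, dlen, 0,
				       1 << BTRFS_ORDERED_COMPRESSED,
				       BTRFS_COMPRESS_ZSTD);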
 /*
  * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
  * when an ordered extent is finished. If the list covers more than one
@@ -548,9 +522,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 	spin_lock(&btrfs_inode->lock);
 	btrfs_mod_outstanding_extents(btrfs_inode, -1);
 	spin_unlock(&btrfs_inode->lock);
-	if (root != fs_info->tree_root)
-		btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
-						false);
+	if (root != fs_info->tree_root) {
+		u64 release;
+
+		if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
+			release = entry->disk_num_bytes;
+		else
+			release = entry->num_bytes;
+		btrfs_delalloc_release_metadata(btrfs_inode, release, false);
+	}

 	percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
 				 fs_info->delalloc_batch);
@@ -1052,42 +1032,18 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	u64 file_offset = ordered->file_offset + pos;
 	u64 disk_bytenr = ordered->disk_bytenr + pos;
-	u64 num_bytes = len;
-	u64 disk_num_bytes = len;
-	int type;
-	unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
-	int compress_type = ordered->compress_type;
-	unsigned long weight;
-	int ret;
-
-	weight = hweight_long(flags_masked);
-	WARN_ON_ONCE(weight > 1);
-	if (!weight)
-		type = 0;
-	else
-		type = __ffs(flags_masked);
+	unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;

 	/*
-	 * The splitting extent is already counted and will be added again
-	 * in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
-	 * double counting.
+	 * The splitting extent is already counted and will be added again in
+	 * btrfs_add_ordered_extent_*(). Subtract len to avoid double counting.
 	 */
-	percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
+	percpu_counter_add_batch(&fs_info->ordered_bytes, -len,
 				 fs_info->delalloc_batch);
-	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
-		WARN_ON_ONCE(1);
-		ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
-				file_offset, disk_bytenr, num_bytes,
-				disk_num_bytes, compress_type);
-	} else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-		ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
-				disk_bytenr, num_bytes, disk_num_bytes, type);
-	} else {
-		ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
-				disk_bytenr, num_bytes, disk_num_bytes, type);
-	}
-	return ret;
+	WARN_ON_ONCE(flags & (1 << BTRFS_ORDERED_COMPRESSED));
+	return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
+					disk_bytenr, len, 0, flags,
+					ordered->compress_type);
 }

 int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,

----------------------------------------------------------------------

@@ -74,8 +74,18 @@ enum {
 	BTRFS_ORDERED_LOGGED_CSUM,
 	/* We wait for this extent to complete in the current transaction */
 	BTRFS_ORDERED_PENDING,
+	/* BTRFS_IOC_ENCODED_WRITE */
+	BTRFS_ORDERED_ENCODED,
 };

+/* BTRFS_ORDERED_* flags that specify the type of the extent. */
+#define BTRFS_ORDERED_TYPE_FLAGS	((1UL << BTRFS_ORDERED_REGULAR) |	\
+					 (1UL << BTRFS_ORDERED_NOCOW) |		\
+					 (1UL << BTRFS_ORDERED_PREALLOC) |	\
+					 (1UL << BTRFS_ORDERED_COMPRESSED) |	\
+					 (1UL << BTRFS_ORDERED_DIRECT) |	\
+					 (1UL << BTRFS_ORDERED_ENCODED))
+
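One property this encoding relies on (my reading of the callers, not stated in the patch): DIRECT and ENCODED act as modifiers layered on top of at most one primary type, so an illustrative validity check could look like:

	unsigned long primary = flags & BTRFS_ORDERED_TYPE_FLAGS &
				~((1UL << BTRFS_ORDERED_DIRECT) |
				  (1UL << BTRFS_ORDERED_ENCODED));

	/* at most one of REGULAR, NOCOW, PREALLOC, COMPRESSED is set */
	ASSERT(hweight_long(primary) <= 1);
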
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
 	u64 file_offset;
@@ -84,9 +94,11 @@ struct btrfs_ordered_extent {
 	 * These fields directly correspond to the same fields in
 	 * btrfs_file_extent_item.
 	 */
-	u64 disk_bytenr;
 	u64 num_bytes;
+	u64 ram_bytes;
+	u64 disk_bytenr;
 	u64 disk_num_bytes;
+	u64 offset;

 	/* number of bytes that still need writing */
 	u64 bytes_left;
@@ -179,14 +191,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
 				    struct btrfs_ordered_extent **cached,
 				    u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
-			     int type);
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
-				 u64 disk_bytenr, u64 num_bytes,
-				 u64 disk_num_bytes, int type);
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int compress_type);
+			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			     u64 disk_num_bytes, u64 offset, unsigned flags,
+			     int compress_type);
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
 			   struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,

----------------------------------------------------------------------

@@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = {
 	{ BTRFS_QUOTA_TREE_OBJECTID,		"QUOTA_TREE"		},
 	{ BTRFS_UUID_TREE_OBJECTID,		"UUID_TREE"		},
 	{ BTRFS_FREE_SPACE_TREE_OBJECTID,	"FREE_SPACE_TREE"	},
+	{ BTRFS_BLOCK_GROUP_TREE_OBJECTID,	"BLOCK_GROUP_TREE"	},
 	{ BTRFS_DATA_RELOC_TREE_OBJECTID,	"DATA_RELOC_TREE"	},
 };
@@ -391,9 +392,9 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
 				       btrfs_header_owner(c),
 				       btrfs_node_ptr_generation(c, i),
 				       level - 1, &first_key);
-		if (IS_ERR(next)) {
+		if (IS_ERR(next))
 			continue;
-		} else if (!extent_buffer_uptodate(next)) {
+		if (!extent_buffer_uptodate(next)) {
 			free_extent_buffer(next);
 			continue;
 		}

----------------------------------------------------------------------

@@ -25,18 +25,6 @@
 #include "sysfs.h"
 #include "tree-mod-log.h"

-/* TODO XXX FIXME
- *  - subvol delete -> delete when ref goes to 0? delete limits also?
- *  - reorganize keys
- *  - compressed
- *  - sync
- *  - copy also limits on subvol creation
- *  - limit
- *  - caches for ulists
- *  - performance benchmarks
- *  - check all ioctl parameters
- */
-
 /*
  * Helpers to access qgroup reservation
  *
@@ -258,16 +246,19 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 	return 0;
 }

-/* must be called with qgroup_lock held */
-static int add_relation_rb(struct btrfs_fs_info *fs_info,
-			   u64 memberid, u64 parentid)
+/*
+ * Add relation specified by two qgroups.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0        on success
+ *         -ENOENT  if one of the qgroups is NULL
+ *         <0       other errors
+ */
+static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
 {
-	struct btrfs_qgroup *member;
-	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup_list *list;

-	member = find_qgroup_rb(fs_info, memberid);
-	parent = find_qgroup_rb(fs_info, parentid);
 	if (!member || !parent)
 		return -ENOENT;
@@ -283,7 +274,27 @@ static int add_relation_rb(struct btrfs_fs_info *fs_info,
 	return 0;
 }

-/* must be called with qgroup_lock held */
+/*
+ * Add relation specified by two qgroup ids.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0        on success
+ *         -ENOENT  if one of the ids does not exist
+ *         <0       other errors
+ */
+static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
+{
+	struct btrfs_qgroup *member;
+	struct btrfs_qgroup *parent;
+
+	member = find_qgroup_rb(fs_info, memberid);
+	parent = find_qgroup_rb(fs_info, parentid);
+
+	return __add_relation_rb(member, parent);
+}
+
+/* Must be called with qgroup_lock held */
 static int del_relation_rb(struct btrfs_fs_info *fs_info,
 			   u64 memberid, u64 parentid)
 {
@@ -948,6 +959,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 	 */
 	lockdep_assert_held_write(&fs_info->subvol_sem);

+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "qgroups are currently unsupported in extent tree v2");
+		return -EINVAL;
+	}
+
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (fs_info->quota_root)
 		goto out;
@@ -1451,7 +1468,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 	}

 	spin_lock(&fs_info->qgroup_lock);
-	ret = add_relation_rb(fs_info, src, dst);
+	ret = __add_relation_rb(member, parent);
 	if (ret < 0) {
 		spin_unlock(&fs_info->qgroup_lock);
 		goto out;
@@ -3268,7 +3285,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
 static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
 {
 	return btrfs_fs_closing(fs_info) ||
-		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
+		!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 }

 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@@ -3298,11 +3316,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 			err = PTR_ERR(trans);
 			break;
 		}
-		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-			err = -EINTR;
-		} else {
-			err = qgroup_rescan_leaf(trans, path);
-		}
+
+		err = qgroup_rescan_leaf(trans, path);
+
 		if (err > 0)
 			btrfs_commit_transaction(trans);
 		else
@@ -3316,7 +3332,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	if (err > 0 &&
 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-	} else if (err < 0) {
+	} else if (err < 0 || stopped) {
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	}
 	mutex_unlock(&fs_info->qgroup_rescan_lock);

----------------------------------------------------------------------

@@ -277,7 +277,7 @@ static int clone_copy_inline_extent(struct inode *dst,
 					    path->slots[0]),
 			    size);
 		btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+		btrfs_set_inode_full_sync(BTRFS_I(dst));
 		ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
@@ -494,7 +494,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 						       &clone_info, &trans);
 			if (ret)
 				goto out;
-		} else if (type == BTRFS_FILE_EXTENT_INLINE) {
+		} else {
+			ASSERT(type == BTRFS_FILE_EXTENT_INLINE);
 			/*
 			 * Inline extents always have to start at file offset 0
 			 * and can never be bigger than the sector size. We can
@@ -505,8 +506,12 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 			 */
 			ASSERT(key.offset == 0);
 			ASSERT(datal <= fs_info->sectorsize);
-			if (key.offset != 0 || datal > fs_info->sectorsize)
-				return -EUCLEAN;
+			if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) ||
+			    WARN_ON(key.offset != 0) ||
+			    WARN_ON(datal > fs_info->sectorsize)) {
+				ret = -EUCLEAN;
+				goto out;
+			}

 			ret = clone_copy_inline_extent(inode, path, &new_key,
 						       drop_start, datal, size,
@@ -518,17 +523,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 		btrfs_release_path(path);

 		/*
-		 * If this is a new extent update the last_reflink_trans of both
-		 * inodes. This is used by fsync to make sure it does not log
-		 * multiple checksum items with overlapping ranges. For older
-		 * extents we don't need to do it since inode logging skips the
-		 * checksums for older extents. Also ignore holes and inline
-		 * extents because they don't have checksums in the csum tree.
+		 * Whenever we share an extent we update the last_reflink_trans
+		 * of each inode to the current transaction. This is needed to
+		 * make sure fsync does not log multiple checksum items with
+		 * overlapping ranges (because some extent items might refer
+		 * only to sections of the original extent). For the destination
+		 * inode we do this regardless of the generation of the extents
+		 * or even if they are inline extents or explicit holes, to make
+		 * sure a full fsync does not skip them. For the source inode,
+		 * we only need to update last_reflink_trans in case it's a new
+		 * extent that is not a hole or an inline extent, to deal with
+		 * the checksums problem on fsync.
 		 */
-		if (extent_gen == trans->transid && disko > 0) {
+		if (extent_gen == trans->transid && disko > 0)
 			BTRFS_I(src)->last_reflink_trans = trans->transid;
-			BTRFS_I(inode)->last_reflink_trans = trans->transid;
-		}
+
+		BTRFS_I(inode)->last_reflink_trans = trans->transid;

 		last_dest_end = ALIGN(new_key.offset + datal,
 				      fs_info->sectorsize);
@@ -575,8 +585,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 	 * replaced file extent items.
 	 */
 	if (last_dest_end >= i_size_read(inode))
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-			&BTRFS_I(inode)->runtime_flags);
+		btrfs_set_inode_full_sync(BTRFS_I(inode));

 	ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
 			last_dest_end, destoff + len - 1, NULL, &trans);
@@ -772,9 +781,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 		if (btrfs_root_readonly(root_out))
 			return -EROFS;

-		if (file_in->f_path.mnt != file_out->f_path.mnt ||
-		    inode_in->i_sb != inode_out->i_sb)
-			return -EXDEV;
+		ASSERT(inode_in->i_sb == inode_out->i_sb);
 	}

 	/* Don't make the dst file partly checksummed */
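
Dropping the mount comparison is what allows reflink between two mounts of the same filesystem; a user-space sketch of the now-permitted case (paths hypothetical, error handling trimmed):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* FICLONE */

	int main(void)
	{
		/* the same filesystem mounted twice, e.g. two subvolume mounts */
		int src = open("/mnt/a/file", O_RDONLY);
		int dst = open("/mnt/b/clone", O_WRONLY | O_CREAT, 0644);

		if (src < 0 || dst < 0)
			return 1;
		/* previously failed with -EXDEV; now shares extents across mounts */
		if (ioctl(dst, FICLONE, src))
			return 1;
		close(src);
		close(dst);
		return 0;
	}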

----------------------------------------------------------------------

@@ -2599,9 +2599,9 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
 	eb = read_tree_block(fs_info, block->bytenr, block->owner,
 			     block->key.offset, block->level, NULL);
-	if (IS_ERR(eb)) {
+	if (IS_ERR(eb))
 		return PTR_ERR(eb);
-	} else if (!extent_buffer_uptodate(eb)) {
+	if (!extent_buffer_uptodate(eb)) {
 		free_extent_buffer(eb);
 		return -EIO;
 	}
@@ -2997,7 +2997,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 		/* Reserve metadata for this range */
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-						      clamped_len);
+						      clamped_len, clamped_len);
 		if (ret)
 			goto release_page;
@@ -4123,9 +4123,8 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
  * this function resumes merging reloc trees with corresponding fs trees.
  * this is important for keeping the sharing of tree blocks
  */
-int btrfs_recover_relocation(struct btrfs_root *root)
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	LIST_HEAD(reloc_roots);
 	struct btrfs_key key;
 	struct btrfs_root *fs_root;
@@ -4166,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
 		    key.type != BTRFS_ROOT_ITEM_KEY)
 			break;

-		reloc_root = btrfs_read_tree_root(root, &key);
+		reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
 		if (IS_ERR(reloc_root)) {
 			err = PTR_ERR(reloc_root);
 			goto out;

----------------------------------------------------------------------

@@ -3190,7 +3190,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 generation;
 	int mirror_num;
 	struct btrfs_key key;
-	u64 increment = map->stripe_len;
+	u64 increment;
 	u64 offset;
 	u64 extent_logical;
 	u64 extent_physical;

----------------------------------------------------------------------

@@ -528,17 +528,12 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p,

 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 {
-	int ret;
-
 	p->reversed = from->reversed;
 	fs_path_reset(p);

-	ret = fs_path_add_path(p, from);
-
-	return ret;
+	return fs_path_add_path(p, from);
 }

 static void fs_path_unreverse(struct fs_path *p)
 {
 	char *tmp;
@@ -7477,10 +7472,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
 		  root->root_key.objectid, root->dedupe_in_progress);
 }

-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 {
 	int ret = 0;
-	struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
+	struct btrfs_root *send_root = BTRFS_I(inode)->root;
 	struct btrfs_fs_info *fs_info = send_root->fs_info;
 	struct btrfs_root *clone_root;
 	struct send_ctx *sctx = NULL;

----------------------------------------------------------------------

@@ -126,7 +126,7 @@ enum {
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)

 #ifdef __KERNEL__
-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
 #endif

 #endif

----------------------------------------------------------------------

@@ -737,6 +737,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 	u64 thresh = div_factor_fine(space_info->total_bytes, 90);
 	u64 used;

+	lockdep_assert_held(&space_info->lock);
+
 	/* If we're just plain full then async reclaim just slows us down. */
 	if ((space_info->bytes_used + space_info->bytes_reserved +
 	     global_rsv_size) >= thresh)
@@ -1061,7 +1063,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 				 trans_rsv->reserved;
 		if (block_rsv_size < space_info->bytes_may_use)
 			delalloc_size = space_info->bytes_may_use - block_rsv_size;
-		spin_unlock(&space_info->lock);

 		/*
 		 * We don't want to include the global_rsv in our calculation,
@@ -1092,6 +1093,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 			flush = FLUSH_DELAYED_REFS_NR;
 		}

+		spin_unlock(&space_info->lock);
+
 		/*
 		 * We don't want to reclaim everything, just a portion, so scale
 		 * down the to_reclaim by 1/4. If it takes us down to 0,

----------------------------------------------------------------------

@@ -66,6 +66,52 @@ static struct file_system_type btrfs_root_fs_type;

 static int btrfs_remount(struct super_block *sb, int *flags, char *data);

+#ifdef CONFIG_PRINTK
+
+#define STATE_STRING_PREFACE	": state "
+#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/*
+ * Characters to print to indicate error conditions or uncommon filesystem state.
+ * RO is not an error.
+ */
+static const char fs_state_chars[] = {
+	[BTRFS_FS_STATE_ERROR]			= 'E',
+	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
+	[BTRFS_FS_STATE_RO]			= 0,
+	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
+	[BTRFS_FS_STATE_DEV_REPLACING]		= 'R',
+	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
+	[BTRFS_FS_STATE_NO_CSUMS]		= 'C',
+	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned int bit;
+	bool states_printed = false;
+	unsigned long fs_state = READ_ONCE(info->fs_state);
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
+		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+			*curr++ = fs_state_chars[bit];
+			states_printed = true;
+		}
+	}
+
+	/* If no states were printed, reset the buffer */
+	if (!states_printed)
+		curr = buf;
+
+	*curr++ = 0;
+}
+#endif
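
The visible effect is a state suffix after the device name. Illustrative console lines (device name and line number made up; 'EA' would mean error plus aborted transaction, 'M' a remount in progress):

	BTRFS: error (device sda1: state EA) in btrfs_commit_transaction:2340: errno=-5 IO failure
	BTRFS info (device sda1: state M): disk space caching is enabled

A healthy read-write mount prints no suffix at all, since the buffer is reset whenever none of the flagged bits map to a character.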
 /*
  * Generally the error codes correspond to their respective errors, but there
  * are a few special cases.
@@ -128,6 +174,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 {
 	struct super_block *sb = fs_info->sb;
 #ifdef CONFIG_PRINTK
+	char statestr[STATE_STRING_BUF_LEN];
 	const char *errstr;
 #endif
@@ -140,6 +187,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function

 #ifdef CONFIG_PRINTK
 	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -148,12 +196,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 		vaf.fmt = fmt;
 		vaf.va = &args;

-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
-			sb->s_id, function, line, errno, errstr, &vaf);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
-			sb->s_id, function, line, errno, errstr);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
 	}
 #endif
@@ -240,11 +288,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 		vaf.va = &args;

 		if (__ratelimit(ratelimit)) {
-			if (fs_info)
-				printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-					fs_info->sb->s_id, &vaf);
-			else
+			if (fs_info) {
+				char statestr[STATE_STRING_BUF_LEN];
+
+				btrfs_state_to_string(fs_info, statestr);
+				printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+					fs_info->sb->s_id, statestr, &vaf);
+			} else {
 				printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+			}
 		}

 	va_end(args);
@@ -861,6 +913,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			break;
 		case Opt_space_cache:
 		case Opt_space_cache_version:
+			/*
+			 * We already set FREE_SPACE_TREE above because we have
+			 * compat_ro(FREE_SPACE_TREE) set, and we aren't going
+			 * to allow v1 to be set for extent tree v2, simply
+			 * ignore this setting if we're extent tree v2.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			if (token == Opt_space_cache ||
 			    strcmp(args[0].from, "v1") == 0) {
 				btrfs_clear_opt(info->mount_opt,
@@ -881,6 +941,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
 			break;
 		case Opt_no_space_cache:
+			/*
+			 * We cannot operate without the free space tree with
+			 * extent tree v2, ignore this option.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			if (btrfs_test_opt(info, SPACE_CACHE)) {
 				btrfs_clear_and_info(info, SPACE_CACHE,
 					     "disabling disk space caching");
@@ -896,6 +962,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 	"the 'inode_cache' option is deprecated and has no effect since 5.11");
 			break;
 		case Opt_clear_cache:
+			/*
+			 * We cannot clear the free space tree with extent tree
+			 * v2, ignore this option.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			btrfs_set_and_info(info, CLEAR_CACHE,
 					   "force clearing of disk cache");
 			break;
@@ -2383,6 +2455,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 {
 	struct btrfs_ioctl_vol_args *vol;
 	struct btrfs_device *device = NULL;
+	dev_t devt = 0;
 	int ret = -ENOTTY;

 	if (!capable(CAP_SYS_ADMIN))
@@ -2402,7 +2475,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_FORGET_DEV:
-		ret = btrfs_forget_devices(vol->name);
+		if (vol->name[0] != 0) {
+			ret = lookup_bdev(vol->name, &devt);
+			if (ret)
+				break;
+		}
+
+		ret = btrfs_forget_devices(devt);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
 		mutex_lock(&uuid_mutex);
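
For reference, user space reaches this path through the control device rather than a mounted file. A hedged sketch of what a forget request boils down to (helper name and paths illustrative):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Drop one cached, unmounted device from the module's list; with an
	 * empty name the kernel now passes devt == 0 and forgets all of them. */
	static int forget_device(const char *path)
	{
		struct btrfs_ioctl_vol_args vol;
		int fd = open("/dev/btrfs-control", O_RDWR);
		int ret;

		if (fd < 0)
			return -1;
		memset(&vol, 0, sizeof(vol));
		if (path)
			strncpy(vol.name, path, BTRFS_PATH_NAME_MAX);
		ret = ioctl(fd, BTRFS_IOC_FORGET_DEV, &vol);
		close(fd);
		return ret;
	}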

----------------------------------------------------------------------

@@ -283,9 +283,11 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
 BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
 BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
-/* Remove once support for zoned allocation is feature complete */
 #ifdef CONFIG_BTRFS_DEBUG
+/* Remove once support for zoned allocation is feature complete */
 BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+/* Remove once support for extent tree v2 is feature complete */
+BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2);
 #endif
 #ifdef CONFIG_FS_VERITY
 BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
@@ -314,6 +316,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid1c34),
 #ifdef CONFIG_BTRFS_DEBUG
 	BTRFS_FEAT_ATTR_PTR(zoned),
+	BTRFS_FEAT_ATTR_PTR(extent_tree_v2),
 #endif
 #ifdef CONFIG_FS_VERITY
 	BTRFS_FEAT_ATTR_PTR(verity),
@@ -1104,6 +1107,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
 static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
 static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];

+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names) ==
+	      ARRAY_SIZE(btrfs_feature_attrs));
+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names[0]) ==
+	      ARRAY_SIZE(btrfs_feature_attrs[0]));
+
 static const u64 supported_feature_masks[FEAT_MAX] = {
 	[FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
 	[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
@@ -1272,11 +1280,6 @@ static void init_feature_attrs(void)
 	struct btrfs_feature_attr *fa;
 	int set, i;

-	BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
-		     ARRAY_SIZE(btrfs_feature_attrs));
-	BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
-		     ARRAY_SIZE(btrfs_feature_attrs[0]));
-
 	memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
 	memset(btrfs_unknown_feature_names, 0,
 	       sizeof(btrfs_unknown_feature_names));

----------------------------------------------------------------------

@@ -15,6 +15,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
 	struct extent_map *em;
 	struct rb_node *node;

+	write_lock(&em_tree->lock);
 	while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
 		node = rb_first_cached(&em_tree->map);
 		em = rb_entry(node, struct extent_map, rb_node);
@@ -32,6 +33,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
 #endif
 		free_extent_map(em);
 	}
+	write_unlock(&em_tree->lock);
 }

 /*

----------------------------------------------------------------------

@@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
 		super->cache_generation = 0;
 	if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
 		super->uuid_tree_generation = root_item->generation;
+
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		root_item = &fs_info->block_group_root->root_item;
+
+		super->block_group_root = root_item->bytenr;
+		super->block_group_root_generation = root_item->generation;
+		super->block_group_root_level = root_item->level;
+	}
 }

 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	list_add_tail(&fs_info->chunk_root->dirty_list,
 		      &cur_trans->switch_commits);

+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_set_root_node(&fs_info->block_group_root->root_item,
+				    fs_info->block_group_root->node);
+		list_add_tail(&fs_info->block_group_root->dirty_list,
+			      &cur_trans->switch_commits);
+	}
+
 	switch_commit_roots(trans);

 	ASSERT(list_empty(&cur_trans->dirty_bgs));
@@ -2490,10 +2505,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
  * because btrfs_commit_super will poke cleaner thread and it will process it a
  * few seconds later.
  */
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
 {
+	struct btrfs_root *root;
 	int ret;
-	struct btrfs_fs_info *fs_info = root->fs_info;

 	spin_lock(&fs_info->trans_lock);
 	if (list_empty(&fs_info->dead_roots)) {

----------------------------------------------------------------------

@@ -217,7 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
 void btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
 void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);

----------------------------------------------------------------------

@@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
 static int check_block_group_item(struct extent_buffer *leaf,
 				  struct btrfs_key *key, int slot)
 {
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_block_group_item bgi;
 	u32 item_size = btrfs_item_size(leaf, slot);
+	u64 chunk_objectid;
 	u64 flags;
 	u64 type;
@@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
 	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
 			   sizeof(bgi));
-	if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
-		     BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
+	chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		/*
+		 * We don't init the nr_global_roots until we load the global
+		 * roots, so this could be 0 at mount time. If it's 0 we'll
+		 * just assume we're fine, and later we'll check against our
+		 * actual value.
+		 */
+		if (unlikely(fs_info->nr_global_roots &&
+			     chunk_objectid >= fs_info->nr_global_roots)) {
+			block_group_err(leaf, slot,
+	"invalid block group global root id, have %llu, needs to be <= %llu",
+					chunk_objectid,
+					fs_info->nr_global_roots);
+			return -EUCLEAN;
+		}
+	} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
 		block_group_err(leaf, slot,
 		"invalid block group chunk objectid, have %llu expect %llu",
 				btrfs_stack_block_group_chunk_objectid(&bgi),
@@ -1648,7 +1665,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 		/* These trees must never be empty */
 		if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
 			     owner == BTRFS_CHUNK_TREE_OBJECTID ||
-			     owner == BTRFS_EXTENT_TREE_OBJECTID ||
 			     owner == BTRFS_DEV_TREE_OBJECTID ||
 			     owner == BTRFS_FS_TREE_OBJECTID ||
 			     owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
@@ -1657,12 +1673,25 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 				    owner);
 			return -EUCLEAN;
 		}
+
 		/* Unknown tree */
 		if (unlikely(owner == 0)) {
 			generic_err(leaf, 0,
 				"invalid owner, root 0 is not defined");
 			return -EUCLEAN;
 		}
+
+		/* EXTENT_TREE_V2 can have empty extent trees. */
+		if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+			return 0;
+
+		if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
+			generic_err(leaf, 0,
+			"invalid root, root %llu must never be empty",
+				    owner);
+			return -EUCLEAN;
+		}
+
 		return 0;
 	}

----------------------------------------------------------------------

(file diff suppressed because it is too large)

----------------------------------------------------------------------

@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
 	int log_transid;
 	bool log_new_dentries;
 	bool logging_new_name;
+	/* Indicate if the inode being logged was logged before. */
+	bool logged_before;
 	/* Tracks the last logged dir item/index key offset. */
 	u64 last_dir_item_offset;
 	struct inode *inode;
@@ -32,6 +34,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
 	ctx->log_transid = 0;
 	ctx->log_new_dentries = false;
 	ctx->logging_new_name = false;
+	ctx->logged_before = false;
 	ctx->inode = inode;
 	INIT_LIST_HEAD(&ctx->list);
 	INIT_LIST_HEAD(&ctx->ordered_extents);
@@ -86,7 +89,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
 				   struct btrfs_inode *dir);
 void btrfs_log_new_name(struct btrfs_trans_handle *trans,
-			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
-			struct dentry *parent);
+			struct dentry *old_dentry, struct btrfs_inode *old_dir,
+			u64 old_dir_index, struct dentry *parent);
 #endif

View File

@ -534,30 +534,20 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
return ret; return ret;
} }
static bool device_path_matched(const char *path, struct btrfs_device *device) /**
{ * Search and remove all stale devices (which are not mounted).
int found;
rcu_read_lock();
found = strcmp(rcu_str_deref(device->name), path);
rcu_read_unlock();
return found == 0;
}
/*
* Search and remove all stale (devices which are not mounted) devices.
* When both inputs are NULL, it will search and release all stale devices. * When both inputs are NULL, it will search and release all stale devices.
* path: Optional. When provided will it release all unmounted devices *
* matching this path only. * @devt: Optional. When provided will it release all unmounted devices
* skip_dev: Optional. Will skip this device when searching for the stale * matching this devt only.
* @skip_device: Optional. Will skip this device when searching for the stale
* devices. * devices.
* Return: 0 for success or if @path is NULL. *
* -EBUSY if @path is a mounted device. * Return: 0 for success or if @devt is 0.
* -ENOENT if @path does not match any device in the list. * -EBUSY if @devt is a mounted device.
* -ENOENT if @devt does not match any device in the list.
*/ */
static int btrfs_free_stale_devices(const char *path, static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
struct btrfs_device *skip_device)
{ {
struct btrfs_fs_devices *fs_devices, *tmp_fs_devices; struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
struct btrfs_device *device, *tmp_device; struct btrfs_device *device, *tmp_device;
@ -565,7 +555,7 @@ static int btrfs_free_stale_devices(const char *path,
lockdep_assert_held(&uuid_mutex); lockdep_assert_held(&uuid_mutex);
if (path) if (devt)
ret = -ENOENT; ret = -ENOENT;
list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
@ -575,13 +565,11 @@ static int btrfs_free_stale_devices(const char *path,
&fs_devices->devices, dev_list) { &fs_devices->devices, dev_list) {
if (skip_device && skip_device == device) if (skip_device && skip_device == device)
continue; continue;
if (path && !device->name) if (devt && devt != device->devt)
continue;
if (path && !device_path_matched(path, device))
continue; continue;
if (fs_devices->opened) { if (fs_devices->opened) {
/* for an already deleted device return 0 */ /* for an already deleted device return 0 */
if (path && ret != 0) if (devt && ret != 0)
ret = -EBUSY; ret = -EBUSY;
break; break;
} }
@ -614,7 +602,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device, fmode_t flags, struct btrfs_device *device, fmode_t flags,
void *holder) void *holder)
{ {
struct request_queue *q;
struct block_device *bdev; struct block_device *bdev;
struct btrfs_super_block *disk_super; struct btrfs_super_block *disk_super;
u64 devid; u64 devid;
@@ -656,8 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 		set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	}
 
-	q = bdev_get_queue(bdev);
-	if (!blk_queue_nonrot(q))
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
 		fs_devices->rotating = true;
 
 	device->bdev = bdev;
@@ -781,11 +767,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 	struct rcu_string *name;
 	u64 found_transid = btrfs_super_generation(disk_super);
 	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
+	dev_t path_devt;
+	int error;
 	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
 	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
 
+	error = lookup_bdev(path, &path_devt);
+	if (error)
+		return ERR_PTR(error);
+
 	if (fsid_change_in_progress) {
 		if (!has_metadata_uuid)
 			fs_devices = find_fsid_inprogress(disk_super);
@@ -868,6 +860,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			return ERR_PTR(-ENOMEM);
 		}
 		rcu_assign_pointer(device->name, name);
+		device->devt = path_devt;
 
 		list_add_rcu(&device->dev_list, &fs_devices->devices);
 		fs_devices->num_devices++;
@@ -928,25 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		/*
 		 * We are going to replace the device path for a given devid,
 		 * make sure it's the same device if the device is mounted
+		 *
+		 * NOTE: the device->fs_info may not be reliable here so pass
+		 * in a NULL to message helpers instead. This avoids a possible
+		 * use-after-free when the fs_info and fs_info->sb are already
+		 * torn down.
 		 */
 		if (device->bdev) {
-			int error;
-			dev_t path_dev;
-
-			error = lookup_bdev(path, &path_dev);
-			if (error) {
-				mutex_unlock(&fs_devices->device_list_mutex);
-				return ERR_PTR(error);
-			}
-
-			if (device->bdev->bd_dev != path_dev) {
+			if (device->devt != path_devt) {
 				mutex_unlock(&fs_devices->device_list_mutex);
-				/*
-				 * device->fs_info may not be reliable here, so
-				 * pass in a NULL instead. This avoids a
-				 * possible use-after-free when the fs_info and
-				 * fs_info->sb are already torn down.
-				 */
 				btrfs_warn_in_rcu(NULL,
	"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
					path, devid, found_transid,
@@ -954,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
					task_pid_nr(current));
 				return ERR_PTR(-EEXIST);
 			}
-			btrfs_info_in_rcu(device->fs_info,
+			btrfs_info_in_rcu(NULL,
	"devid %llu device path %s changed to %s scanned by %s (%d)",
					devid, rcu_str_deref(device->name),
					path, current->comm,
@@ -972,6 +955,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			fs_devices->missing_devices--;
 			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
 		}
+		device->devt = path_devt;
 	}
 
@@ -1331,12 +1315,12 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
 	return disk_super;
 }
 
-int btrfs_forget_devices(const char *path)
+int btrfs_forget_devices(dev_t devt)
 {
 	int ret;
 
 	mutex_lock(&uuid_mutex);
-	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
+	ret = btrfs_free_stale_devices(devt, NULL);
 	mutex_unlock(&uuid_mutex);
 
 	return ret;
@@ -1385,10 +1369,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	}
 
 	device = device_list_add(path, disk_super, &new_device_added);
-	if (!IS_ERR(device)) {
-		if (new_device_added)
-			btrfs_free_stale_devices(path, device);
-	}
+	if (!IS_ERR(device) && new_device_added)
+		btrfs_free_stale_devices(device->devt, device);
 
 	btrfs_release_disk_super(disk_super);
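
Throughout this part of the series the device list is keyed by dev_t rather than by path string, so two different paths naming the same node (say /dev/sda1 and a /dev/disk/by-id symlink behind device mapper) compare equal. A minimal sketch of the comparison, assuming only lookup_bdev() and the new devt member; the helper name is illustrative, not from the series:

	/* Sketch only: resolve a user-supplied path to a dev_t and compare it
	 * against a cached btrfs_device, as device_list_add() now does. */
	static int devt_matches(const char *path, const struct btrfs_device *device)
	{
		dev_t devt;
		int ret;

		ret = lookup_bdev(path, &devt);	/* resolves the path via the VFS */
		if (ret)
			return ret;

		return device->devt == devt;
	}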
@@ -2102,6 +2084,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 	u64 num_devices;
 	int ret = 0;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info, "device remove not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	/*
 	 * The device list in fs_devices is accessed without locks (neither
 	 * uuid_mutex nor device_list_mutex) as it won't change on a mounted
@@ -2606,7 +2593,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
 {
 	struct btrfs_root *root = fs_info->dev_root;
-	struct request_queue *q;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct block_device *bdev;
@@ -2668,6 +2654,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	device->fs_info = fs_info;
 	device->bdev = bdev;
+	ret = lookup_bdev(device_path, &device->devt);
+	if (ret)
+		goto error_free_device;
 
 	ret = btrfs_get_dev_zone_info(device, false);
 	if (ret)
@@ -2679,7 +2668,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		goto error_free_zone;
 	}
 
-	q = bdev_get_queue(bdev);
 	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	device->generation = trans->transid;
 	device->io_width = fs_info->sectorsize;
@@ -2727,7 +2715,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-	if (!blk_queue_nonrot(q))
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
 		fs_devices->rotating = true;
 
 	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@@ -2814,7 +2802,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	 * We can ignore the return value as it typically returns -EINVAL and
 	 * only succeeds if the device was an alien.
 	 */
-	btrfs_forget_devices(device_path);
+	btrfs_forget_devices(device->devt);
 
 	/* Update ctime/mtime for blkid or udev */
 	update_dev_time(device_path);
@@ -3251,6 +3239,12 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	u64 length;
 	int ret;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "relocate: not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	/*
 	 * Prevent races with automatic removal of unused block groups.
 	 * After we relocate and before we remove the chunk with offset
@@ -7060,6 +7054,27 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
 }
 #endif
 
+static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
+						  u64 devid, u8 *uuid)
+{
+	struct btrfs_device *dev;
+
+	if (!btrfs_test_opt(fs_info, DEGRADED)) {
+		btrfs_report_missing_device(fs_info, devid, uuid, true);
+		return ERR_PTR(-ENOENT);
+	}
+
+	dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
+	if (IS_ERR(dev)) {
+		btrfs_err(fs_info, "failed to init missing device %llu: %ld",
+			  devid, PTR_ERR(dev));
+		return dev;
+	}
+
+	btrfs_report_missing_device(fs_info, devid, uuid, false);
+
+	return dev;
+}
+
 static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
 			  struct btrfs_chunk *chunk)
 {
@@ -7147,28 +7162,17 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
				   BTRFS_UUID_SIZE);
 		args.uuid = uuid;
 		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
-		if (!map->stripes[i].dev &&
-		    !btrfs_test_opt(fs_info, DEGRADED)) {
-			free_extent_map(em);
-			btrfs_report_missing_device(fs_info, devid, uuid, true);
-			return -ENOENT;
-		}
 		if (!map->stripes[i].dev) {
-			map->stripes[i].dev =
-				add_missing_dev(fs_info->fs_devices, devid,
-						uuid);
+			map->stripes[i].dev = handle_missing_device(fs_info,
+								    devid, uuid);
 			if (IS_ERR(map->stripes[i].dev)) {
 				free_extent_map(em);
-				btrfs_err(fs_info,
-					"failed to init missing dev %llu: %ld",
-					devid, PTR_ERR(map->stripes[i].dev));
 				return PTR_ERR(map->stripes[i].dev);
 			}
-			btrfs_report_missing_device(fs_info, devid, uuid, false);
 		}
+
 		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				&(map->stripes[i].dev->dev_state));
 
@@ -8299,10 +8303,12 @@ static int relocating_repair_kthread(void *data)
 	target = cache->start;
 	btrfs_put_block_group(cache);
 
+	sb_start_write(fs_info->sb);
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
 		btrfs_info(fs_info,
			   "zoned: skip relocating block group %llu to repair: EBUSY",
			   target);
+		sb_end_write(fs_info->sb);
 		return -EBUSY;
 	}
@@ -8330,6 +8336,7 @@ static int relocating_repair_kthread(void *data)
 		btrfs_put_block_group(cache);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);
 	btrfs_exclop_finish(fs_info);
+	sb_end_write(fs_info->sb);
 
 	return ret;
 }
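
The repair thread issues writes from kernel context, outside any syscall, so it has to take the superblock write (freeze) reference around the exclusive operation itself. The bracket pattern in isolation, with do_relocation_work() purely illustrative:

	/* Sketch only: in-kernel writers must hold freeze protection,
	 * mirroring what relocating_repair_kthread() does above. */
	sb_start_write(sb);		/* waits if the fs is being frozen */
	ret = do_relocation_work();	/* hypothetical worker */
	sb_end_write(sb);		/* lets a pending freeze proceed */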

diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
@@ -72,6 +72,11 @@ struct btrfs_device {
 	/* the mode sent to blkdev_get */
 	fmode_t mode;
 
+	/*
+	 * Device's major-minor number. Must be set even if the device is not
+	 * opened (bdev == NULL), unless the device is missing.
+	 */
+	dev_t devt;
 	unsigned long dev_state;
 	blk_status_t last_flush_error;
@@ -505,7 +510,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,
					   fmode_t flags, void *holder);
-int btrfs_forget_devices(const char *path);
+int btrfs_forget_devices(dev_t devt);
 void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
 void btrfs_assign_next_active_device(struct btrfs_device *device,

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
@@ -652,8 +652,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 		if (model == BLK_ZONED_HM ||
		    (model == BLK_ZONED_HA && incompat_zoned) ||
		    (model == BLK_ZONED_NONE && incompat_zoned)) {
-			struct btrfs_zoned_device_info *zone_info =
-				device->zone_info;
+			struct btrfs_zoned_device_info *zone_info;
 
+			zone_info = device->zone_info;
 			zoned_devices++;
@@ -1215,12 +1214,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	struct btrfs_device *device;
 	u64 logical = cache->start;
 	u64 length = cache->length;
-	u64 physical = 0;
 	int ret;
 	int i;
 	unsigned int nofs_flag;
 	u64 *alloc_offsets = NULL;
 	u64 *caps = NULL;
+	u64 *physical = NULL;
 	unsigned long *active = NULL;
 	u64 last_alloc = 0;
 	u32 num_sequential = 0, num_conventional = 0;
@@ -1264,6 +1263,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		goto out;
 	}
 
+	physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
+	if (!physical) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
 	if (!active) {
 		ret = -ENOMEM;
@@ -1277,14 +1282,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		int dev_replace_is_ongoing = 0;
 
 		device = map->stripes[i].dev;
-		physical = map->stripes[i].physical;
+		physical[i] = map->stripes[i].physical;
 
 		if (device->bdev == NULL) {
 			alloc_offsets[i] = WP_MISSING_DEV;
 			continue;
 		}
 
-		is_sequential = btrfs_dev_is_sequential(device, physical);
+		is_sequential = btrfs_dev_is_sequential(device, physical[i]);
 		if (is_sequential)
 			num_sequential++;
 		else
@@ -1299,21 +1304,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
		 * This zone will be used for allocation, so mark this zone
		 * non-empty.
		 */
-		btrfs_dev_clear_zone_empty(device, physical);
+		btrfs_dev_clear_zone_empty(device, physical[i]);
 
 		down_read(&dev_replace->rwsem);
 		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
 		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
-			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
+			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
 		up_read(&dev_replace->rwsem);
 
		/*
		 * The group is mapped to a sequential zone. Get the zone write
		 * pointer to determine the allocation offset within the zone.
		 */
-		WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
+		WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
 		nofs_flag = memalloc_nofs_save();
-		ret = btrfs_get_dev_zone(device, physical, &zone);
+		ret = btrfs_get_dev_zone(device, physical[i], &zone);
 		memalloc_nofs_restore(nofs_flag);
 		if (ret == -EIO || ret == -EOPNOTSUPP) {
 			ret = 0;
@@ -1339,7 +1344,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		case BLK_ZONE_COND_READONLY:
 			btrfs_err(fs_info,
		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
-				  physical >> device->zone_info->zone_size_shift,
+				  physical[i] >> device->zone_info->zone_size_shift,
				  rcu_str_deref(device->name), device->devid);
 			alloc_offsets[i] = WP_MISSING_DEV;
 			break;
@@ -1404,7 +1409,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		if (alloc_offsets[0] == WP_MISSING_DEV) {
 			btrfs_err(fs_info,
			"zoned: cannot recover write pointer for zone %llu",
-				physical);
+				physical[0]);
 			ret = -EIO;
 			goto out;
 		}
@@ -1413,6 +1418,42 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		cache->zone_is_active = test_bit(0, active);
 		break;
 	case BTRFS_BLOCK_GROUP_DUP:
+		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
+			btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (alloc_offsets[0] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				  physical[0]);
+			ret = -EIO;
+			goto out;
+		}
+		if (alloc_offsets[1] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				  physical[1]);
+			ret = -EIO;
+			goto out;
+		}
+		if (alloc_offsets[0] != alloc_offsets[1]) {
+			btrfs_err(fs_info,
+			"zoned: write pointer offset mismatch of zones in DUP profile");
+			ret = -EIO;
+			goto out;
+		}
+		if (test_bit(0, active) != test_bit(1, active)) {
+			if (!btrfs_zone_activate(cache)) {
+				ret = -EIO;
+				goto out;
+			}
+		} else {
+			cache->zone_is_active = test_bit(0, active);
+		}
+		cache->alloc_offset = alloc_offsets[0];
+		cache->zone_capacity = min(caps[0], caps[1]);
+		break;
 	case BTRFS_BLOCK_GROUP_RAID1:
 	case BTRFS_BLOCK_GROUP_RAID0:
 	case BTRFS_BLOCK_GROUP_RAID10:
@@ -1465,6 +1506,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		cache->physical_map = NULL;
 	}
 	bitmap_free(active);
+	kfree(physical);
 	kfree(caps);
 	kfree(alloc_offsets);
 	free_extent_map(em);
@@ -1781,50 +1823,55 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 	struct btrfs_device *device;
 	u64 physical;
 	bool ret;
+	int i;
 
 	if (!btrfs_is_zoned(block_group->fs_info))
 		return true;
 
 	map = block_group->physical_map;
-	/* Currently support SINGLE profile only */
-	ASSERT(map->num_stripes == 1);
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (device->zone_info->max_active_zones == 0)
-		return true;
 
 	spin_lock(&block_group->lock);
 	if (block_group->zone_is_active) {
 		ret = true;
 		goto out_unlock;
 	}
 
-	/* No space left */
-	if (block_group->alloc_offset == block_group->zone_capacity) {
-		ret = false;
-		goto out_unlock;
-	}
+	for (i = 0; i < map->num_stripes; i++) {
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
 
-	if (!btrfs_dev_set_active_zone(device, physical)) {
-		/* Cannot activate the zone */
-		ret = false;
-		goto out_unlock;
-	}
+		if (device->zone_info->max_active_zones == 0)
+			continue;
 
-	/* Successfully activated all the zones */
-	block_group->zone_is_active = 1;
+		/* No space left */
+		if (block_group->alloc_offset == block_group->zone_capacity) {
+			ret = false;
+			goto out_unlock;
+		}
+
+		if (!btrfs_dev_set_active_zone(device, physical)) {
+			/* Cannot activate the zone */
+			ret = false;
+			goto out_unlock;
+		}
+
+		/* Successfully activated all the zones */
+		if (i == map->num_stripes - 1)
+			block_group->zone_is_active = 1;
+	}
 	spin_unlock(&block_group->lock);
 
-	/* For the active block group list */
-	btrfs_get_block_group(block_group);
+	if (block_group->zone_is_active) {
+		/* For the active block group list */
+		btrfs_get_block_group(block_group);
 
-	spin_lock(&fs_info->zone_active_bgs_lock);
-	ASSERT(list_empty(&block_group->active_bg_list));
-	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
-	spin_unlock(&fs_info->zone_active_bgs_lock);
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		list_add_tail(&block_group->active_bg_list,
+			      &fs_info->zone_active_bgs);
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+	}
 
 	return true;
@@ -1840,19 +1887,12 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	struct btrfs_device *device;
 	u64 physical;
 	int ret = 0;
+	int i;
 
 	if (!btrfs_is_zoned(fs_info))
 		return 0;
 
 	map = block_group->physical_map;
-	/* Currently support SINGLE profile only */
-	ASSERT(map->num_stripes == 1);
-
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (device->zone_info->max_active_zones == 0)
-		return 0;
 
 	spin_lock(&block_group->lock);
 	if (!block_group->zone_is_active) {
@@ -1904,25 +1944,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	btrfs_clear_data_reloc_bg(block_group);
 	spin_unlock(&block_group->lock);
 
-	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
-			       physical >> SECTOR_SHIFT,
-			       device->zone_info->zone_size >> SECTOR_SHIFT,
-			       GFP_NOFS);
+	for (i = 0; i < map->num_stripes; i++) {
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
 
-	btrfs_dec_block_group_ro(block_group);
+		if (device->zone_info->max_active_zones == 0)
+			continue;
 
-	if (!ret) {
-		btrfs_dev_clear_active_zone(device, physical);
+		ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+				       physical >> SECTOR_SHIFT,
+				       device->zone_info->zone_size >> SECTOR_SHIFT,
+				       GFP_NOFS);
+		if (ret)
+			return ret;
 
-		spin_lock(&fs_info->zone_active_bgs_lock);
-		ASSERT(!list_empty(&block_group->active_bg_list));
-		list_del_init(&block_group->active_bg_list);
-		spin_unlock(&fs_info->zone_active_bgs_lock);
+		btrfs_dev_clear_active_zone(device, physical);
+	}
 
-		/* For active_bg_list */
-		btrfs_put_block_group(block_group);
-	}
+	btrfs_dec_block_group_ro(block_group);
 
-	return ret;
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(!list_empty(&block_group->active_bg_list));
+	list_del_init(&block_group->active_bg_list);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	/* For active_bg_list */
+	btrfs_put_block_group(block_group);
+
+	return 0;
 }
 
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)

diff --git a/fs/internal.h b/fs/internal.h
@@ -157,11 +157,6 @@ extern char *simple_dname(struct dentry *, char *, int);
 extern void dput_to_list(struct dentry *, struct list_head *);
 extern void shrink_dentry_list(struct list_head *);
 
-/*
- * read_write.c
- */
-extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
-
 /*
  * pipe.c
  */

diff --git a/fs/ioctl.c b/fs/ioctl.c
@@ -236,9 +236,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	if (!src_file.file)
 		return -EBADF;
 
-	ret = -EXDEV;
-	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
-		goto fdput;
-
 	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
				      olen, 0);
 	if (cloned < 0)
@@ -247,7 +244,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 		ret = -EINVAL;
 	else
 		ret = 0;
-fdput:
 	fdput(src_file);
 	return ret;
 }
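
With the per-mount check gone, FICLONE requires the source and destination only to be on the same filesystem, so a reflink across two mounts (for example bind mounts) of one btrfs can now succeed instead of failing with EXDEV. A hedged userspace sketch, error handling trimmed:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>

	/* Sketch only: whole-file reflink between two mounts of the same fs.
	 * Before this series the kernel returned -EXDEV for this case. */
	int reflink(const char *src_path, const char *dst_path)
	{
		int src = open(src_path, O_RDONLY);
		int dst = open(dst_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);

		if (src < 0 || dst < 0)
			return -1;
		return ioctl(dst, FICLONE, src);	/* shares extents, no data copy */
	}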

diff --git a/fs/read_write.c b/fs/read_write.c
@@ -385,6 +385,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
 	return security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
 }
+EXPORT_SYMBOL(rw_verify_area);
 
 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
@@ -1617,24 +1618,16 @@ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
 	return 0;
 }
 
-/*
- * Performs necessary checks before doing a write
- *
- * Can adjust writing position or amount of bytes to write.
- * Returns appropriate error code that caller should return or
- * zero in case that write should be allowed.
- */
-ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+/* Like generic_write_checks(), but takes size of write instead of iter. */
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	loff_t count;
-	int ret;
 
 	if (IS_SWAPFILE(inode))
 		return -ETXTBSY;
 
-	if (!iov_iter_count(from))
+	if (!*count)
 		return 0;
 
 	/* FIXME: this is for backwards compatibility with 2.4 */
@@ -1644,8 +1637,23 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;
 
-	count = iov_iter_count(from);
-	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+	return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
+}
+EXPORT_SYMBOL(generic_write_checks_count);
+
+/*
+ * Performs necessary checks before doing a write
+ *
+ * Can adjust writing position or amount of bytes to write.
+ * Returns appropriate error code that caller should return or
+ * zero in case that write should be allowed.
+ */
+ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	loff_t count = iov_iter_count(from);
+	int ret;
+
+	ret = generic_write_checks_count(iocb, &count);
 	if (ret)
 		return ret;
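
generic_write_checks_count() splits the iov_iter-independent checks out of generic_write_checks(), so a caller that only knows a byte count, such as the btrfs encoded write ioctl, can reuse them. A sketch of such a caller; the function name is illustrative, not from the series:

	/* Sketch only: run the standard pre-write checks when all we have is
	 * a byte count. The count may be clamped by the fs limits. */
	static ssize_t example_check_encoded_write(struct kiocb *iocb, size_t len)
	{
		loff_t count = len;
		int ret;

		ret = generic_write_checks_count(iocb, &count);
		if (ret)
			return ret;

		return count;	/* number of bytes the write may cover */
	}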

diff --git a/fs/remap_range.c b/fs/remap_range.c
@@ -362,11 +362,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 
 	WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
 
-	/*
-	 * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
-	 * the same mount. Practically, they only need to be on the same file
-	 * system.
-	 */
 	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
 		return -EXDEV;
 
@@ -458,7 +453,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EXDEV;
-	if (src_file->f_path.mnt != dst_file->f_path.mnt)
+	if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
 		goto out_drop_write;
 
 	ret = -EISDIR;

diff --git a/include/linux/fs.h b/include/linux/fs.h
@@ -3130,6 +3130,7 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
 extern int generic_write_check_limits(struct file *file, loff_t pos,
		loff_t *count);
 extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
@@ -3173,6 +3174,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
		int whence, loff_t size);
 extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
 extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
+int rw_verify_area(int, struct file *, const loff_t *, size_t);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 extern int stream_open(struct inode * inode, struct file * filp);

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
@@ -53,6 +53,7 @@ struct btrfs_space_info;
 		{ BTRFS_TREE_RELOC_OBJECTID,	"TREE_RELOC"	},	\
 		{ BTRFS_UUID_TREE_OBJECTID,	"UUID_TREE"	},	\
 		{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },	\
+		{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\
 		{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
 
 #define show_root_type(obj)						\

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
@@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
 #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
+#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
 
 struct btrfs_ioctl_feature_flags {
 	__u64 compat_flags;
@@ -868,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
 	__u8 align[7];
 };
 
+/*
+ * Data and metadata for an encoded read or write.
+ *
+ * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
+ * compression). This can be used to read the compressed contents of a file or
+ * write pre-compressed data directly to a file.
+ *
+ * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
+ * preadv/pwritev with additional metadata about how the data is encoded and the
+ * size of the unencoded data.
+ *
+ * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
+ * the metadata fields, and returns the size of the encoded data. It reads one
+ * extent per call. It can also read data which is not encoded.
+ *
+ * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
+ * from the iovecs, and returns the size of the encoded data. Note that the
+ * encoded data is not validated when it is written; if it is not valid (e.g.,
+ * it cannot be decompressed), then a subsequent read may return an error.
+ *
+ * Since the filesystem page cache contains decoded data, encoded I/O bypasses
+ * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
+ */
+struct btrfs_ioctl_encoded_io_args {
+	/* Input parameters for both reads and writes. */
+
+	/*
+	 * iovecs containing encoded data.
+	 *
+	 * For reads, if the size of the encoded data is larger than the sum of
+	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
+	 * ENOBUFS.
+	 *
+	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
+	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
+	 * increase in the future). This must also be less than or equal to
+	 * unencoded_len.
+	 */
+	const struct iovec __user *iov;
+	/* Number of iovecs. */
+	unsigned long iovcnt;
+	/*
+	 * Offset in file.
+	 *
+	 * For writes, must be aligned to the sector size of the filesystem.
+	 */
+	__s64 offset;
+	/* Currently must be zero. */
+	__u64 flags;
+
+	/*
+	 * For reads, the following members are output parameters that will
+	 * contain the returned metadata for the encoded data.
+	 * For writes, the following members must be set to the metadata for the
+	 * encoded data.
+	 */
+
+	/*
+	 * Length of the data in the file.
+	 *
+	 * Must be less than or equal to unencoded_len - unencoded_offset. For
+	 * writes, must be aligned to the sector size of the filesystem unless
+	 * the data ends at or beyond the current end of the file.
+	 */
+	__u64 len;
+	/*
+	 * Length of the unencoded (i.e., decrypted and decompressed) data.
+	 *
+	 * For writes, must be no more than 128 KiB (this limit may increase in
+	 * the future). If the unencoded data is actually longer than
+	 * unencoded_len, then it is truncated; if it is shorter, then it is
+	 * extended with zeroes.
+	 */
+	__u64 unencoded_len;
+	/*
+	 * Offset from the first byte of the unencoded data to the first byte of
+	 * logical data in the file.
+	 *
+	 * Must be less than unencoded_len.
+	 */
+	__u64 unencoded_offset;
+	/*
+	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
+	 *
+	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
+	 */
+	__u32 compression;
+	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
+	__u32 encryption;
+	/*
+	 * Reserved for future expansion.
+	 *
+	 * For reads, always returned as zero. Users should check for non-zero
+	 * bytes. If there are any, then the kernel has a newer version of this
+	 * structure with additional information that the user definition is
+	 * missing.
+	 *
+	 * For writes, must be zeroed.
+	 */
+	__u8 reserved[64];
+};
+
+/* Data is not compressed. */
+#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
+/* Data is compressed as a single zlib stream. */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
+/*
+ * Data is compressed as a single zstd frame with the windowLog compression
+ * parameter set to no more than 17.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
+/*
+ * Data is compressed sector by sector (using the sector size indicated by the
+ * name of the constant) with LZO1X and wrapped in the format documented in
+ * fs/btrfs/lzo.c. For writes, the compression sector size must match the
+ * filesystem sector size.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
+#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
+
+/* Data is not encrypted. */
+#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
+#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -996,5 +1125,9 @@ enum btrfs_err_code {
					struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
					struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				    struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
+				     struct btrfs_ioctl_encoded_io_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
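
A hedged userspace sketch of the read side of the new interface: one call returns a single extent's encoded (possibly compressed) bytes plus the metadata needed to decode them. Requires CAP_SYS_ADMIN; the wrapper name is illustrative, error handling trimmed:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/uio.h>
	#include <linux/btrfs.h>

	/* Sketch only: read one extent's encoded bytes at 'offset' into 'buf'. */
	ssize_t encoded_read(int fd, void *buf, size_t buf_len, off_t offset,
			     struct btrfs_ioctl_encoded_io_args *args)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = buf_len };

		memset(args, 0, sizeof(*args));
		args->iov = &iov;
		args->iovcnt = 1;
		args->offset = offset;

		/* On success the return value is the encoded length; args->len,
		 * unencoded_len, unencoded_offset and compression describe how
		 * to decode the bytes. */
		return ioctl(fd, BTRFS_IOC_ENCODED_READ, args);
	}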

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
@@ -53,6 +53,9 @@
 /* tracks free space in block groups. */
 #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
 
+/* Holds the block group items for extent tree v2. */
+#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
+
 /* device stats in the device tree */
 #define BTRFS_DEV_STATS_OBJECTID 0ULL
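
The new block group tree root belongs to the experimental extent tree v2 on-disk format, gated by the incompat bit added above. A userspace sketch that checks whether a mounted filesystem advertises the bit, via the existing feature-flags ioctl:

	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Sketch only: 'fd' is an open fd anywhere on the mounted btrfs. */
	int has_extent_tree_v2(int fd)
	{
		struct btrfs_ioctl_feature_flags flags;

		if (ioctl(fd, BTRFS_IOC_GET_FEATURES, &flags) < 0)
			return -1;
		return !!(flags.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2);
	}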