for-5.18-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmI44SgACgkQxWXV+ddt
 WDtzyg//YgMKr05jRsU3I/pIQ9znuKZmmllThwF63ZRG4PvKz2QfzvKdrMuzNjru
 5kHbG59iJqtLmU/aVsdp8mL6mmg5U3Ym2bIRsrW5m4HTtTowKdirvL/lQ3/tWm8j
 CSDJhUdCL2SwFjpru+4cxOeHLXNSfsk4BoCu8nsLitL+oXv/EPo/dkmu6nPjiMY3
 RjsIDBeDEf7J20KOuP/qJuN2YOAT7TeISPD3Ow4aDsmndWQ8n6KehEmAZb7QuqZQ
 SYubZ2wTb9HuPH/qpiTIA7innBIr+JkYtUYlz2xxixM2BUWNfqD6oKHw9RgOY5Sg
 CULFssw0i7cgGKsvuPJw1zdM002uG4wwXKigGiyljTVWvxneyr4mNDWiGad+LyFJ
 XWhnABPidkLs/1zbUkJ23DVub5VlfZsypkFDJAUXI0nGu3VrhjDfTYMa8eCe2L/F
 YuGG6CrAC+5K/arKAWTVj7hOb+52UzBTEBJz60LJJ6dS9eQoBy857V6pfo7w7ukZ
 t/tqA6q75O4tk/G3Ix3V1CjuAH3kJE6qXrvBxhpu8aZNjofopneLyGqS5oahpcE8
 8edtT+ZZhNuU9sLSEJCJATVxXRDdNzpQ8CHgOR5HOUbmM/vwKNzHPfRQzDnImznw
 UaUlFaaHwK17M6Y/6CnMecz26U2nVSJ7pyh39mb784XYe2a1efE=
 =YARd
 -----END PGP SIGNATURE-----

Merge tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This contains feature updates, performance improvements, preparatory
  and core work and some related VFS updates:

  Features:

   - encoded read/write ioctls, which allow user space to read or write
     raw data directly to extents (compressed for now, encrypted in the
     future); this will be used by send/receive v2, where it saves
     processing time (see the usage sketch after this message)

   - zoned mode now works with metadata DUP (the mkfs.btrfs default)

   - error message header updates:
      - print error state: transaction abort, other error, log tree
        errors
      - print transient filesystem state: remount, device replace,
        ignored checksum verifications

   - tree-checker: verify the transaction id of the to-be-written dirty
     extent buffer

  Performance improvements for fsync:

   - directory logging speedups (up to -90% run time)

   - avoid logging all directory changes during renames (up to -60% run
     time)

   - avoid inode logging during rename and link when possible (up to
     -60% run time)

   - prepare extents to be logged before locking a log tree path
     (throughput +7%)

   - stop copying old file extents when doing a full fsync()

   - improved logging of old extents after truncate

  Core, fixes:

   - improved stale device identification by dev_t and not just path
     (for devices that are behind other layers like device mapper)

   - continued extent tree v2 preparatory work
      - disable features that won't work yet
      - add wrappers and abstractions for new tree roots

   - improved error handling

   - add super block write annotations around background block group
     reclaim

   - fix device scanning messages potentially accessing stale pointer

   - cleanups and refactoring

  VFS:

   - allow reflinks/deduplication from two different mounts of the same
     filesystem (see the sketch after this message)

   - export and add helpers for read/write range verification, for the
     encoded ioctls"
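
To make the encoded read feature concrete, here is a minimal user-space
sketch of the new BTRFS_IOC_ENCODED_READ ioctl. It assumes the 5.18 uapi
definition of struct btrfs_ioctl_encoded_io_args in <linux/btrfs.h>,
requires CAP_SYS_ADMIN, and trims error handling; the return-value
semantics described in the comments are my reading of this series, not a
guaranteed contract.

/* Hedged sketch: read one extent "as is" (still compressed) from a btrfs file. */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	static char buf[128 * 1024];	/* BTRFS_MAX_COMPRESSED */
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct btrfs_ioctl_encoded_io_args args;
	ssize_t ret;
	int fd;

	if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
		return 1;

	memset(&args, 0, sizeof(args));
	args.iov = &iov;
	args.iovcnt = 1;
	args.offset = 0;	/* file offset to start reading at */

	/*
	 * On success the ioctl returns the number of encoded (on-disk)
	 * bytes copied into buf; args.len, args.unencoded_len and
	 * args.compression describe how to interpret those raw bytes.
	 */
	ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
	if (ret < 0)
		perror("BTRFS_IOC_ENCODED_READ");
	else
		printf("encoded %zd bytes, unencoded_len %llu, compression %u\n",
		       ret, (unsigned long long)args.unencoded_len,
		       args.compression);
	close(fd);
	return 0;
}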

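The cross-vfsmount change in the VFS part can be illustrated with a short
sketch. The paths /mnt/a and /mnt/b are assumptions: two mounts (for
example bind mounts or different subvolume mounts) of the same btrfs
filesystem. Before this series, FICLONE failed with EXDEV here even
though both files share a superblock.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>

int main(void)
{
	int src = open("/mnt/a/data", O_RDONLY);
	int dst = open("/mnt/b/copy", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (src < 0 || dst < 0)
		return 1;
	if (ioctl(dst, FICLONE, src))	/* shares extents, no data copy */
		perror("FICLONE");
	close(src);
	close(dst);
	return 0;
}
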
* tag 'for-5.18-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (98 commits)
  btrfs: zoned: put block group after final usage
  btrfs: don't access possibly stale fs_info data in device_list_add
  btrfs: add lockdep_assert_held to need_preemptive_reclaim
  btrfs: verify the transid of the to-be-written dirty extent buffer
  btrfs: unify the error handling of btrfs_read_buffer()
  btrfs: unify the error handling pattern for read_tree_block()
  btrfs: factor out do_free_extent_accounting helper
  btrfs: remove last_ref from the extent freeing code
  btrfs: add an alloc_reserved_extent helper
  btrfs: remove BUG_ON(ret) in alloc_reserved_tree_block
  btrfs: add and use helper for unlinking inode during log replay
  btrfs: extend locking to all space_info members accesses
  btrfs: zoned: mark relocation as writing
  fs: allow cross-vfsmount reflink/dedupe
  btrfs: remove the cross file system checks from remap
  btrfs: pass btrfs_fs_info to btrfs_recover_relocation
  btrfs: pass btrfs_fs_info for deleting snapshots and cleaner
  btrfs: add filesystems state details to error messages
  btrfs: deal with unexpected extent type during reflinking
  btrfs: fix unexpected error path when reflinking an inline extent
  ...
Commit: 5191290407
Author: Linus Torvalds
Date:   2022-03-22 10:51:40 -07:00

50 changed files with 3113 additions and 1335 deletions

--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -789,11 +789,13 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info,
     if (IS_ERR(eb)) {
         free_pref(ref);
         return PTR_ERR(eb);
-    } else if (!extent_buffer_uptodate(eb)) {
+    }
+    if (!extent_buffer_uptodate(eb)) {
         free_pref(ref);
         free_extent_buffer(eb);
         return -EIO;
     }
     if (lock)
         btrfs_tree_read_lock(eb);
     if (btrfs_header_level(eb) == 0)
@@ -1335,7 +1337,8 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans,
         if (IS_ERR(eb)) {
             ret = PTR_ERR(eb);
             goto out;
-        } else if (!extent_buffer_uptodate(eb)) {
+        }
+        if (!extent_buffer_uptodate(eb)) {
             free_extent_buffer(eb);
             ret = -EIO;
             goto out;

--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1522,8 +1522,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
     if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
         return;
 
-    if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
+    sb_start_write(fs_info->sb);
+
+    if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
+        sb_end_write(fs_info->sb);
         return;
+    }
 
     /*
      * Long running balances can keep us blocked here for eternity, so
@@ -1531,6 +1535,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
      */
     if (!mutex_trylock(&fs_info->reclaim_bgs_lock)) {
         btrfs_exclop_finish(fs_info);
+        sb_end_write(fs_info->sb);
         return;
     }
@@ -1605,6 +1610,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
     spin_unlock(&fs_info->unused_bgs_lock);
     mutex_unlock(&fs_info->reclaim_bgs_lock);
     btrfs_exclop_finish(fs_info);
+    sb_end_write(fs_info->sb);
 }
 
 void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
@@ -2006,6 +2012,7 @@ static int read_one_block_group(struct btrfs_fs_info *info,
     cache->length = key->offset;
     cache->used = btrfs_stack_block_group_used(bgi);
     cache->flags = btrfs_stack_block_group_flags(bgi);
+    cache->global_root_id = btrfs_stack_block_group_chunk_objectid(bgi);
 
     set_free_space_tree_thresholds(cache);
@@ -2288,7 +2295,7 @@ static int insert_block_group_item(struct btrfs_trans_handle *trans,
     spin_lock(&block_group->lock);
     btrfs_set_stack_block_group_used(&bgi, block_group->used);
     btrfs_set_stack_block_group_chunk_objectid(&bgi,
-                BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+                block_group->global_root_id);
     btrfs_set_stack_block_group_flags(&bgi, block_group->flags);
     key.objectid = block_group->start;
     key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
@@ -2444,6 +2451,27 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
     btrfs_trans_release_chunk_metadata(trans);
 }
 
+/*
+ * For extent tree v2 we use the block_group_item->chunk_offset to point at our
+ * global root id. For v1 it's always set to BTRFS_FIRST_CHUNK_TREE_OBJECTID.
+ */
+static u64 calculate_global_root_id(struct btrfs_fs_info *fs_info, u64 offset)
+{
+    u64 div = SZ_1G;
+    u64 index;
+
+    if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+        return BTRFS_FIRST_CHUNK_TREE_OBJECTID;
+
+    /* If we have a smaller fs index based on 128MiB. */
+    if (btrfs_super_total_bytes(fs_info->super_copy) <= (SZ_1G * 10ULL))
+        div = SZ_128M;
+
+    offset = div64_u64(offset, div);
+    div64_u64_rem(offset, fs_info->nr_global_roots, &index);
+    return index;
+}
+
 struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
                                                  u64 bytes_used, u64 type,
                                                  u64 chunk_offset, u64 size)
@@ -2464,6 +2492,8 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
     cache->flags = type;
     cache->last_byte_to_unpin = (u64)-1;
     cache->cached = BTRFS_CACHE_FINISHED;
+    cache->global_root_id = calculate_global_root_id(fs_info, cache->start);
+
     if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
         cache->needs_free_space = 1;
@@ -2693,7 +2723,7 @@ static int update_block_group_item(struct btrfs_trans_handle *trans,
     bi = btrfs_item_ptr_offset(leaf, path->slots[0]);
     btrfs_set_stack_block_group_used(&bgi, cache->used);
     btrfs_set_stack_block_group_chunk_objectid(&bgi,
-                BTRFS_FIRST_CHUNK_TREE_OBJECTID);
+                cache->global_root_id);
     btrfs_set_stack_block_group_flags(&bgi, cache->flags);
     write_extent_buffer(leaf, &bgi, bi, sizeof(bgi));
     btrfs_mark_buffer_dirty(leaf);
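
A hedged model of the calculate_global_root_id() mapping above: block
groups are striped across the global roots in 1GiB strides of logical
address space (128MiB strides for filesystems of 10GiB or less). The
helper below is a user-space restatement for illustration only; the
names are mine, and it assumes nr_global_roots is already known.

#include <stdio.h>
#include <stdint.h>

static uint64_t global_root_id(uint64_t offset, uint64_t total_bytes,
                               uint64_t nr_global_roots)
{
	/* stride: 128MiB for small filesystems, 1GiB otherwise */
	uint64_t div = (total_bytes <= 10ULL << 30) ? 128ULL << 20 : 1ULL << 30;

	return (offset / div) % nr_global_roots;
}

int main(void)
{
	/* A 100GiB fs with 4 global roots: the block group starting at
	 * 17GiB lands in global root (17 % 4) == 1. */
	printf("%llu\n", (unsigned long long)
	       global_root_id(17ULL << 30, 100ULL << 30, 4));
	return 0;
}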

--- a/fs/btrfs/block-group.h
+++ b/fs/btrfs/block-group.h
@@ -68,6 +68,7 @@ struct btrfs_block_group {
     u64 bytes_super;
     u64 flags;
     u64 cache_generation;
+    u64 global_root_id;
 
     /*
      * If the free space extent count exceeds this number, convert the block
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -13,6 +13,13 @@
 #include "ordered-data.h"
 #include "delayed-inode.h"
 
+/*
+ * Since we search a directory based on f_pos (struct dir_context::pos) we have
+ * to start at 2 since '.' and '..' have f_pos of 0 and 1 respectively, so
+ * everybody else has to start at 2 (see btrfs_real_readdir() and dir_emit_dots()).
+ */
+#define BTRFS_DIR_START_INDEX 2
+
 /*
  * ordered_data_close is set by truncate when a file that used
  * to have good data has been truncated to zero. When it is set
@@ -173,8 +180,9 @@ struct btrfs_inode {
     u64 disk_i_size;
 
     /*
-     * if this is a directory then index_cnt is the counter for the index
-     * number for new files that are created
+     * If this is a directory then index_cnt is the counter for the index
+     * number for new files that are created. For an empty directory, this
+     * must be initialized to BTRFS_DIR_START_INDEX.
      */
     u64 index_cnt;
@@ -333,6 +341,36 @@ static inline void btrfs_set_inode_last_sub_trans(struct btrfs_inode *inode)
     spin_unlock(&inode->lock);
 }
 
+/*
+ * Should be called while holding the inode's VFS lock in exclusive mode or in a
+ * context where no one else can access the inode concurrently (during inode
+ * creation or when loading an inode from disk).
+ */
+static inline void btrfs_set_inode_full_sync(struct btrfs_inode *inode)
+{
+    set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+    /*
+     * The inode may have been part of a reflink operation in the last
+     * transaction that modified it, and then a fsync has reset the
+     * last_reflink_trans to avoid subsequent fsyncs in the same
+     * transaction to do unnecessary work. So update last_reflink_trans
+     * to the last_trans value (we have to be pessimistic and assume a
+     * reflink happened).
+     *
+     * The ->last_trans is protected by the inode's spinlock and we can
+     * have a concurrent ordered extent completion update it. Also set
+     * last_reflink_trans to ->last_trans only if the former is less than
+     * the later, because we can be called in a context where
+     * last_reflink_trans was set to the current transaction generation
+     * while ->last_trans was not yet updated in the current transaction,
+     * and therefore has a lower value.
+     */
+    spin_lock(&inode->lock);
+    if (inode->last_reflink_trans < inode->last_trans)
+        inode->last_reflink_trans = inode->last_trans;
+    spin_unlock(&inode->lock);
+}
+
 static inline bool btrfs_inode_in_log(struct btrfs_inode *inode, u64 generation)
 {
     bool ret = false;

--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -219,7 +219,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
     bi_size += bvec->bv_len;
 
     if (bio->bi_status)
-        cb->errors = 1;
+        cb->status = bio->bi_status;
 
     ASSERT(bi_size && bi_size <= cb->compressed_len);
     last_io = refcount_sub_and_test(bi_size >> fs_info->sectorsize_bits,
@@ -234,7 +234,7 @@ static bool dec_and_test_compressed_bio(struct compressed_bio *cb, struct bio *bio)
     return last_io;
 }
 
-static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
+static void finish_compressed_bio_read(struct compressed_bio *cb)
 {
     unsigned int index;
     struct page *page;
@@ -247,19 +247,18 @@ static void finish_compressed_bio_read(struct compressed_bio *cb, struct bio *bio)
     }
 
     /* Do io completion on the original bio */
-    if (cb->errors) {
-        bio_io_error(cb->orig_bio);
+    if (cb->status != BLK_STS_OK) {
+        cb->orig_bio->bi_status = cb->status;
+        bio_endio(cb->orig_bio);
     } else {
         struct bio_vec *bvec;
         struct bvec_iter_all iter_all;
 
-        ASSERT(bio);
-        ASSERT(!bio->bi_status);
         /*
          * We have verified the checksum already, set page checked so
         * the end_io handlers know about it
         */
-        ASSERT(!bio_flagged(bio, BIO_CLONED));
+        ASSERT(!bio_flagged(cb->orig_bio, BIO_CLONED));
         bio_for_each_segment_all(bvec, cb->orig_bio, iter_all) {
             u64 bvec_start = page_offset(bvec->bv_page) +
                              bvec->bv_offset;
@@ -308,7 +307,7 @@ static void end_compressed_bio_read(struct bio *bio)
      * Some IO in this cb have failed, just skip checksum as there
      * is no way it could be correct.
      */
-    if (cb->errors == 1)
+    if (cb->status != BLK_STS_OK)
         goto csum_failed;
 
     inode = cb->inode;
@@ -324,8 +323,8 @@ static void end_compressed_bio_read(struct bio *bio)
 csum_failed:
     if (ret)
-        cb->errors = 1;
-    finish_compressed_bio_read(cb, bio);
+        cb->status = errno_to_blk_status(ret);
+    finish_compressed_bio_read(cb);
 out:
     bio_put(bio);
 }
@@ -342,11 +341,12 @@ static noinline void end_compressed_writeback(struct inode *inode,
     unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
     struct page *pages[16];
     unsigned long nr_pages = end_index - index + 1;
+    const int errno = blk_status_to_errno(cb->status);
     int i;
     int ret;
 
-    if (cb->errors)
-        mapping_set_error(inode->i_mapping, -EIO);
+    if (errno)
+        mapping_set_error(inode->i_mapping, errno);
 
     while (nr_pages > 0) {
         ret = find_get_pages_contig(inode->i_mapping, index,
@@ -358,7 +358,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
             continue;
         }
         for (i = 0; i < ret; i++) {
-            if (cb->errors)
+            if (errno)
                 SetPageError(pages[i]);
             btrfs_page_clamp_clear_writeback(fs_info, pages[i],
                                              cb->start, cb->len);
@@ -381,9 +381,10 @@ static void finish_compressed_bio_write(struct compressed_bio *cb)
      */
     btrfs_writepage_endio_finish_ordered(BTRFS_I(inode), NULL,
                                          cb->start, cb->start + cb->len - 1,
-                                         !cb->errors);
+                                         cb->status == BLK_STS_OK);
 
-    end_compressed_writeback(inode, cb);
+    if (cb->writeback)
+        end_compressed_writeback(inode, cb);
     /* Note, our inode could be gone now */
 
     /*
@@ -506,7 +507,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                            struct page **compressed_pages,
                                            unsigned int nr_pages,
                                            unsigned int write_flags,
-                                           struct cgroup_subsys_state *blkcg_css)
+                                           struct cgroup_subsys_state *blkcg_css,
+                                           bool writeback)
 {
     struct btrfs_fs_info *fs_info = inode->root->fs_info;
     struct bio *bio = NULL;
@@ -524,13 +526,14 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
     if (!cb)
         return BLK_STS_RESOURCE;
     refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
-    cb->errors = 0;
+    cb->status = BLK_STS_OK;
     cb->inode = &inode->vfs_inode;
     cb->start = start;
     cb->len = len;
     cb->mirror_num = 0;
     cb->compressed_pages = compressed_pages;
     cb->compressed_len = compressed_len;
+    cb->writeback = writeback;
     cb->orig_bio = NULL;
     cb->nr_pages = nr_pages;
@@ -591,7 +594,7 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
         if (submit) {
             if (!skip_sum) {
-                ret = btrfs_csum_one_bio(inode, bio, start, 1);
+                ret = btrfs_csum_one_bio(inode, bio, start, true);
                 if (ret)
                     goto finish_cb;
             }
@@ -808,7 +811,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     u64 em_len;
     u64 em_start;
     struct extent_map *em;
-    blk_status_t ret = BLK_STS_RESOURCE;
+    blk_status_t ret;
     int faili = 0;
     u8 *sums;
@@ -821,17 +824,21 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     read_lock(&em_tree->lock);
     em = lookup_extent_mapping(em_tree, file_offset, fs_info->sectorsize);
     read_unlock(&em_tree->lock);
-    if (!em)
-        return BLK_STS_IOERR;
+    if (!em) {
+        ret = BLK_STS_IOERR;
+        goto out;
+    }
 
     ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
     compressed_len = em->block_len;
     cb = kmalloc(compressed_bio_size(fs_info, compressed_len), GFP_NOFS);
-    if (!cb)
+    if (!cb) {
+        ret = BLK_STS_RESOURCE;
         goto out;
+    }
 
     refcount_set(&cb->pending_sectors, compressed_len >> fs_info->sectorsize_bits);
-    cb->errors = 0;
+    cb->status = BLK_STS_OK;
     cb->inode = inode;
     cb->mirror_num = mirror_num;
     sums = cb->sums;
@@ -851,8 +858,10 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
     cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
                                    GFP_NOFS);
-    if (!cb->compressed_pages)
+    if (!cb->compressed_pages) {
+        ret = BLK_STS_RESOURCE;
         goto fail1;
+    }
 
     for (pg_index = 0; pg_index < nr_pages; pg_index++) {
         cb->compressed_pages[pg_index] = alloc_page(GFP_NOFS);
@@ -938,7 +947,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
             comp_bio = NULL;
         }
     }
-    return 0;
+    return BLK_STS_OK;
 
 fail2:
     while (faili >= 0) {
@@ -951,6 +960,8 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
     kfree(cb);
 out:
     free_extent_map(em);
+    bio->bi_status = ret;
+    bio_endio(bio);
     return ret;
 finish_cb:
     if (comp_bio) {
@@ -970,7 +981,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
      */
     ASSERT(refcount_read(&cb->pending_sectors));
     /* Now we are the only one referring @cb, can finish it safely. */
-    finish_compressed_bio_read(cb, NULL);
+    finish_compressed_bio_read(cb);
     return ret;
 }
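
The switch from a u8 errors flag to blk_status_t is mechanical, but the
completion accounting around it is worth a second look. Below is a toy
user-space model of how dec_and_test_compressed_bio() decides when a
compressed_bio is finished, using C11 atomics in place of the kernel's
refcount_t; it is illustrative only, and all names are mine.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint pending_sectors;

/* pending_sectors is pre-loaded with compressed_len >> sectorsize_bits;
 * every completed bio subtracts its own sector count, and only the
 * caller whose subtraction reaches zero finishes the compressed_bio. */
static int dec_and_test(unsigned int sectors)
{
	return atomic_fetch_sub(&pending_sectors, sectors) == sectors;
}

int main(void)
{
	atomic_init(&pending_sectors, 128 * 1024 >> 12); /* 128K, 4K sectors */

	printf("%d\n", dec_and_test(16));	/* 0: work remains */
	printf("%d\n", dec_and_test(16));	/* 1: last completer */
	return 0;
}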

--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -22,6 +22,8 @@ struct btrfs_inode;
 /* Maximum length of compressed data stored on disk */
 #define BTRFS_MAX_COMPRESSED (SZ_128K)
+static_assert((BTRFS_MAX_COMPRESSED % PAGE_SIZE) == 0);
+
 /* Maximum size of data before compression */
 #define BTRFS_MAX_UNCOMPRESSED (SZ_128K)
@@ -52,8 +54,11 @@ struct compressed_bio {
     /* The compression algorithm for this bio */
     u8 compress_type;
 
+    /* Whether this is a write for writeback. */
+    bool writeback;
+
     /* IO errors */
-    u8 errors;
+    blk_status_t status;
     int mirror_num;
 
     /* for reads, this is the bio we are copying the data into */
@@ -95,7 +100,8 @@ blk_status_t btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
                                            struct page **compressed_pages,
                                            unsigned int nr_pages,
                                            unsigned int write_flags,
-                                           struct cgroup_subsys_state *blkcg_css);
+                                           struct cgroup_subsys_state *blkcg_css,
+                                           bool writeback);
 blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                                           int mirror_num, unsigned long bio_flags);

--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -846,9 +846,11 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent,
                          btrfs_header_owner(parent),
                          btrfs_node_ptr_generation(parent, slot),
                          level - 1, &first_key);
-    if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) {
+    if (IS_ERR(eb))
+        return eb;
+    if (!extent_buffer_uptodate(eb)) {
         free_extent_buffer(eb);
-        eb = ERR_PTR(-EIO);
+        return ERR_PTR(-EIO);
     }
 
     return eb;
@@ -1436,13 +1438,13 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
         /* now we're allowed to do a blocking uptodate check */
         ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key);
-        if (!ret) {
-            *eb_ret = tmp;
-            return 0;
+        if (ret) {
+            free_extent_buffer(tmp);
+            btrfs_release_path(p);
+            return -EIO;
         }
-        free_extent_buffer(tmp);
-        btrfs_release_path(p);
-        return -EIO;
+        *eb_ret = tmp;
+        return 0;
     }
 
     /*
@@ -1460,19 +1462,19 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
     ret = -EAGAIN;
     tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid,
                           gen, parent_level - 1, &first_key);
-    if (!IS_ERR(tmp)) {
-        /*
-         * If the read above didn't mark this buffer up to date,
-         * it will never end up being up to date. Set ret to EIO now
-         * and give up so that our caller doesn't loop forever
-         * on our EAGAINs.
-         */
-        if (!extent_buffer_uptodate(tmp))
-            ret = -EIO;
-        free_extent_buffer(tmp);
-    } else {
-        ret = PTR_ERR(tmp);
+    if (IS_ERR(tmp)) {
+        btrfs_release_path(p);
+        return PTR_ERR(tmp);
     }
+    /*
+     * If the read above didn't mark this buffer up to date,
+     * it will never end up being up to date. Set ret to EIO now
+     * and give up so that our caller doesn't loop forever
+     * on our EAGAINs.
+     */
+    if (!extent_buffer_uptodate(tmp))
+        ret = -EIO;
+    free_extent_buffer(tmp);
 
     btrfs_release_path(p);
     return ret;
@@ -2990,16 +2992,11 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
     if (free_space < data_size)
         goto out_unlock;
 
-    /* cow and double check */
     ret = btrfs_cow_block(trans, root, right, upper,
                           slot + 1, &right, BTRFS_NESTING_RIGHT_COW);
     if (ret)
         goto out_unlock;
 
-    free_space = btrfs_leaf_free_space(right);
-    if (free_space < data_size)
-        goto out_unlock;
-
     left_nritems = btrfs_header_nritems(left);
     if (left_nritems == 0)
         goto out_unlock;
@@ -3224,7 +3221,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
         goto out;
     }
 
-    /* cow and double check */
     ret = btrfs_cow_block(trans, root, left,
                           path->nodes[1], slot - 1, &left,
                           BTRFS_NESTING_LEFT_COW);
@@ -3235,12 +3231,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
         goto out;
     }
 
-    free_space = btrfs_leaf_free_space(left);
-    if (free_space < data_size) {
-        ret = 1;
-        goto out;
-    }
-
     if (check_sibling_keys(left, right)) {
         ret = -EUCLEAN;
         goto out;
@@ -4170,24 +4160,22 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 {
     struct btrfs_fs_info *fs_info = root->fs_info;
     struct extent_buffer *leaf;
-    u32 last_off;
-    u32 dsize = 0;
     int ret = 0;
     int wret;
-    int i;
     u32 nritems;
 
     leaf = path->nodes[0];
-    last_off = btrfs_item_offset(leaf, slot + nr - 1);
-
-    for (i = 0; i < nr; i++)
-        dsize += btrfs_item_size(leaf, slot + i);
-
     nritems = btrfs_header_nritems(leaf);
 
     if (slot + nr != nritems) {
-        int data_end = leaf_data_end(leaf);
+        const u32 last_off = btrfs_item_offset(leaf, slot + nr - 1);
+        const int data_end = leaf_data_end(leaf);
         struct btrfs_map_token token;
+        u32 dsize = 0;
+        int i;
+
+        for (i = 0; i < nr; i++)
+            dsize += btrfs_item_size(leaf, slot + i);
 
         memmove_extent_buffer(leaf, BTRFS_LEAF_DATA_OFFSET +
                               data_end + dsize,
@@ -4227,24 +4215,50 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
             fixup_low_keys(path, &disk_key, 1);
         }
 
-        /* delete the leaf if it is mostly empty */
+        /*
+         * Try to delete the leaf if it is mostly empty. We do this by
+         * trying to move all its items into its left and right neighbours.
+         * If we can't move all the items, then we don't delete it - it's
+         * not ideal, but future insertions might fill the leaf with more
+         * items, or items from other leaves might be moved later into our
+         * leaf due to deletions on those leaves.
+         */
         if (used < BTRFS_LEAF_DATA_SIZE(fs_info) / 3) {
+            u32 min_push_space;
+
             /* push_leaf_left fixes the path.
              * make sure the path still points to our leaf
              * for possible call to del_ptr below
             */
             slot = path->slots[1];
             atomic_inc(&leaf->refs);
-
-            wret = push_leaf_left(trans, root, path, 1, 1,
-                                  1, (u32)-1);
+            /*
+             * We want to be able to at least push one item to the
+             * left neighbour leaf, and that's the first item.
+             */
+            min_push_space = sizeof(struct btrfs_item) +
+                             btrfs_item_size(leaf, 0);
+            wret = push_leaf_left(trans, root, path, 0,
+                                  min_push_space, 1, (u32)-1);
             if (wret < 0 && wret != -ENOSPC)
                 ret = wret;
 
             if (path->nodes[0] == leaf &&
                 btrfs_header_nritems(leaf)) {
-                wret = push_leaf_right(trans, root, path, 1,
-                                       1, 1, 0);
+                /*
+                 * If we were not able to push all items from our
+                 * leaf to its left neighbour, then attempt to
+                 * either push all the remaining items to the
+                 * right neighbour or none. There's no advantage
+                 * in pushing only some items, instead of all, as
+                 * it's pointless to end up with a leaf having
+                 * too few items while the neighbours can be full
+                 * or nearly full.
+                 */
                nritems = btrfs_header_nritems(leaf);
+                min_push_space = leaf_space_used(leaf, 0, nritems);
+                wret = push_leaf_right(trans, root, path, 0,
+                                       min_push_space, 1, 0);
                 if (wret < 0 && wret != -ENOSPC)
                     ret = wret;
             }

--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -49,6 +49,7 @@ extern struct kmem_cache *btrfs_free_space_bitmap_cachep;
 struct btrfs_ordered_sum;
 struct btrfs_ref;
 struct btrfs_bio;
+struct btrfs_ioctl_encoded_io_args;
 
 #define BTRFS_MAGIC 0x4D5F53665248425FULL /* ascii _BHRfS_M, no null */
@@ -148,6 +149,8 @@ enum {
     /* Indicates there was an error cleaning up a log tree. */
     BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
+
+    BTRFS_FS_STATE_COUNT
 };
 
 #define BTRFS_BACKREF_REV_MAX 256
@@ -274,8 +277,14 @@ struct btrfs_super_block {
     /* the UUID written into btree blocks */
     u8 metadata_uuid[BTRFS_FSID_SIZE];
 
+    /* Extent tree v2 */
+    __le64 block_group_root;
+    __le64 block_group_root_generation;
+    u8 block_group_root_level;
+
     /* future expansion */
-    __le64 reserved[28];
+    u8 reserved8[7];
+    __le64 reserved[25];
     u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
     struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
@@ -300,6 +309,26 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_SET 0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR 0ULL
 
+#ifdef CONFIG_BTRFS_DEBUG
+/*
+ * Extent tree v2 supported only with CONFIG_BTRFS_DEBUG
+ */
+#define BTRFS_FEATURE_INCOMPAT_SUPP            \
+    (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |    \
+     BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |   \
+     BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |     \
+     BTRFS_FEATURE_INCOMPAT_BIG_METADATA |     \
+     BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |     \
+     BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |    \
+     BTRFS_FEATURE_INCOMPAT_RAID56 |           \
+     BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |    \
+     BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |  \
+     BTRFS_FEATURE_INCOMPAT_NO_HOLES |         \
+     BTRFS_FEATURE_INCOMPAT_METADATA_UUID |    \
+     BTRFS_FEATURE_INCOMPAT_RAID1C34 |         \
+     BTRFS_FEATURE_INCOMPAT_ZONED |            \
+     BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2)
+#else
 #define BTRFS_FEATURE_INCOMPAT_SUPP            \
     (BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |    \
      BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |   \
@@ -314,6 +343,7 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
      BTRFS_FEATURE_INCOMPAT_METADATA_UUID |    \
      BTRFS_FEATURE_INCOMPAT_RAID1C34 |         \
      BTRFS_FEATURE_INCOMPAT_ZONED)
+#endif
 
 #define BTRFS_FEATURE_INCOMPAT_SAFE_SET \
     (BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
@@ -636,6 +666,7 @@ struct btrfs_fs_info {
     struct btrfs_root *quota_root;
     struct btrfs_root *uuid_root;
     struct btrfs_root *data_reloc_root;
+    struct btrfs_root *block_group_root;
 
     /* the log root tree is a directory of all the other log roots */
     struct btrfs_root *log_root_tree;
@@ -1030,6 +1061,8 @@ struct btrfs_fs_info {
     spinlock_t relocation_bg_lock;
     u64 data_reloc_bg;
 
+    u64 nr_global_roots;
+
     spinlock_t zone_active_bgs_lock;
     struct list_head zone_active_bgs;
@@ -1609,25 +1642,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
 static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
                                    const type *s) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     return btrfs_get_##bits(eb, s, offsetof(type, member)); \
 } \
 static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
                                     u##bits val) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     btrfs_set_##bits(eb, s, offsetof(type, member), val); \
 } \
 static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
                                          const type *s) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     return btrfs_get_token_##bits(token, s, offsetof(type, member));\
 } \
 static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
                                           type *s, u##bits val) \
 { \
-    BUILD_BUG_ON(sizeof(u##bits) != sizeof(((type *)0))->member); \
+    static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
     btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
 }
@@ -1658,8 +1691,8 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
 static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
                                            struct btrfs_dev_item *s)
 {
-    BUILD_BUG_ON(sizeof(u64) !=
-                 sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+    static_assert(sizeof(u64) ==
+                  sizeof(((struct btrfs_dev_item *)0))->total_bytes);
     return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
                                         total_bytes));
 }
@@ -1667,8 +1700,8 @@ static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
                                                 struct btrfs_dev_item *s,
                                                 u64 val)
 {
-    BUILD_BUG_ON(sizeof(u64) !=
-                 sizeof(((struct btrfs_dev_item *)0))->total_bytes);
+    static_assert(sizeof(u64) ==
+                  sizeof(((struct btrfs_dev_item *)0))->total_bytes);
     WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
     btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
 }
@@ -2328,6 +2361,17 @@ BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup,
 BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup,
                          num_devices, 64);
 
+/*
+ * For extent tree v2 we overload the extent root with the block group root, as
+ * we will have multiple extent roots.
+ */
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root, struct btrfs_root_backup,
+                         extent_root, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_gen, struct btrfs_root_backup,
+                         extent_root_gen, 64);
+BTRFS_SETGET_STACK_FUNCS(backup_block_group_root_level,
+                         struct btrfs_root_backup, extent_root_level, 8);
+
 /* struct btrfs_balance_item */
 BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64);
@@ -2462,6 +2506,13 @@ BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
 BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
 BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
                          uuid_tree_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root, struct btrfs_super_block,
+                         block_group_root, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_generation,
+                         struct btrfs_super_block,
+                         block_group_root_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_block_group_root_level, struct btrfs_super_block,
+                         block_group_root_level, 8);
 
 int btrfs_super_csum_size(const struct btrfs_super_block *s);
 const char *btrfs_super_csum_name(u16 csum_type);
@@ -2839,7 +2890,8 @@ void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                       struct btrfs_block_rsv *rsv);
 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
 
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes);
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+                                    u64 disk_num_bytes);
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_fs_info *fs_info,
                                    u64 start, u64 end);
@@ -3155,7 +3207,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
                            struct btrfs_root *root,
                            struct btrfs_ordered_sum *sums);
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
-                                u64 file_start, int contig);
+                                u64 offset, bool one_ordered);
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
                              struct list_head *list, int search_commit);
 void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
@@ -3256,6 +3308,11 @@ int btrfs_writepage_cow_fixup(struct page *page);
 void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
                                           struct page *page, u64 start,
                                           u64 end, bool uptodate);
+ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
+                           struct btrfs_ioctl_encoded_io_args *encoded);
+ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+                               const struct btrfs_ioctl_encoded_io_args *encoded);
+
 extern const struct dentry_operations btrfs_dentry_operations;
 extern const struct iomap_ops btrfs_dio_iomap_ops;
 extern const struct iomap_dio_ops btrfs_dio_ops;
@@ -3318,6 +3375,8 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
                                struct btrfs_trans_handle **trans_out);
 int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
                               struct btrfs_inode *inode, u64 start, u64 end);
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+                            const struct btrfs_ioctl_encoded_io_args *encoded);
 int btrfs_release_file(struct inode *inode, struct file *file);
 int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
                       size_t num_pages, loff_t pos, size_t write_bytes,
@@ -3774,7 +3833,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root);
 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
-int btrfs_recover_relocation(struct btrfs_root *root);
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
 int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
 int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
                           struct btrfs_root *root, struct extent_buffer *buf,
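
A quick sanity check on the struct btrfs_super_block change above: the
three new block group root fields plus the shrunken reserved areas occupy
exactly the 224 bytes that reserved[28] did, so every later field keeps
its on-disk offset. The arithmetic, as a tiny standalone check:

#include <assert.h>

int main(void)
{
	/* old layout: __le64 reserved[28] */
	unsigned old_bytes = 28 * 8;
	/* new layout: two __le64 + one u8, padded by u8[7] + __le64[25] */
	unsigned new_bytes = 8 + 8 + 1 + 7 + 25 * 8;

	assert(old_bytes == new_bytes && new_bytes == 224);
	return 0;
}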

--- a/fs/btrfs/delalloc-space.c
+++ b/fs/btrfs/delalloc-space.c
@@ -270,11 +270,11 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info,
 }
 
 static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
-                                    u64 num_bytes, u64 *meta_reserve,
-                                    u64 *qgroup_reserve)
+                                    u64 num_bytes, u64 disk_num_bytes,
+                                    u64 *meta_reserve, u64 *qgroup_reserve)
 {
     u64 nr_extents = count_max_extents(num_bytes);
-    u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, num_bytes);
+    u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
     u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
 
     *meta_reserve = btrfs_calc_insert_metadata_size(fs_info,
@@ -288,7 +288,8 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
     *qgroup_reserve = nr_extents * fs_info->nodesize;
 }
 
-int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
+int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+                                    u64 disk_num_bytes)
 {
     struct btrfs_root *root = inode->root;
     struct btrfs_fs_info *fs_info = root->fs_info;
@@ -318,6 +319,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
     }
 
     num_bytes = ALIGN(num_bytes, fs_info->sectorsize);
+    disk_num_bytes = ALIGN(disk_num_bytes, fs_info->sectorsize);
 
     /*
      * We always want to do it this way, every other way is wrong and ends
@@ -329,8 +331,8 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
      * everything out and try again, which is bad. This way we just
      * over-reserve slightly, and clean up the mess when we are done.
      */
-    calc_inode_reservations(fs_info, num_bytes, &meta_reserve,
-                            &qgroup_reserve);
+    calc_inode_reservations(fs_info, num_bytes, disk_num_bytes,
+                            &meta_reserve, &qgroup_reserve);
     ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserve, true);
     if (ret)
         return ret;
@@ -349,7 +351,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes)
     spin_lock(&inode->lock);
     nr_extents = count_max_extents(num_bytes);
     btrfs_mod_outstanding_extents(inode, nr_extents);
-    inode->csum_bytes += num_bytes;
+    inode->csum_bytes += disk_num_bytes;
     btrfs_calculate_inode_block_rsv_size(fs_info, inode);
     spin_unlock(&inode->lock);
@@ -454,7 +456,7 @@ int btrfs_delalloc_reserve_space(struct btrfs_inode *inode,
     ret = btrfs_check_data_free_space(inode, reserved, start, len);
     if (ret < 0)
         return ret;
-    ret = btrfs_delalloc_reserve_metadata(inode, len);
+    ret = btrfs_delalloc_reserve_metadata(inode, len, len);
     if (ret < 0) {
         btrfs_free_reserved_data_space(inode, *reserved, start, len);
         extent_changeset_free(*reserved);
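
The point of threading disk_num_bytes through the reservation path is
that checksums are created per on-disk sector, so for compressed (and,
later, encoded) writes the csum reservation should follow what actually
lands on disk, not the logical write size. A back-of-the-envelope
illustration, assuming 4KiB sectors; the real leaf-count math lives in
btrfs_csum_bytes_to_leaves().

#include <stdio.h>

int main(void)
{
	unsigned long num_bytes = 1UL << 20;        /* 1MiB buffered write */
	unsigned long disk_num_bytes = 128UL << 10; /* compresses to 128KiB */

	/* one checksum per 4KiB on-disk sector */
	printf("csums if sized by logical bytes: %lu\n", num_bytes >> 12);      /* 256 */
	printf("csums actually needed:           %lu\n", disk_num_bytes >> 12); /* 32 */
	return 0;
}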

--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -243,6 +243,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
                                          struct btrfs_device *srcdev,
                                          struct btrfs_device **device_out)
 {
+    struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
     struct btrfs_device *device;
     struct block_device *bdev;
     struct rcu_string *name;
@@ -271,7 +272,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
     sync_blockdev(bdev);
 
-    list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
+    list_for_each_entry(device, &fs_devices->devices, dev_list) {
         if (device->bdev == bdev) {
             btrfs_err(fs_info,
                       "target device is in the filesystem!");
@@ -302,6 +303,9 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
         goto error;
     }
     rcu_assign_pointer(device->name, name);
+    ret = lookup_bdev(device_path, &device->devt);
+    if (ret)
+        goto error;
 
     set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
     device->generation = 0;
@@ -320,17 +324,17 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
     device->mode = FMODE_EXCL;
     device->dev_stats_valid = 1;
     set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
-    device->fs_devices = fs_info->fs_devices;
+    device->fs_devices = fs_devices;
 
     ret = btrfs_get_dev_zone_info(device, false);
     if (ret)
         goto error;
 
-    mutex_lock(&fs_info->fs_devices->device_list_mutex);
-    list_add(&device->dev_list, &fs_info->fs_devices->devices);
-    fs_info->fs_devices->num_devices++;
-    fs_info->fs_devices->open_devices++;
-    mutex_unlock(&fs_info->fs_devices->device_list_mutex);
+    mutex_lock(&fs_devices->device_list_mutex);
+    list_add(&device->dev_list, &fs_devices->devices);
+    fs_devices->num_devices++;
+    fs_devices->open_devices++;
+    mutex_unlock(&fs_devices->device_list_mutex);
 
     *device_out = device;
     return 0;
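
The lookup_bdev() call added above records the target's dev_t at
replace-start time, matching the series' theme of identifying devices by
number rather than by path (useful when devices sit behind layers like
device mapper). A user-space analogue of that lookup, purely for
illustration, is stat(2) on the device node:

#include <stdio.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>

int main(int argc, char **argv)
{
	struct stat st;

	if (argc < 2 || stat(argv[1], &st) || !S_ISBLK(st.st_mode)) {
		fprintf(stderr, "usage: %s <block-device>\n", argv[0]);
		return 1;
	}
	/* resolve a path to the stable device number behind it */
	printf("%s -> dev_t %u:%u\n", argv[1],
	       major(st.st_rdev), minor(st.st_rdev));
	return 0;
}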

--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -441,17 +441,31 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
     else
         ret = btrfs_check_leaf_full(eb);
 
-    if (ret < 0) {
-        btrfs_print_tree(eb, 0);
+    if (ret < 0)
+        goto error;
+
+    /*
+     * Also check the generation, the eb reached here must be newer than
+     * last committed. Or something seriously wrong happened.
+     */
+    if (unlikely(btrfs_header_generation(eb) <= fs_info->last_trans_committed)) {
+        ret = -EUCLEAN;
         btrfs_err(fs_info,
-                  "block=%llu write time tree block corruption detected",
-                  eb->start);
-        WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
-        return ret;
+                  "block=%llu bad generation, have %llu expect > %llu",
+                  eb->start, btrfs_header_generation(eb),
+                  fs_info->last_trans_committed);
+        goto error;
     }
     write_extent_buffer(eb, result, 0, fs_info->csum_size);
 
     return 0;
+
+error:
+    btrfs_print_tree(eb, 0);
+    btrfs_err(fs_info, "block=%llu write time tree block corruption detected",
+              eb->start);
+    WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
+    return ret;
 }
 
 /* Checksum all dirty extent buffers in one bio_vec */
@@ -1289,12 +1303,33 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
     return root;
 }
 
+static u64 btrfs_global_root_id(struct btrfs_fs_info *fs_info, u64 bytenr)
+{
+    struct btrfs_block_group *block_group;
+    u64 ret;
+
+    if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+        return 0;
+
+    if (bytenr)
+        block_group = btrfs_lookup_block_group(fs_info, bytenr);
+    else
+        block_group = btrfs_lookup_first_block_group(fs_info, bytenr);
+    ASSERT(block_group);
+    if (!block_group)
+        return 0;
+    ret = block_group->global_root_id;
+    btrfs_put_block_group(block_group);
+    return ret;
+}
+
 struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr)
 {
     struct btrfs_key key = {
         .objectid = BTRFS_CSUM_TREE_OBJECTID,
         .type = BTRFS_ROOT_ITEM_KEY,
-        .offset = 0,
+        .offset = btrfs_global_root_id(fs_info, bytenr),
     };
 
     return btrfs_global_root(fs_info, &key);
@@ -1305,7 +1340,7 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
     struct btrfs_key key = {
         .objectid = BTRFS_EXTENT_TREE_OBJECTID,
         .type = BTRFS_ROOT_ITEM_KEY,
-        .offset = 0,
+        .offset = btrfs_global_root_id(fs_info, bytenr),
     };
 
     return btrfs_global_root(fs_info, &key);
@@ -1522,7 +1557,8 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
         ret = PTR_ERR(root->node);
         root->node = NULL;
         goto fail;
-    } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+    }
+    if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
         ret = -EIO;
         goto fail;
     }
@@ -1727,6 +1763,7 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
     btrfs_put_root(fs_info->uuid_root);
     btrfs_put_root(fs_info->fs_root);
     btrfs_put_root(fs_info->data_reloc_root);
+    btrfs_put_root(fs_info->block_group_root);
     btrfs_check_leaked_roots(fs_info);
     btrfs_extent_buffer_leak_debug_check(fs_info);
     kfree(fs_info->super_copy);
@@ -1925,8 +1962,7 @@ static void end_workqueue_fn(struct btrfs_work *work)
 static int cleaner_kthread(void *arg)
 {
-    struct btrfs_root *root = arg;
-    struct btrfs_fs_info *fs_info = root->fs_info;
+    struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)arg;
     int again;
 
     while (1) {
@@ -1959,7 +1995,7 @@ static int cleaner_kthread(void *arg)
         btrfs_run_delayed_iputs(fs_info);
 
-        again = btrfs_clean_one_deleted_snapshot(root);
+        again = btrfs_clean_one_deleted_snapshot(fs_info);
         mutex_unlock(&fs_info->cleaner_mutex);
 
         /*
@@ -2095,8 +2131,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
 {
     const int next_backup = info->backup_root_index;
     struct btrfs_root_backup *root_backup;
-    struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
-    struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
 
     root_backup = info->super_for_commit->super_roots + next_backup;
@@ -2121,11 +2155,30 @@ static void backup_super_roots(struct btrfs_fs_info *info)
     btrfs_set_backup_chunk_root_level(root_backup,
                           btrfs_header_level(info->chunk_root->node));
 
-    btrfs_set_backup_extent_root(root_backup, extent_root->node->start);
-    btrfs_set_backup_extent_root_gen(root_backup,
-                          btrfs_header_generation(extent_root->node));
-    btrfs_set_backup_extent_root_level(root_backup,
-                          btrfs_header_level(extent_root->node));
+    if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) {
+        btrfs_set_backup_block_group_root(root_backup,
+                          info->block_group_root->node->start);
+        btrfs_set_backup_block_group_root_gen(root_backup,
+                          btrfs_header_generation(info->block_group_root->node));
+        btrfs_set_backup_block_group_root_level(root_backup,
+                          btrfs_header_level(info->block_group_root->node));
+    } else {
+        struct btrfs_root *extent_root = btrfs_extent_root(info, 0);
+        struct btrfs_root *csum_root = btrfs_csum_root(info, 0);
+
+        btrfs_set_backup_extent_root(root_backup,
+                          extent_root->node->start);
+        btrfs_set_backup_extent_root_gen(root_backup,
+                          btrfs_header_generation(extent_root->node));
+        btrfs_set_backup_extent_root_level(root_backup,
+                          btrfs_header_level(extent_root->node));
+
+        btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
+        btrfs_set_backup_csum_root_gen(root_backup,
+                          btrfs_header_generation(csum_root->node));
+        btrfs_set_backup_csum_root_level(root_backup,
+                          btrfs_header_level(csum_root->node));
+    }
 
     /*
      * we might commit during log recovery, which happens before we set
@@ -2146,12 +2199,6 @@ static void backup_super_roots(struct btrfs_fs_info *info)
     btrfs_set_backup_dev_root_level(root_backup,
                           btrfs_header_level(info->dev_root->node));
 
-    btrfs_set_backup_csum_root(root_backup, csum_root->node->start);
-    btrfs_set_backup_csum_root_gen(root_backup,
-                          btrfs_header_generation(csum_root->node));
-    btrfs_set_backup_csum_root_level(root_backup,
-                          btrfs_header_level(csum_root->node));
-
     btrfs_set_backup_total_bytes(root_backup,
                           btrfs_super_total_bytes(info->super_copy));
     btrfs_set_backup_bytes_used(root_backup,
@@ -2269,6 +2316,7 @@ static void free_root_pointers(struct btrfs_fs_info *info, bool free_chunk_root)
     free_root_extent_buffers(info->uuid_root);
     free_root_extent_buffers(info->fs_root);
     free_root_extent_buffers(info->data_reloc_root);
+    free_root_extent_buffers(info->block_group_root);
     if (free_chunk_root)
         free_root_extent_buffers(info->chunk_root);
 }
@@ -2504,11 +2552,13 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
         log_tree_root->node = NULL;
         btrfs_put_root(log_tree_root);
         return ret;
-    } else if (!extent_buffer_uptodate(log_tree_root->node)) {
+    }
+    if (!extent_buffer_uptodate(log_tree_root->node)) {
         btrfs_err(fs_info, "failed to read log tree");
         btrfs_put_root(log_tree_root);
         return -EIO;
     }
+
     /* returns with log_tree_root freed on success */
     ret = btrfs_recover_log_trees(log_tree_root);
     if (ret) {
@@ -2533,6 +2583,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
 {
     struct btrfs_fs_info *fs_info = tree_root->fs_info;
     struct btrfs_root *root;
+    u64 max_global_id = 0;
     int ret;
     struct btrfs_key key = {
         .objectid = objectid,
@@ -2568,6 +2619,13 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
             break;
         btrfs_release_path(path);
 
+        /*
+         * Just worry about this for extent tree, it'll be the same for
+         * everybody.
+         */
+        if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+            max_global_id = max(max_global_id, key.offset);
+
         found = true;
         root = read_tree_root_path(tree_root, path, &key);
         if (IS_ERR(root)) {
@@ -2585,6 +2643,9 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
     }
     btrfs_release_path(path);
 
+    if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
+        fs_info->nr_global_roots = max_global_id + 1;
+
     if (!found || ret) {
         if (objectid == BTRFS_CSUM_TREE_OBJECTID)
             set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
@@ -2930,6 +2991,56 @@ static int btrfs_validate_write_super(struct btrfs_fs_info *fs_info,
     return ret;
 }
 
+static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level)
+{
+    int ret = 0;
+
+    root->node = read_tree_block(root->fs_info, bytenr,
+                                 root->root_key.objectid, gen, level, NULL);
+    if (IS_ERR(root->node)) {
+        ret = PTR_ERR(root->node);
+        root->node = NULL;
+        return ret;
+    }
+    if (!extent_buffer_uptodate(root->node)) {
+        free_extent_buffer(root->node);
+        root->node = NULL;
+        return -EIO;
+    }
+
+    btrfs_set_root_node(&root->root_item, root->node);
+    root->commit_root = btrfs_root_node(root);
+    btrfs_set_root_refs(&root->root_item, 1);
+    return ret;
+}
+
+static int load_important_roots(struct btrfs_fs_info *fs_info)
+{
+    struct btrfs_super_block *sb = fs_info->super_copy;
+    u64 gen, bytenr;
+    int level, ret;
+
+    bytenr = btrfs_super_root(sb);
+    gen = btrfs_super_generation(sb);
+    level = btrfs_super_root_level(sb);
+    ret = load_super_root(fs_info->tree_root, bytenr, gen, level);
+    if (ret) {
+        btrfs_warn(fs_info, "couldn't read tree root");
+        return ret;
+    }
+
+    if (!btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+        return 0;
+
+    bytenr = btrfs_super_block_group_root(sb);
+    gen = btrfs_super_block_group_root_generation(sb);
+    level = btrfs_super_block_group_root_level(sb);
+    ret = load_super_root(fs_info->block_group_root, bytenr, gen, level);
+    if (ret)
+        btrfs_warn(fs_info, "couldn't read block group root");
+    return ret;
+}
+
 static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
 {
     int backup_index = find_newest_super_backup(fs_info);
@ -2939,10 +3050,17 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
int ret = 0; int ret = 0;
int i; int i;
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) { if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
u64 generation; struct btrfs_root *root;
int level;
root = btrfs_alloc_root(fs_info, BTRFS_BLOCK_GROUP_TREE_OBJECTID,
GFP_KERNEL);
if (!root)
return -ENOMEM;
fs_info->block_group_root = root;
}
for (i = 0; i < BTRFS_NUM_BACKUP_ROOTS; i++) {
if (handle_error) { if (handle_error) {
if (!IS_ERR(tree_root->node)) if (!IS_ERR(tree_root->node))
free_extent_buffer(tree_root->node); free_extent_buffer(tree_root->node);
@ -2967,29 +3085,13 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
if (ret < 0) if (ret < 0)
return ret; return ret;
} }
generation = btrfs_super_generation(sb);
level = btrfs_super_root_level(sb);
tree_root->node = read_tree_block(fs_info, btrfs_super_root(sb),
BTRFS_ROOT_TREE_OBJECTID,
generation, level, NULL);
if (IS_ERR(tree_root->node)) {
handle_error = true;
ret = PTR_ERR(tree_root->node);
tree_root->node = NULL;
btrfs_warn(fs_info, "couldn't read tree root");
continue;
} else if (!extent_buffer_uptodate(tree_root->node)) { ret = load_important_roots(fs_info);
if (ret) {
handle_error = true; handle_error = true;
ret = -EIO;
btrfs_warn(fs_info, "error while reading tree root");
continue; continue;
} }
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
btrfs_set_root_refs(&tree_root->root_item, 1);
/* /*
* No need to hold btrfs_root::objectid_mutex since the fs * No need to hold btrfs_root::objectid_mutex since the fs
* hasn't been fully initialised and we are the only user * hasn't been fully initialised and we are the only user
@ -3009,8 +3111,8 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
} }
/* All successful */ /* All successful */
fs_info->generation = generation; fs_info->generation = btrfs_header_generation(tree_root->node);
fs_info->last_trans_committed = generation; fs_info->last_trans_committed = fs_info->generation;
fs_info->last_reloc_trans = 0; fs_info->last_reloc_trans = 0;
/* Always begin writing backup roots after the one being used */ /* Always begin writing backup roots after the one being used */
@ -3293,7 +3395,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
up_read(&fs_info->cleanup_work_sem); up_read(&fs_info->cleanup_work_sem);
mutex_lock(&fs_info->cleaner_mutex); mutex_lock(&fs_info->cleaner_mutex);
ret = btrfs_recover_relocation(fs_info->tree_root); ret = btrfs_recover_relocation(fs_info);
mutex_unlock(&fs_info->cleaner_mutex); mutex_unlock(&fs_info->cleaner_mutex);
if (ret < 0) { if (ret < 0) {
btrfs_warn(fs_info, "failed to recover relocation: %d", ret); btrfs_warn(fs_info, "failed to recover relocation: %d", ret);
@ -3594,21 +3696,12 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
generation = btrfs_super_chunk_root_generation(disk_super); generation = btrfs_super_chunk_root_generation(disk_super);
level = btrfs_super_chunk_root_level(disk_super); level = btrfs_super_chunk_root_level(disk_super);
ret = load_super_root(chunk_root, btrfs_super_chunk_root(disk_super),
chunk_root->node = read_tree_block(fs_info, generation, level);
btrfs_super_chunk_root(disk_super), if (ret) {
BTRFS_CHUNK_TREE_OBJECTID,
generation, level, NULL);
if (IS_ERR(chunk_root->node) ||
!extent_buffer_uptodate(chunk_root->node)) {
btrfs_err(fs_info, "failed to read chunk root"); btrfs_err(fs_info, "failed to read chunk root");
if (!IS_ERR(chunk_root->node))
free_extent_buffer(chunk_root->node);
chunk_root->node = NULL;
goto fail_tree_roots; goto fail_tree_roots;
} }
btrfs_set_root_node(&chunk_root->root_item, chunk_root->node);
chunk_root->commit_root = btrfs_root_node(chunk_root);
read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid, read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
offsetof(struct btrfs_header, chunk_tree_uuid), offsetof(struct btrfs_header, chunk_tree_uuid),
@ -3728,7 +3821,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
goto fail_sysfs; goto fail_sysfs;
} }
fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, fs_info->cleaner_kthread = kthread_run(cleaner_kthread, fs_info,
"btrfs-cleaner"); "btrfs-cleaner");
if (IS_ERR(fs_info->cleaner_kthread)) if (IS_ERR(fs_info->cleaner_kthread))
goto fail_sysfs; goto fail_sysfs;


@@ -111,6 +111,8 @@ static inline struct btrfs_root *btrfs_grab_root(struct btrfs_root *root)
 
 static inline struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
 {
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+               return fs_info->block_group_root;
        return btrfs_extent_root(fs_info, 0);
 }
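For orientation, a minimal sketch of a typical caller of this helper; the block group item update shown here is illustrative, not quoted from the series. The point of the helper is that callers never test EXTENT_TREE_V2 themselves: they ask for the root that holds block group items and get either the dedicated block group tree or global extent root 0.

    /* Hypothetical caller, e.g. updating a block group item. */
    struct btrfs_root *root = btrfs_block_group_root(fs_info);
    struct btrfs_key key = {
            .objectid = block_group->start,
            .type = BTRFS_BLOCK_GROUP_ITEM_KEY,
            .offset = block_group->length,
    };

    /* Same search regardless of which tree actually holds the item. */
    ret = btrfs_search_slot(trans, root, &key, path, 0, 1);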


@@ -598,7 +598,7 @@ static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
                                           struct btrfs_root *root,
                                           struct btrfs_path *path,
-                                          int refs_to_drop, int *last_ref)
+                                          int refs_to_drop)
 {
        struct btrfs_key key;
        struct btrfs_extent_data_ref *ref1 = NULL;
@@ -631,7 +631,6 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
 
        if (num_refs == 0) {
                ret = btrfs_del_item(trans, root, path);
-               *last_ref = 1;
        } else {
                if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
                        btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
@@ -1072,8 +1071,7 @@ static noinline_for_stack
 void update_inline_extent_backref(struct btrfs_path *path,
                                  struct btrfs_extent_inline_ref *iref,
                                  int refs_to_mod,
-                                 struct btrfs_delayed_extent_op *extent_op,
-                                 int *last_ref)
+                                 struct btrfs_delayed_extent_op *extent_op)
 {
        struct extent_buffer *leaf = path->nodes[0];
        struct btrfs_extent_item *ei;
@@ -1121,7 +1119,6 @@ void update_inline_extent_backref(struct btrfs_path *path,
                else
                        btrfs_set_shared_data_ref_count(leaf, sref, refs);
        } else {
-               *last_ref = 1;
                size = btrfs_extent_inline_ref_size(type);
                item_size = btrfs_item_size(leaf, path->slots[0]);
                ptr = (unsigned long)iref;
@@ -1166,8 +1163,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
                        }
                        return -EUCLEAN;
                }
-               update_inline_extent_backref(path, iref, refs_to_add,
-                                            extent_op, NULL);
+               update_inline_extent_backref(path, iref, refs_to_add, extent_op);
        } else if (ret == -ENOENT) {
                setup_inline_extent_backref(trans->fs_info, path, iref, parent,
                                            root_objectid, owner, offset,
@@ -1181,21 +1177,17 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
                                 struct btrfs_root *root,
                                 struct btrfs_path *path,
                                 struct btrfs_extent_inline_ref *iref,
-                                int refs_to_drop, int is_data, int *last_ref)
+                                int refs_to_drop, int is_data)
 {
        int ret = 0;
 
        BUG_ON(!is_data && refs_to_drop != 1);
-       if (iref) {
-               update_inline_extent_backref(path, iref, -refs_to_drop, NULL,
-                                            last_ref);
-       } else if (is_data) {
-               ret = remove_extent_data_ref(trans, root, path, refs_to_drop,
-                                            last_ref);
-       } else {
-               *last_ref = 1;
+       if (iref)
+               update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
+       else if (is_data)
+               ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
+       else
                ret = btrfs_del_item(trans, root, path);
-       }
+
        return ret;
 }
@@ -2766,12 +2758,11 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
                spin_unlock(&cache->lock);
                if (!readonly && return_free_space &&
                    global_rsv->space_info == space_info) {
-                       u64 to_add = len;
-
                        spin_lock(&global_rsv->lock);
                        if (!global_rsv->full) {
-                               to_add = min(len, global_rsv->size -
-                                            global_rsv->reserved);
+                               u64 to_add = min(len, global_rsv->size -
+                                                     global_rsv->reserved);
+
                                global_rsv->reserved += to_add;
                                btrfs_space_info_update_bytes_may_use(fs_info,
                                                space_info, to_add);
@@ -2862,6 +2853,35 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
        return 0;
 }
 
+static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
+                                    u64 bytenr, u64 num_bytes, bool is_data)
+{
+       int ret;
+
+       if (is_data) {
+               struct btrfs_root *csum_root;
+
+               csum_root = btrfs_csum_root(trans->fs_info, bytenr);
+               ret = btrfs_del_csums(trans, csum_root, bytenr, num_bytes);
+               if (ret) {
+                       btrfs_abort_transaction(trans, ret);
+                       return ret;
+               }
+       }
+
+       ret = add_to_free_space_tree(trans, bytenr, num_bytes);
+       if (ret) {
+               btrfs_abort_transaction(trans, ret);
+               return ret;
+       }
+
+       ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
+       if (ret)
+               btrfs_abort_transaction(trans, ret);
+
+       return ret;
+}
+
 /*
  * Drop one or more refs of @node.
  *
@@ -2943,7 +2963,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
        u64 refs;
        u64 bytenr = node->bytenr;
        u64 num_bytes = node->num_bytes;
-       int last_ref = 0;
        bool skinny_metadata = btrfs_fs_incompat(info, SKINNY_METADATA);
 
        extent_root = btrfs_extent_root(info, bytenr);
@@ -3010,8 +3029,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                        /* Must be SHARED_* item, remove the backref first */
                        ret = remove_extent_backref(trans, extent_root, path,
-                                                   NULL, refs_to_drop, is_data,
-                                                   &last_ref);
+                                                   NULL, refs_to_drop, is_data);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                goto out;
@@ -3136,8 +3154,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                }
                if (found_extent) {
                        ret = remove_extent_backref(trans, extent_root, path,
-                                                   iref, refs_to_drop, is_data,
-                                                   &last_ref);
+                                                   iref, refs_to_drop, is_data);
                        if (ret) {
                                btrfs_abort_transaction(trans, ret);
                                goto out;
@@ -3182,7 +3199,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        }
                }
 
-               last_ref = 1;
                ret = btrfs_del_items(trans, extent_root, path, path->slots[0],
                                      num_to_del);
                if (ret) {
@@ -3191,28 +3207,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                }
                btrfs_release_path(path);
 
-               if (is_data) {
-                       struct btrfs_root *csum_root;
-                       csum_root = btrfs_csum_root(info, bytenr);
-                       ret = btrfs_del_csums(trans, csum_root, bytenr,
-                                             num_bytes);
-                       if (ret) {
-                               btrfs_abort_transaction(trans, ret);
-                               goto out;
-                       }
-               }
-
-               ret = add_to_free_space_tree(trans, bytenr, num_bytes);
-               if (ret) {
-                       btrfs_abort_transaction(trans, ret);
-                       goto out;
-               }
-
-               ret = btrfs_update_block_group(trans, bytenr, num_bytes, false);
-               if (ret) {
-                       btrfs_abort_transaction(trans, ret);
-                       goto out;
-               }
+               ret = do_free_extent_accounting(trans, bytenr, num_bytes, is_data);
        }
        btrfs_release_path(path);
@@ -4605,6 +4600,28 @@ int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start,
        return ret;
 }
 
+static int alloc_reserved_extent(struct btrfs_trans_handle *trans, u64 bytenr,
+                                u64 num_bytes)
+{
+       struct btrfs_fs_info *fs_info = trans->fs_info;
+       int ret;
+
+       ret = remove_from_free_space_tree(trans, bytenr, num_bytes);
+       if (ret)
+               return ret;
+
+       ret = btrfs_update_block_group(trans, bytenr, num_bytes, true);
+       if (ret) {
+               ASSERT(!ret);
+               btrfs_err(fs_info, "update block group failed for %llu %llu",
+                         bytenr, num_bytes);
+               return ret;
+       }
+
+       trace_btrfs_reserved_extent_alloc(fs_info, bytenr, num_bytes);
+       return 0;
+}
+
 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
                                      u64 parent, u64 root_objectid,
                                      u64 flags, u64 owner, u64 offset,
@@ -4665,18 +4682,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       ret = remove_from_free_space_tree(trans, ins->objectid, ins->offset);
-       if (ret)
-               return ret;
-
-       ret = btrfs_update_block_group(trans, ins->objectid, ins->offset, true);
-       if (ret) { /* -ENOENT, logic error */
-               btrfs_err(fs_info, "update block group failed for %llu %llu",
-                       ins->objectid, ins->offset);
-               BUG();
-       }
-       trace_btrfs_reserved_extent_alloc(fs_info, ins->objectid, ins->offset);
-       return ret;
+       return alloc_reserved_extent(trans, ins->objectid, ins->offset);
 }
 
 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
@@ -4694,7 +4700,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        struct extent_buffer *leaf;
        struct btrfs_delayed_tree_ref *ref;
        u32 size = sizeof(*extent_item) + sizeof(*iref);
-       u64 num_bytes;
        u64 flags = extent_op->flags_to_set;
        bool skinny_metadata = btrfs_fs_incompat(fs_info, SKINNY_METADATA);
 
@@ -4704,12 +4709,10 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        if (skinny_metadata) {
                extent_key.offset = ref->level;
                extent_key.type = BTRFS_METADATA_ITEM_KEY;
-               num_bytes = fs_info->nodesize;
        } else {
                extent_key.offset = node->num_bytes;
                extent_key.type = BTRFS_EXTENT_ITEM_KEY;
                size += sizeof(*block_info);
-               num_bytes = node->num_bytes;
        }
 
        path = btrfs_alloc_path();
@@ -4754,22 +4757,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(leaf);
        btrfs_free_path(path);
 
-       ret = remove_from_free_space_tree(trans, extent_key.objectid,
-                                         num_bytes);
-       if (ret)
-               return ret;
-
-       ret = btrfs_update_block_group(trans, extent_key.objectid,
-                                      fs_info->nodesize, true);
-       if (ret) { /* -ENOENT, logic error */
-               btrfs_err(fs_info, "update block group failed for %llu %llu",
-                       extent_key.objectid, extent_key.offset);
-               BUG();
-       }
-
-       trace_btrfs_reserved_extent_alloc(fs_info, extent_key.objectid,
-                                         fs_info->nodesize);
-       return ret;
+       return alloc_reserved_extent(trans, node->bytenr, fs_info->nodesize);
 }
 
 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,


@@ -2610,6 +2610,7 @@ static bool btrfs_check_repairable(struct inode *inode,
         * a good copy of the failed sector and if we succeed, we have setup
         * everything for repair_io_failure to do the rest for us.
         */
+       ASSERT(failed_mirror);
        failrec->failed_mirror = failed_mirror;
        failrec->this_mirror++;
        if (failrec->this_mirror == failed_mirror)
@@ -2639,7 +2640,6 @@ int btrfs_repair_one_sector(struct inode *inode,
        const int icsum = bio_offset >> fs_info->sectorsize_bits;
        struct bio *repair_bio;
        struct btrfs_bio *repair_bbio;
-       blk_status_t status;
 
        btrfs_debug(fs_info,
                   "repair read error: read error at %llu", start);
@@ -2678,13 +2678,13 @@ int btrfs_repair_one_sector(struct inode *inode,
                    "repair read error: submitting new read to mirror %d",
                    failrec->this_mirror);
 
-       status = submit_bio_hook(inode, repair_bio, failrec->this_mirror,
-                                failrec->bio_flags);
-       if (status) {
-               free_io_failure(failure_tree, tree, failrec);
-               bio_put(repair_bio);
-       }
-       return blk_status_to_errno(status);
+       /*
+        * At this point we have a bio, so any errors from submit_bio_hook()
+        * will be handled by the endio on the repair_bio, so we can't return an
+        * error here.
+        */
+       submit_bio_hook(inode, repair_bio, failrec->this_mirror, failrec->bio_flags);
+       return BLK_STS_OK;
 }
 
 static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
@@ -3067,6 +3067,14 @@ static void end_bio_extent_readpage(struct bio *bio)
                        goto readpage_ok;
 
                if (is_data_inode(inode)) {
+                       /*
+                        * If we failed to submit the IO at all we'll have a
+                        * mirror_num == 0, in which case we need to just mark
+                        * the page with an error and unlock it and carry on.
+                        */
+                       if (mirror == 0)
+                               goto readpage_ok;
+
                        /*
                         * btrfs_submit_read_repair() will handle all the good
                         * and bad sectors, we just continue to the next bvec.
@@ -3534,7 +3542,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
        }
 
        em = btrfs_get_extent(BTRFS_I(inode), page, pg_offset, start, len);
-       if (em_cached && !IS_ERR_OR_NULL(em)) {
+       if (em_cached && !IS_ERR(em)) {
                BUG_ON(*em_cached);
                refcount_inc(&em->refs);
                *em_cached = em;
@@ -3563,7 +3571,6 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
        u64 cur_end;
        struct extent_map *em;
        int ret = 0;
-       int nr = 0;
        size_t pg_offset = 0;
        size_t iosize;
        size_t blocksize = inode->i_sb->s_blocksize;
@@ -3608,9 +3615,10 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                }
                em = __get_extent_map(inode, page, pg_offset, cur,
                                      end - cur + 1, em_cached);
-               if (IS_ERR_OR_NULL(em)) {
+               if (IS_ERR(em)) {
                        unlock_extent(tree, cur, end);
                        end_page_read(page, false, cur, end + 1 - cur);
+                       ret = PTR_ERR(em);
                        break;
                }
                extent_offset = cur - em->start;
@@ -3721,9 +3729,7 @@ int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
                                         end_bio_extent_readpage, 0,
                                         this_bio_flag,
                                         force_bio_submit);
-               if (!ret) {
-                       nr++;
-               } else {
+               if (ret) {
                        unlock_extent(tree, cur, cur + iosize - 1);
                        end_page_read(page, false, cur, iosize);
                        goto out;
@@ -3951,7 +3957,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
                }
 
                em = btrfs_get_extent(inode, NULL, 0, cur, end - cur + 1);
-               if (IS_ERR_OR_NULL(em)) {
+               if (IS_ERR(em)) {
                        btrfs_page_set_error(fs_info, page, cur, end - cur + 1);
                        ret = PTR_ERR_OR_ZERO(em);
                        break;
@@ -4780,11 +4786,12 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
                return ret;
        }
        if (cache) {
-               /* Impiles write in zoned mode */
-               btrfs_put_block_group(cache);
-               /* Mark the last eb in a block group */
+               /*
+                * Implies write in zoned mode. Mark the last eb in a block group.
+                */
                if (cache->seq_zone && eb->start + eb->len == cache->zone_capacity)
                        set_bit(EXTENT_BUFFER_ZONE_FINISH, &eb->bflags);
+               btrfs_put_block_group(cache);
        }
        ret = write_one_eb(eb, wbc, epd);
        free_extent_buffer(eb);
@@ -5390,7 +5397,7 @@ static struct extent_map *get_extent_skip_holes(struct btrfs_inode *inode,
                        break;
                len = ALIGN(len, sectorsize);
                em = btrfs_get_extent_fiemap(inode, offset, len);
-               if (IS_ERR_OR_NULL(em))
+               if (IS_ERR(em))
                        return em;
 
                /* if this isn't a hole return it */


@@ -492,6 +492,8 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
  */
 void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
 {
+       lockdep_assert_held_write(&tree->lock);
+
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &em->flags));
        rb_erase_cached(&em->rb_node, &tree->map);
        if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
@@ -506,6 +508,8 @@ void replace_extent_mapping(struct extent_map_tree *tree,
                            struct extent_map *new,
                            int modified)
 {
+       lockdep_assert_held_write(&tree->lock);
+
        WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags));
        ASSERT(extent_map_in_tree(cur));
        if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags))
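Both asserts document a pre-existing contract rather than change behavior: extent_map_tree::lock is an rwlock and these helpers mutate the rbtree, so the caller must hold it for writing. A minimal sketch of the expected calling pattern (the surrounding caller code here is illustrative):

    write_lock(&em_tree->lock);
    remove_extent_mapping(em_tree, em);     /* lockdep now checks this */
    write_unlock(&em_tree->lock);

    /* Drop the reference that the tree was holding. */
    free_extent_map(em);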


@@ -305,7 +305,7 @@ static int search_csum_tree(struct btrfs_fs_info *fs_info,
        read_extent_buffer(path->nodes[0], dst, (unsigned long)item,
                           ret * csum_size);
 out:
-       if (ret == -ENOENT)
+       if (ret == -ENOENT || ret == -EFBIG)
                ret = 0;
        return ret;
 }
@@ -368,6 +368,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_bio *bbio = NULL;
        struct btrfs_path *path;
        const u32 sectorsize = fs_info->sectorsize;
        const u32 csum_size = fs_info->csum_size;
@@ -377,6 +378,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
        u8 *csum;
        const unsigned int nblocks = orig_len >> fs_info->sectorsize_bits;
        int count = 0;
+       blk_status_t ret = BLK_STS_OK;
 
        if ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) ||
            test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
@@ -400,7 +402,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
                return BLK_STS_RESOURCE;
 
        if (!dst) {
-               struct btrfs_bio *bbio = btrfs_bio(bio);
+               bbio = btrfs_bio(bio);
 
                if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
                        bbio->csum = kmalloc_array(nblocks, csum_size, GFP_NOFS);
@@ -456,21 +458,27 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
                count = search_csum_tree(fs_info, path, cur_disk_bytenr,
                                         search_len, csum_dst);
-               if (count <= 0) {
-                       /*
-                        * Either we hit a critical error or we didn't find
-                        * the csum.
-                        * Either way, we put zero into the csums dst, and skip
-                        * to the next sector.
-                        */
+               if (count < 0) {
+                       ret = errno_to_blk_status(count);
+                       if (bbio)
+                               btrfs_bio_free_csum(bbio);
+                       break;
+               }
+
+               /*
+                * We didn't find a csum for this range.  We need to make sure
+                * we complain loudly about this, because we are not NODATASUM.
+                *
+                * However for the DATA_RELOC inode we could potentially be
+                * relocating data extents for a NODATASUM inode, so the inode
+                * itself won't be marked with NODATASUM, but the extent we're
+                * copying is in fact NODATASUM.  If we don't find a csum we
+                * assume this is the case.
+                */
+               if (count == 0) {
                        memset(csum_dst, 0, csum_size);
                        count = 1;
-                       /*
-                        * For data reloc inode, we need to mark the range
-                        * NODATASUM so that balance won't report false csum
-                        * error.
-                        */
+
                        if (BTRFS_I(inode)->root->root_key.objectid ==
                            BTRFS_DATA_RELOC_TREE_OBJECTID) {
                                u64 file_offset;
@@ -491,7 +499,7 @@ blk_status_t btrfs_lookup_bio_sums(struct inode *inode, struct bio *bio, u8 *dst
        }
 
        btrfs_free_path(path);
-       return BLK_STS_OK;
+       return ret;
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
@@ -612,32 +620,33 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
        return ret;
 }
 
-/*
- * btrfs_csum_one_bio - Calculates checksums of the data contained inside a bio
+/**
+ * Calculate checksums of the data contained inside a bio
+ *
  * @inode:       Owner of the data inside the bio
  * @bio:         Contains the data to be checksummed
- * @file_start:  offset in file this bio begins to describe
- * @contig:      Boolean. If true/1 means all bio vecs in this bio are
- *               contiguous and they begin at @file_start in the file. False/0
- *               means this bio can contain potentially discontiguous bio vecs
- *               so the logical offset of each should be calculated separately.
+ * @offset:      If (u64)-1, @bio may contain discontiguous bio vecs, so the
+ *               file offsets are determined from the page offsets in the bio.
+ *               Otherwise, this is the starting file offset of the bio vecs in
+ *               @bio, which must be contiguous.
+ * @one_ordered: If true, @bio only refers to one ordered extent.
  */
 blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
-                      u64 file_start, int contig)
+                               u64 offset, bool one_ordered)
 {
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        struct btrfs_ordered_sum *sums;
        struct btrfs_ordered_extent *ordered = NULL;
+       const bool use_page_offsets = (offset == (u64)-1);
        char *data;
        struct bvec_iter iter;
        struct bio_vec bvec;
        int index;
-       int nr_sectors;
+       unsigned int blockcount;
        unsigned long total_bytes = 0;
        unsigned long this_sum_bytes = 0;
        int i;
-       u64 offset;
        unsigned nofs_flag;
 
        nofs_flag = memalloc_nofs_save();
@@ -651,18 +660,13 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
        sums->len = bio->bi_iter.bi_size;
        INIT_LIST_HEAD(&sums->list);
 
-       if (contig)
-               offset = file_start;
-       else
-               offset = 0; /* shut up gcc */
-
        sums->bytenr = bio->bi_iter.bi_sector << 9;
        index = 0;
 
        shash->tfm = fs_info->csum_shash;
 
        bio_for_each_segment(bvec, bio, iter) {
-               if (!contig)
+               if (use_page_offsets)
                        offset = page_offset(bvec.bv_page) + bvec.bv_offset;
 
                if (!ordered) {
@@ -681,13 +685,14 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_inode *inode, struct bio *bio,
                        }
                }
 
-               nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info,
+               blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
                                                 bvec.bv_len + fs_info->sectorsize
                                                 - 1);
 
-               for (i = 0; i < nr_sectors; i++) {
-                       if (offset >= ordered->file_offset + ordered->num_bytes ||
-                           offset < ordered->file_offset) {
+               for (i = 0; i < blockcount; i++) {
+                       if (!one_ordered &&
+                           !in_range(offset, ordered->file_offset,
+                                     ordered->num_bytes)) {
                                unsigned long bytes_left;
 
                                sums->len = this_sum_bytes;
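The in_range() conversion above is purely cosmetic; assuming btrfs's helper from misc.h, in_range(b, first, len) expands to (b) >= (first) && (b) < (first) + (len), so the old and new tests are logical negations of each other. A standalone check of that equivalence (the function names here are mine, not from the diff):

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))

    static bool old_outside(uint64_t off, uint64_t start, uint64_t len)
    {
            return off >= start + len || off < start;       /* old spelling */
    }

    static bool new_outside(uint64_t off, uint64_t start, uint64_t len)
    {
            return !in_range(off, start, len);              /* new spelling */
    }

    int main(void)
    {
            /* Spot checks around the boundaries of [100, 100 + 50). */
            for (uint64_t off = 0; off < 200; off++)
                    assert(old_outside(off, 100, 50) == new_outside(off, 100, 50));
            return 0;
    }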
@@ -1211,6 +1216,7 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
        extent_start = key.offset;
        extent_end = btrfs_file_extent_end(path);
        em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
+       em->generation = btrfs_file_extent_generation(leaf, fi);
        if (type == BTRFS_FILE_EXTENT_REG ||
            type == BTRFS_FILE_EXTENT_PREALLOC) {
                em->start = extent_start;


@@ -691,7 +691,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
        int modify_tree = -1;
        int update_refs;
        int found = 0;
-       int leafs_visited = 0;
        struct btrfs_path *path = args->path;
 
        args->bytes_found = 0;
@@ -729,7 +728,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                        path->slots[0]--;
        }
        ret = 0;
-       leafs_visited++;
 next_slot:
        leaf = path->nodes[0];
        if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -741,7 +739,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
                                ret = 0;
                        break;
                }
-               leafs_visited++;
                leaf = path->nodes[0];
                recow = 1;
        }
@@ -987,7 +984,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
         * which case it unlocked our path, so check path->locks[0] matches a
         * write lock.
         */
-       if (!ret && args->replace_extent && leafs_visited == 1 &&
+       if (!ret && args->replace_extent &&
            path->locks[0] == BTRFS_WRITE_LOCK &&
            btrfs_leaf_free_space(leaf) >=
            sizeof(struct btrfs_item) + args->extent_item_size) {
@@ -1722,7 +1719,8 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
                                          fs_info->sectorsize);
                WARN_ON(reserve_bytes == 0);
                ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-                                                     reserve_bytes);
+                                                     reserve_bytes,
+                                                     reserve_bytes);
                if (ret) {
                        if (!only_release_metadata)
                                btrfs_free_reserved_data_space(BTRFS_I(inode),
@@ -2039,12 +2037,43 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
        return err < 0 ? err : written;
 }
 
-static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
-                                   struct iov_iter *from)
+static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
+                       const struct btrfs_ioctl_encoded_io_args *encoded)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       loff_t count;
+       ssize_t ret;
+
+       btrfs_inode_lock(inode, 0);
+       count = encoded->len;
+       ret = generic_write_checks_count(iocb, &count);
+       if (ret == 0 && count != encoded->len) {
+               /*
+                * The write got truncated by generic_write_checks_count(). We
+                * can't do a partial encoded write.
+                */
+               ret = -EFBIG;
+       }
+       if (ret || encoded->len == 0)
+               goto out;
+
+       ret = btrfs_write_check(iocb, from, encoded->len);
+       if (ret < 0)
+               goto out;
+
+       ret = btrfs_do_encoded_write(iocb, from, encoded);
+out:
+       btrfs_inode_unlock(inode, 0);
+       return ret;
+}
+
+ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
+                           const struct btrfs_ioctl_encoded_io_args *encoded)
 {
        struct file *file = iocb->ki_filp;
        struct btrfs_inode *inode = BTRFS_I(file_inode(file));
-       ssize_t num_written = 0;
+       ssize_t num_written, num_sync;
        const bool sync = iocb->ki_flags & IOCB_DSYNC;
 
        /*
@@ -2055,22 +2084,28 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        if (BTRFS_FS_ERROR(inode->root->fs_info))
                return -EROFS;
 
-       if (!(iocb->ki_flags & IOCB_DIRECT) &&
-           (iocb->ki_flags & IOCB_NOWAIT))
+       if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
                return -EOPNOTSUPP;
 
        if (sync)
                atomic_inc(&inode->sync_writers);
 
-       if (iocb->ki_flags & IOCB_DIRECT)
-               num_written = btrfs_direct_write(iocb, from);
-       else
-               num_written = btrfs_buffered_write(iocb, from);
+       if (encoded) {
+               num_written = btrfs_encoded_write(iocb, from, encoded);
+               num_sync = encoded->len;
+       } else if (iocb->ki_flags & IOCB_DIRECT) {
+               num_written = num_sync = btrfs_direct_write(iocb, from);
+       } else {
+               num_written = num_sync = btrfs_buffered_write(iocb, from);
+       }
 
        btrfs_set_inode_last_sub_trans(inode);
 
-       if (num_written > 0)
-               num_written = generic_write_sync(iocb, num_written);
+       if (num_sync > 0) {
+               num_sync = generic_write_sync(iocb, num_sync);
+               if (num_sync < 0)
+                       num_written = num_sync;
+       }
 
        if (sync)
                atomic_dec(&inode->sync_writers);
@@ -2079,6 +2114,11 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
        return num_written;
 }
 
+static ssize_t btrfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       return btrfs_do_write_iter(iocb, from, NULL);
+}
+
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
        struct btrfs_file_private *private = filp->private_data;
@@ -2474,7 +2514,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
                hole_em = alloc_extent_map();
                if (!hole_em) {
                        btrfs_drop_extent_cache(inode, offset, end - 1, 0);
-                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+                       btrfs_set_inode_full_sync(inode);
                } else {
                        hole_em->start = offset;
                        hole_em->len = end - offset;
@@ -2495,8 +2535,7 @@ static int fill_holes(struct btrfs_trans_handle *trans,
                } while (ret == -EEXIST);
                free_extent_map(hole_em);
                if (ret)
-                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-                                       &inode->runtime_flags);
+                       btrfs_set_inode_full_sync(inode);
        }
 
        return 0;
@@ -2850,7 +2889,7 @@ int btrfs_replace_file_extents(struct btrfs_inode *inode,
         * maps for the replacement extents (or holes).
         */
        if (extent_info && !extent_info->is_new_extent)
-               set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
+               btrfs_set_inode_full_sync(inode);
 
        if (ret)
                goto out_trans;


@@ -25,6 +25,8 @@ static struct btrfs_root *btrfs_free_space_root(
                .offset = 0,
        };
 
+       if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
+               key.offset = block_group->global_root_id;
        return btrfs_global_root(block_group->fs_info, &key);
 }
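The initializer above is shown truncated by the diff context; for reference, my reading of the full key is below, with the first two fields stated as an assumption based on how btrfs_global_root() is keyed rather than quoted from this hunk:

    struct btrfs_key key = {
            .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
            .type = BTRFS_ROOT_ITEM_KEY,
            .offset = 0,    /* replaced by block_group->global_root_id on v2 */
    };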

File diff suppressed because it is too large


@@ -28,6 +28,7 @@
 #include <linux/iversion.h>
 #include <linux/fileattr.h>
 #include <linux/fsverity.h>
+#include <linux/sched/xacct.h>
 #include "ctree.h"
 #include "disk-io.h"
 #include "export.h"
@@ -88,6 +89,24 @@ struct btrfs_ioctl_send_args_32 {
 #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
                               struct btrfs_ioctl_send_args_32)
 
+struct btrfs_ioctl_encoded_io_args_32 {
+       compat_uptr_t iov;
+       compat_ulong_t iovcnt;
+       __s64 offset;
+       __u64 flags;
+       __u64 len;
+       __u64 unencoded_len;
+       __u64 unencoded_offset;
+       __u32 compression;
+       __u32 encryption;
+       __u8 reserved[64];
+};
+
+#define BTRFS_IOC_ENCODED_READ_32 _IOR(BTRFS_IOCTL_MAGIC, 64, \
+                                      struct btrfs_ioctl_encoded_io_args_32)
+#define BTRFS_IOC_ENCODED_WRITE_32 _IOW(BTRFS_IOCTL_MAGIC, 64, \
+                                       struct btrfs_ioctl_encoded_io_args_32)
 #endif
 
 /* Mask out flags that are inappropriate for the given type of inode. */
@@ -440,10 +459,8 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
        }
 }
 
-static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
+static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)
 {
-       struct inode *inode = file_inode(file);
-
        return put_user(inode->i_generation, arg);
 }
@@ -753,6 +770,13 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        struct btrfs_trans_handle *trans;
        int ret;
 
+       /* We do not support snapshotting right now. */
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_warn(fs_info,
+                          "extent tree v2 doesn't support snapshotting yet");
+               return -EOPNOTSUPP;
+       }
+
        if (!test_bit(BTRFS_ROOT_SHAREABLE, &root->state))
                return -EINVAL;
@@ -1522,6 +1546,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
 }
 
 #define CLUSTER_SIZE   (SZ_256K)
+static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
 
 /*
  * Defrag one contiguous target range.
@@ -1667,7 +1692,6 @@ static int defrag_one_cluster(struct btrfs_inode *inode,
        LIST_HEAD(target_list);
        int ret;
 
-       BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
        ret = defrag_collect_targets(inode, start, len, extent_thresh,
                                     newer_than, do_compress, false,
                                     &target_list, NULL);
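The BUILD_BUG_ON() to static_assert() swap works because static_assert() is valid at file scope, so the check can live once next to the constant instead of being restated in every function that relies on it; BUILD_BUG_ON() expands to statements and is confined to function bodies. A compile-time sketch of the difference, assuming kernel context (the helper name is illustrative):

    /* New style: asserted once, at the definition site. */
    #define CLUSTER_SIZE    (SZ_256K)
    static_assert(IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));

    /* Old style: repeated inside each function that depended on it. */
    static void some_defrag_helper(void)
    {
            BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
            /* ... */
    }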
@@ -1810,9 +1834,6 @@ int btrfs_defrag_file(struct inode *inode, struct file_ra_state *ra,
                u64 last_scanned = cur;
                u64 cluster_end;
 
-               /* The cluster size 256K should always be page aligned */
-               BUILD_BUG_ON(!IS_ALIGNED(CLUSTER_SIZE, PAGE_SIZE));
-
                if (btrfs_defrag_cancelled(fs_info)) {
                        ret = -EAGAIN;
                        break;
@@ -2229,10 +2250,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
        return ret;
 }
 
-static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
+static noinline int btrfs_ioctl_subvol_getflags(struct inode *inode,
                                                void __user *arg)
 {
-       struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret = 0;
@@ -2562,12 +2582,11 @@ static noinline int search_ioctl(struct inode *inode,
        return ret;
 }
 
-static noinline int btrfs_ioctl_tree_search(struct file *file,
+static noinline int btrfs_ioctl_tree_search(struct inode *inode,
                                            void __user *argp)
 {
        struct btrfs_ioctl_search_args __user *uargs;
        struct btrfs_ioctl_search_key sk;
-       struct inode *inode;
        int ret;
        size_t buf_size;
 
@@ -2581,7 +2600,6 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 
        buf_size = sizeof(uargs->buf);
 
-       inode = file_inode(file);
        ret = search_ioctl(inode, &sk, &buf_size, uargs->buf);
 
        /*
@@ -2596,12 +2614,11 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
        return ret;
 }
 
-static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
+static noinline int btrfs_ioctl_tree_search_v2(struct inode *inode,
                                               void __user *argp)
 {
        struct btrfs_ioctl_search_args_v2 __user *uarg;
        struct btrfs_ioctl_search_args_v2 args;
-       struct inode *inode;
        int ret;
        size_t buf_size;
        const size_t buf_limit = SZ_16M;
@@ -2620,7 +2637,6 @@ static noinline int btrfs_ioctl_tree_search_v2(struct file *file,
        if (buf_size > buf_limit)
                buf_size = buf_limit;
 
-       inode = file_inode(file);
        ret = search_ioctl(inode, &args.key, &buf_size,
                           (char __user *)(&uarg->buf[0]));
        if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key)))
@@ -2871,25 +2887,22 @@ static int btrfs_search_path_in_tree_user(struct user_namespace *mnt_userns,
        return ret;
 }
 
-static noinline int btrfs_ioctl_ino_lookup(struct file *file,
+static noinline int btrfs_ioctl_ino_lookup(struct btrfs_root *root,
                                           void __user *argp)
 {
        struct btrfs_ioctl_ino_lookup_args *args;
-       struct inode *inode;
        int ret = 0;
 
        args = memdup_user(argp, sizeof(*args));
        if (IS_ERR(args))
                return PTR_ERR(args);
 
-       inode = file_inode(file);
-
        /*
        * Unprivileged query to obtain the containing subvolume root id. The
        * path is reset so it's consistent with btrfs_search_path_in_tree.
        */
        if (args->treeid == 0)
-               args->treeid = BTRFS_I(inode)->root->root_key.objectid;
+               args->treeid = root->root_key.objectid;
 
        if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
                args->name[0] = 0;
@@ -2901,7 +2914,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
                goto out;
        }
 
-       ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
+       ret = btrfs_search_path_in_tree(root->fs_info,
                                        args->treeid, args->objectid,
                                        args->name);
 
@@ -2957,7 +2970,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
 }
 
 /* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
-static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_info(struct inode *inode, void __user *argp)
 {
        struct btrfs_ioctl_get_subvol_info_args *subvol_info;
        struct btrfs_fs_info *fs_info;
@@ -2969,7 +2982,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
        struct extent_buffer *leaf;
        unsigned long item_off;
        unsigned long item_len;
-       struct inode *inode;
        int slot;
        int ret = 0;
 
@@ -2983,7 +2995,6 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
                return -ENOMEM;
        }
 
-       inode = file_inode(file);
        fs_info = BTRFS_I(inode)->root->fs_info;
 
        /* Get root_item of inode's subvolume */
@@ -3077,15 +3088,14 @@ static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
  * Return ROOT_REF information of the subvolume containing this inode
  * except the subvolume name.
  */
-static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
+static int btrfs_ioctl_get_subvol_rootref(struct btrfs_root *root,
+                                          void __user *argp)
 {
        struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
        struct btrfs_root_ref *rref;
-       struct btrfs_root *root;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct extent_buffer *leaf;
-       struct inode *inode;
        u64 objectid;
        int slot;
        int ret;
@@ -3101,15 +3111,13 @@ static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
                return PTR_ERR(rootrefs);
        }
 
-       inode = file_inode(file);
-       root = BTRFS_I(inode)->root->fs_info->tree_root;
-       objectid = BTRFS_I(inode)->root->root_key.objectid;
-
+       objectid = root->root_key.objectid;
        key.objectid = objectid;
        key.type = BTRFS_ROOT_REF_KEY;
        key.offset = rootrefs->min_treeid;
        found = 0;
 
+       root = root->fs_info->tree_root;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0) {
                goto out;
@@ -3189,6 +3197,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
        int err = 0;
        bool destroy_parent = false;
 
+       /* We don't support snapshots with extent tree v2 yet. */
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info,
+                         "extent tree v2 doesn't support snapshot deletion yet");
+               return -EOPNOTSUPP;
+       }
+
        if (destroy_v2) {
                vol_args2 = memdup_user(arg, sizeof(*vol_args2));
                if (IS_ERR(vol_args2))
@@ -3464,6 +3479,11 @@ static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info, "device add not supported on extent tree v2 yet");
+               return -EINVAL;
+       }
+
        if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_DEV_ADD)) {
                if (!btrfs_exclop_start_try_lock(fs_info, BTRFS_EXCLOP_DEV_ADD))
                        return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
@@ -3989,6 +4009,11 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
+               return -EINVAL;
+       }
+
        sa = memdup_user(arg, sizeof(*sa));
        if (IS_ERR(sa))
                return PTR_ERR(sa);
@@ -4088,6 +4113,11 @@ static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
 
+       if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+               btrfs_err(fs_info, "device replace not supported on extent tree v2 yet");
+               return -EINVAL;
+       }
+
        p = memdup_user(arg, sizeof(*p));
        if (IS_ERR(p))
                return PTR_ERR(p);
@@ -5149,7 +5179,7 @@ static int btrfs_ioctl_set_features(struct file *file, void __user *arg)
        return ret;
 }
 
-static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
+static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
 {
        struct btrfs_ioctl_send_args *arg;
        int ret;
@@ -5179,11 +5209,194 @@ static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
                if (IS_ERR(arg))
                        return PTR_ERR(arg);
        }
-       ret = btrfs_ioctl_send(file, arg);
+       ret = btrfs_ioctl_send(inode, arg);
        kfree(arg);
        return ret;
 }
 
+static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
+                                   bool compat)
+{
+       struct btrfs_ioctl_encoded_io_args args = { 0 };
+       size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
+                                            flags);
+       size_t copy_end;
+       struct iovec iovstack[UIO_FASTIOV];
+       struct iovec *iov = iovstack;
+       struct iov_iter iter;
+       loff_t pos;
+       struct kiocb kiocb;
+       ssize_t ret;
+
+       if (!capable(CAP_SYS_ADMIN)) {
+               ret = -EPERM;
+               goto out_acct;
+       }
+
+       if (compat) {
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+               struct btrfs_ioctl_encoded_io_args_32 args32;
+
+               copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32,
+                                      flags);
+               if (copy_from_user(&args32, argp, copy_end)) {
+                       ret = -EFAULT;
+                       goto out_acct;
+               }
+               args.iov = compat_ptr(args32.iov);
+               args.iovcnt = args32.iovcnt;
+               args.offset = args32.offset;
+               args.flags = args32.flags;
+#else
+               return -ENOTTY;
+#endif
+       } else {
+               copy_end = copy_end_kernel;
+               if (copy_from_user(&args, argp, copy_end)) {
+                       ret = -EFAULT;
+                       goto out_acct;
+               }
+       }
+       if (args.flags != 0) {
+               ret = -EINVAL;
+               goto out_acct;
+       }
+
+       ret = import_iovec(READ, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
+                          &iov, &iter);
+       if (ret < 0)
+               goto out_acct;
+
+       if (iov_iter_count(&iter) == 0) {
+               ret = 0;
+               goto out_iov;
+       }
+       pos = args.offset;
+       ret = rw_verify_area(READ, file, &pos, args.len);
+       if (ret < 0)
+               goto out_iov;
+
+       init_sync_kiocb(&kiocb, file);
+       kiocb.ki_pos = pos;
+
+       ret = btrfs_encoded_read(&kiocb, &iter, &args);
+       if (ret >= 0) {
+               fsnotify_access(file);
+               if (copy_to_user(argp + copy_end,
+                                (char *)&args + copy_end_kernel,
+                                sizeof(args) - copy_end_kernel))
+                       ret = -EFAULT;
+       }
+
+out_iov:
+       kfree(iov);
+out_acct:
+       if (ret > 0)
+               add_rchar(current, ret);
+       inc_syscr(current);
+       return ret;
+}
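For a sense of how this handler is meant to be driven, here is a hedged user-space sketch of an encoded read, assuming the uapi side of this series (struct btrfs_ioctl_encoded_io_args and BTRFS_IOC_ENCODED_READ from <linux/btrfs.h>); error handling is abbreviated and, as enforced above, the caller needs CAP_SYS_ADMIN:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/uio.h>
    #include <unistd.h>
    #include <linux/btrfs.h>

    static int encoded_read_example(const char *path)
    {
            static char buf[1024 * 1024];   /* must hold one encoded extent */
            struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
            struct btrfs_ioctl_encoded_io_args args;
            ssize_t ret;
            int fd;

            fd = open(path, O_RDONLY);
            if (fd < 0)
                    return -1;

            memset(&args, 0, sizeof(args));
            args.iov = &iov;
            args.iovcnt = 1;
            args.offset = 0;        /* file offset to read from; flags stay 0 */

            /*
             * On success the return value is the number of encoded bytes
             * copied into the iovec; the kernel fills in len, unencoded_len,
             * unencoded_offset, compression and encryption behind 'flags'.
             */
            ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
            if (ret >= 0)
                    printf("%zd encoded bytes, covers %llu, compression %u\n",
                           ret, (unsigned long long)args.len, args.compression);

            close(fd);
            return ret < 0 ? -1 : 0;
    }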
static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool compat)
{
struct btrfs_ioctl_encoded_io_args args;
struct iovec iovstack[UIO_FASTIOV];
struct iovec *iov = iovstack;
struct iov_iter iter;
loff_t pos;
struct kiocb kiocb;
ssize_t ret;
if (!capable(CAP_SYS_ADMIN)) {
ret = -EPERM;
goto out_acct;
}
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EBADF;
goto out_acct;
}
if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_encoded_io_args_32 args32;
if (copy_from_user(&args32, argp, sizeof(args32))) {
ret = -EFAULT;
goto out_acct;
}
args.iov = compat_ptr(args32.iov);
args.iovcnt = args32.iovcnt;
args.offset = args32.offset;
args.flags = args32.flags;
args.len = args32.len;
args.unencoded_len = args32.unencoded_len;
args.unencoded_offset = args32.unencoded_offset;
args.compression = args32.compression;
args.encryption = args32.encryption;
memcpy(args.reserved, args32.reserved, sizeof(args.reserved));
#else
return -ENOTTY;
#endif
} else {
if (copy_from_user(&args, argp, sizeof(args))) {
ret = -EFAULT;
goto out_acct;
}
}
ret = -EINVAL;
if (args.flags != 0)
goto out_acct;
if (memchr_inv(args.reserved, 0, sizeof(args.reserved)))
goto out_acct;
if (args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE &&
args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE)
goto out_acct;
if (args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES ||
args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES)
goto out_acct;
if (args.unencoded_offset > args.unencoded_len)
goto out_acct;
if (args.len > args.unencoded_len - args.unencoded_offset)
goto out_acct;
ret = import_iovec(WRITE, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
&iov, &iter);
if (ret < 0)
goto out_acct;
file_start_write(file);
if (iov_iter_count(&iter) == 0) {
ret = 0;
goto out_end_write;
}
pos = args.offset;
ret = rw_verify_area(WRITE, file, &pos, args.len);
if (ret < 0)
goto out_end_write;
init_sync_kiocb(&kiocb, file);
ret = kiocb_set_rw_flags(&kiocb, 0);
if (ret)
goto out_end_write;
kiocb.ki_pos = pos;
ret = btrfs_do_write_iter(&kiocb, &iter, &args);
if (ret > 0)
fsnotify_modify(file);
out_end_write:
file_end_write(file);
kfree(iov);
out_acct:
if (ret > 0)
add_wchar(current, ret);
inc_syscw(current);
return ret;
}
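
The two helpers above are the whole user-space surface of encoded I/O. As a quick orientation, here is a minimal, hypothetical user-space sketch of the read side; it assumes a 5.18 uapi header, CAP_SYS_ADMIN, and an illustrative mount path, and is not part of the patch:

	/* encoded-read.c: fetch up to one extent in its encoded (on-disk) form */
	#include <stdio.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/uio.h>
	#include <linux/btrfs.h>

	int main(void)
	{
		static char buf[128 * 1024];	/* one max-sized compressed extent */
		struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
		struct btrfs_ioctl_encoded_io_args args = { 0 };
		int fd = open("/mnt/file", O_RDONLY);	/* hypothetical path */
		ssize_t ret;

		if (fd < 0)
			return 1;
		args.iov = &iov;
		args.iovcnt = 1;
		args.offset = 0;	/* logical file offset; args.flags must stay 0 */
		ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
		if (ret < 0)
			return 1;
		/* on return the kernel filled in len, unencoded_* and compression */
		printf("%zd encoded bytes, compression type %u\n", ret, args.compression);
		close(fd);
		return 0;
	}

Writing follows the same pattern through BTRFS_IOC_ENCODED_WRITE, except the caller fills in len, unencoded_len, unencoded_offset and compression itself, as the validation above requires.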
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
@@ -5194,7 +5407,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	switch (cmd) {
 	case FS_IOC_GETVERSION:
-		return btrfs_ioctl_getversion(file, argp);
+		return btrfs_ioctl_getversion(inode, argp);
 	case FS_IOC_GETFSLABEL:
 		return btrfs_ioctl_get_fslabel(fs_info, argp);
 	case FS_IOC_SETFSLABEL:
@@ -5214,7 +5427,7 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case BTRFS_IOC_SNAP_DESTROY_V2:
 		return btrfs_ioctl_snap_destroy(file, argp, true);
 	case BTRFS_IOC_SUBVOL_GETFLAGS:
-		return btrfs_ioctl_subvol_getflags(file, argp);
+		return btrfs_ioctl_subvol_getflags(inode, argp);
 	case BTRFS_IOC_SUBVOL_SETFLAGS:
 		return btrfs_ioctl_subvol_setflags(file, argp);
 	case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -5238,11 +5451,11 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case BTRFS_IOC_BALANCE:
 		return btrfs_ioctl_balance(file, NULL);
 	case BTRFS_IOC_TREE_SEARCH:
-		return btrfs_ioctl_tree_search(file, argp);
+		return btrfs_ioctl_tree_search(inode, argp);
 	case BTRFS_IOC_TREE_SEARCH_V2:
-		return btrfs_ioctl_tree_search_v2(file, argp);
+		return btrfs_ioctl_tree_search_v2(inode, argp);
 	case BTRFS_IOC_INO_LOOKUP:
-		return btrfs_ioctl_ino_lookup(file, argp);
+		return btrfs_ioctl_ino_lookup(root, argp);
 	case BTRFS_IOC_INO_PATHS:
 		return btrfs_ioctl_ino_to_path(root, argp);
 	case BTRFS_IOC_LOGICAL_INO:
@@ -5289,10 +5502,10 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		return btrfs_ioctl_set_received_subvol_32(file, argp);
 #endif
 	case BTRFS_IOC_SEND:
-		return _btrfs_ioctl_send(file, argp, false);
+		return _btrfs_ioctl_send(inode, argp, false);
 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
 	case BTRFS_IOC_SEND_32:
-		return _btrfs_ioctl_send(file, argp, true);
+		return _btrfs_ioctl_send(inode, argp, true);
 #endif
 	case BTRFS_IOC_GET_DEV_STATS:
 		return btrfs_ioctl_get_dev_stats(fs_info, argp);
@@ -5319,15 +5532,25 @@ long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case BTRFS_IOC_SET_FEATURES:
 		return btrfs_ioctl_set_features(file, argp);
 	case BTRFS_IOC_GET_SUBVOL_INFO:
-		return btrfs_ioctl_get_subvol_info(file, argp);
+		return btrfs_ioctl_get_subvol_info(inode, argp);
 	case BTRFS_IOC_GET_SUBVOL_ROOTREF:
-		return btrfs_ioctl_get_subvol_rootref(file, argp);
+		return btrfs_ioctl_get_subvol_rootref(root, argp);
 	case BTRFS_IOC_INO_LOOKUP_USER:
 		return btrfs_ioctl_ino_lookup_user(file, argp);
 	case FS_IOC_ENABLE_VERITY:
 		return fsverity_ioctl_enable(file, (const void __user *)argp);
 	case FS_IOC_MEASURE_VERITY:
 		return fsverity_ioctl_measure(file, argp);
+	case BTRFS_IOC_ENCODED_READ:
+		return btrfs_ioctl_encoded_read(file, argp, false);
+	case BTRFS_IOC_ENCODED_WRITE:
+		return btrfs_ioctl_encoded_write(file, argp, false);
+#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
+	case BTRFS_IOC_ENCODED_READ_32:
+		return btrfs_ioctl_encoded_read(file, argp, true);
+	case BTRFS_IOC_ENCODED_WRITE_32:
+		return btrfs_ioctl_encoded_write(file, argp, true);
+#endif
 	}
 	return -ENOTTY;

----------------------------------------------------------------------

@@ -55,6 +55,9 @@
  * 0x1000 | SegHdr N+1| Data payload N+1 ...     |
  */
+#define WORKSPACE_BUF_LENGTH	(lzo1x_worst_compress(PAGE_SIZE))
+#define WORKSPACE_CBUF_LENGTH	(lzo1x_worst_compress(PAGE_SIZE))
+
 struct workspace {
 	void *mem;
 	void *buf;	/* where decompressed data goes */
@@ -83,8 +86,8 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
 		return ERR_PTR(-ENOMEM);

 	workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
-	workspace->buf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
-	workspace->cbuf = kvmalloc(lzo1x_worst_compress(PAGE_SIZE), GFP_KERNEL);
+	workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
+	workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
 	if (!workspace->mem || !workspace->buf || !workspace->cbuf)
 		goto fail;
@@ -380,7 +383,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
 		kunmap(cur_page);
 		cur_in += LZO_LEN;

-		if (seg_len > lzo1x_worst_compress(PAGE_SIZE)) {
+		if (seg_len > WORKSPACE_CBUF_LENGTH) {
 			/*
 			 * seg_len shouldn't be larger than we have allocated
 			 * for workspace->cbuf
@@ -433,7 +436,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 	struct workspace *workspace = list_entry(ws, struct workspace, list);
 	size_t in_len;
 	size_t out_len;
-	size_t max_segment_len = lzo1x_worst_compress(PAGE_SIZE);
+	size_t max_segment_len = WORKSPACE_BUF_LENGTH;
 	int ret = 0;
 	char *kaddr;
 	unsigned long bytes;
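
For context on the sizing, assuming the stock lzo1x_worst_compress() definition from <linux/lzo.h> (the macro below is copied for illustration; the assert is not part of the change):

	#include <assert.h>

	#define lzo1x_worst_compress(x)	((x) + ((x) / 16) + 64 + 3)	/* <linux/lzo.h> */

	/* worst case for a 4 KiB page: 4096 + 256 + 64 + 3 == 4419 bytes */
	static_assert(lzo1x_worst_compress(4096) == 4419, "LZO worst-case sizing");

Both constants expand to the same value today; the separate BUF/CBUF names only document which workspace buffer a given bound applies to.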

----------------------------------------------------------------------

@@ -143,16 +143,28 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
 	return ret;
 }

-/*
- * Allocate and add a new ordered_extent into the per-inode tree.
+/**
+ * Add an ordered extent to the per-inode tree.
+ *
+ * @inode:           Inode that this extent is for.
+ * @file_offset:     Logical offset in file where the extent starts.
+ * @num_bytes:       Logical length of extent in file.
+ * @ram_bytes:       Full length of unencoded data.
+ * @disk_bytenr:     Offset of extent on disk.
+ * @disk_num_bytes:  Size of extent on disk.
+ * @offset:          Offset into unencoded data where file data starts.
+ * @flags:           Flags specifying type of extent (1 << BTRFS_ORDERED_*).
+ * @compress_type:   Compression algorithm used for data.
  *
- * The tree is given a single reference on the ordered extent that was
- * inserted.
+ * Most of these parameters correspond to &struct btrfs_file_extent_item. The
+ * tree is given a single reference on the ordered extent that was inserted.
+ *
+ * Return: 0 or -ENOMEM.
  */
-static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int type, int dio,
-				      int compress_type)
+int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
+			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			     u64 disk_num_bytes, u64 offset, unsigned flags,
+			     int compress_type)
 {
 	struct btrfs_root *root = inode->root;
 	struct btrfs_fs_info *fs_info = root->fs_info;
@@ -161,7 +173,8 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	struct btrfs_ordered_extent *entry;
 	int ret;

-	if (type == BTRFS_ORDERED_NOCOW || type == BTRFS_ORDERED_PREALLOC) {
+	if (flags &
+	    ((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
 		/* For nocow write, we can release the qgroup rsv right now */
 		ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
 		if (ret < 0)
@@ -181,9 +194,11 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 		return -ENOMEM;

 	entry->file_offset = file_offset;
-	entry->disk_bytenr = disk_bytenr;
 	entry->num_bytes = num_bytes;
+	entry->ram_bytes = ram_bytes;
+	entry->disk_bytenr = disk_bytenr;
 	entry->disk_num_bytes = disk_num_bytes;
+	entry->offset = offset;
 	entry->bytes_left = num_bytes;
 	entry->inode = igrab(&inode->vfs_inode);
 	entry->compress_type = compress_type;
@@ -191,18 +206,12 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 		entry->qgroup_rsv = ret;
 	entry->physical = (u64)-1;

-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC ||
-	       type == BTRFS_ORDERED_COMPRESSED);
-	set_bit(type, &entry->flags);
+	ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
+	entry->flags = flags;

 	percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
 				 fs_info->delalloc_batch);

-	if (dio)
-		set_bit(BTRFS_ORDERED_DIRECT, &entry->flags);
-
 	/* one ref for the tree */
 	refcount_set(&entry->refs, 1);
 	init_waitqueue_head(&entry->wait);
@@ -247,41 +256,6 @@ static int __btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset
 	return 0;
 }

-int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
-			     int type)
-{
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes, type, 0,
-					  BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
-				 u64 disk_bytenr, u64 num_bytes,
-				 u64 disk_num_bytes, int type)
-{
-	ASSERT(type == BTRFS_ORDERED_REGULAR ||
-	       type == BTRFS_ORDERED_NOCOW ||
-	       type == BTRFS_ORDERED_PREALLOC);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes, type, 1,
-					  BTRFS_COMPRESS_NONE);
-}
-
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int compress_type)
-{
-	ASSERT(compress_type != BTRFS_COMPRESS_NONE);
-	return __btrfs_add_ordered_extent(inode, file_offset, disk_bytenr,
-					  num_bytes, disk_num_bytes,
-					  BTRFS_ORDERED_COMPRESSED, 0,
-					  compress_type);
-}
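
Since the wrappers are gone, every caller now spells the flags out. A sketch of how the three removed variants map onto the unified helper (variable names illustrative, following the pattern the remaining callers use):

	/* was btrfs_add_ordered_extent(inode, pos, bytenr, len, dlen, type): */
	ret = btrfs_add_ordered_extent(inode, pos, len, len, bytenr, dlen, 0,
				       1 << BTRFS_ORDERED_REGULAR,
				       BTRFS_COMPRESS_NONE);

	/* was the _dio() variant: the dio argument becomes an extra flag bit */
	ret = btrfs_add_ordered_extent(inode, pos, len, len, bytenr, dlen, 0,
				       (1 << BTRFS_ORDERED_REGULAR) |
				       (1 << BTRFS_ORDERED_DIRECT),
				       BTRFS_COMPRESS_NONE);

	/* was the _compress() variant: a type bit plus a real compress_type */
	ret = btrfs_add_ordered_extent(inode, pos, len, ram_len, bytenr, dlen, 0,
				       1 << BTRFS_ORDERED_COMPRESSED,
				       BTRFS_COMPRESS_ZSTD);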
 /*
  * Add a struct btrfs_ordered_sum into the list of checksums to be inserted
  * when an ordered extent is finished. If the list covers more than one
@@ -548,9 +522,15 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
 	spin_lock(&btrfs_inode->lock);
 	btrfs_mod_outstanding_extents(btrfs_inode, -1);
 	spin_unlock(&btrfs_inode->lock);
-	if (root != fs_info->tree_root)
-		btrfs_delalloc_release_metadata(btrfs_inode, entry->num_bytes,
-						false);
+	if (root != fs_info->tree_root) {
+		u64 release;
+
+		if (test_bit(BTRFS_ORDERED_ENCODED, &entry->flags))
+			release = entry->disk_num_bytes;
+		else
+			release = entry->num_bytes;
+		btrfs_delalloc_release_metadata(btrfs_inode, release, false);
+	}

 	percpu_counter_add_batch(&fs_info->ordered_bytes, -entry->num_bytes,
 				 fs_info->delalloc_batch);
@@ -1052,42 +1032,18 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
 	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
 	u64 file_offset = ordered->file_offset + pos;
 	u64 disk_bytenr = ordered->disk_bytenr + pos;
-	u64 num_bytes = len;
-	u64 disk_num_bytes = len;
-	int type;
-	unsigned long flags_masked = ordered->flags & ~(1 << BTRFS_ORDERED_DIRECT);
-	int compress_type = ordered->compress_type;
-	unsigned long weight;
-	int ret;
-
-	weight = hweight_long(flags_masked);
-	WARN_ON_ONCE(weight > 1);
-	if (!weight)
-		type = 0;
-	else
-		type = __ffs(flags_masked);
+	unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;

 	/*
-	 * The splitting extent is already counted and will be added again
-	 * in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
-	 * double counting.
+	 * The splitting extent is already counted and will be added again in
+	 * btrfs_add_ordered_extent_*(). Subtract len to avoid double counting.
 	 */
-	percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
+	percpu_counter_add_batch(&fs_info->ordered_bytes, -len,
 				 fs_info->delalloc_batch);
-	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
-		WARN_ON_ONCE(1);
-		ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
-				file_offset, disk_bytenr, num_bytes,
-				disk_num_bytes, compress_type);
-	} else if (test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags)) {
-		ret = btrfs_add_ordered_extent_dio(BTRFS_I(inode), file_offset,
-				disk_bytenr, num_bytes, disk_num_bytes, type);
-	} else {
-		ret = btrfs_add_ordered_extent(BTRFS_I(inode), file_offset,
-				disk_bytenr, num_bytes, disk_num_bytes, type);
-	}
-	return ret;
+	WARN_ON_ONCE(flags & (1 << BTRFS_ORDERED_COMPRESSED));
+	return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
+					disk_bytenr, len, 0, flags,
+					ordered->compress_type);
 }

 int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,

----------------------------------------------------------------------

@@ -74,8 +74,18 @@ enum {
 	BTRFS_ORDERED_LOGGED_CSUM,
 	/* We wait for this extent to complete in the current transaction */
 	BTRFS_ORDERED_PENDING,
+	/* BTRFS_IOC_ENCODED_WRITE */
+	BTRFS_ORDERED_ENCODED,
 };

+/* BTRFS_ORDERED_* flags that specify the type of the extent. */
+#define BTRFS_ORDERED_TYPE_FLAGS	((1UL << BTRFS_ORDERED_REGULAR) |	\
+					 (1UL << BTRFS_ORDERED_NOCOW) |		\
+					 (1UL << BTRFS_ORDERED_PREALLOC) |	\
+					 (1UL << BTRFS_ORDERED_COMPRESSED) |	\
+					 (1UL << BTRFS_ORDERED_DIRECT) |	\
+					 (1UL << BTRFS_ORDERED_ENCODED))
+
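One property this encoding relies on (my reading of the callers, not stated in the patch): DIRECT and ENCODED act as modifiers layered on top of at most one primary type, so an illustrative validity check could look like:

	unsigned long primary = flags & BTRFS_ORDERED_TYPE_FLAGS &
				~((1UL << BTRFS_ORDERED_DIRECT) |
				  (1UL << BTRFS_ORDERED_ENCODED));

	/* at most one of REGULAR, NOCOW, PREALLOC, COMPRESSED is set */
	ASSERT(hweight_long(primary) <= 1);
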
 struct btrfs_ordered_extent {
 	/* logical offset in the file */
 	u64 file_offset;
@@ -84,9 +94,11 @@ struct btrfs_ordered_extent {
 	 * These fields directly correspond to the same fields in
 	 * btrfs_file_extent_item.
 	 */
-	u64 disk_bytenr;
 	u64 num_bytes;
+	u64 ram_bytes;
+	u64 disk_bytenr;
 	u64 disk_num_bytes;
+	u64 offset;

 	/* number of bytes that still need writing */
 	u64 bytes_left;
@@ -179,14 +191,9 @@ bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
 				    struct btrfs_ordered_extent **cached,
 				    u64 file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
-			     u64 disk_bytenr, u64 num_bytes, u64 disk_num_bytes,
-			     int type);
-int btrfs_add_ordered_extent_dio(struct btrfs_inode *inode, u64 file_offset,
-				 u64 disk_bytenr, u64 num_bytes,
-				 u64 disk_num_bytes, int type);
-int btrfs_add_ordered_extent_compress(struct btrfs_inode *inode, u64 file_offset,
-				      u64 disk_bytenr, u64 num_bytes,
-				      u64 disk_num_bytes, int compress_type);
+			     u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
+			     u64 disk_num_bytes, u64 offset, unsigned flags,
+			     int compress_type);
 void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
 			   struct btrfs_ordered_sum *sum);
 struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,

----------------------------------------------------------------------

@@ -23,6 +23,7 @@ static const struct root_name_map root_map[] = {
 	{ BTRFS_QUOTA_TREE_OBJECTID,		"QUOTA_TREE"		},
 	{ BTRFS_UUID_TREE_OBJECTID,		"UUID_TREE"		},
 	{ BTRFS_FREE_SPACE_TREE_OBJECTID,	"FREE_SPACE_TREE"	},
+	{ BTRFS_BLOCK_GROUP_TREE_OBJECTID,	"BLOCK_GROUP_TREE"	},
 	{ BTRFS_DATA_RELOC_TREE_OBJECTID,	"DATA_RELOC_TREE"	},
 };
@@ -391,9 +392,9 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow)
 				       btrfs_header_owner(c),
 				       btrfs_node_ptr_generation(c, i),
 				       level - 1, &first_key);
-		if (IS_ERR(next)) {
+		if (IS_ERR(next))
 			continue;
-		} else if (!extent_buffer_uptodate(next)) {
+		if (!extent_buffer_uptodate(next)) {
 			free_extent_buffer(next);
 			continue;
 		}

----------------------------------------------------------------------

@@ -25,18 +25,6 @@
 #include "sysfs.h"
 #include "tree-mod-log.h"

-/* TODO XXX FIXME
- *  - subvol delete -> delete when ref goes to 0? delete limits also?
- *  - reorganize keys
- *  - compressed
- *  - sync
- *  - copy also limits on subvol creation
- *  - limit
- *  - caches for ulists
- *  - performance benchmarks
- *  - check all ioctl parameters
- */
-
 /*
  * Helpers to access qgroup reservation
  *
@@ -258,16 +246,19 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
 	return 0;
 }

-/* must be called with qgroup_lock held */
-static int add_relation_rb(struct btrfs_fs_info *fs_info,
-			   u64 memberid, u64 parentid)
+/*
+ * Add relation specified by two qgroups.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0        on success
+ *         -ENOENT  if one of the qgroups is NULL
+ *         <0       other errors
+ */
+static int __add_relation_rb(struct btrfs_qgroup *member, struct btrfs_qgroup *parent)
 {
-	struct btrfs_qgroup *member;
-	struct btrfs_qgroup *parent;
 	struct btrfs_qgroup_list *list;

-	member = find_qgroup_rb(fs_info, memberid);
-	parent = find_qgroup_rb(fs_info, parentid);
 	if (!member || !parent)
 		return -ENOENT;
@@ -283,7 +274,27 @@ static int add_relation_rb(struct btrfs_fs_info *fs_info,
 	return 0;
 }

-/* must be called with qgroup_lock held */
+/*
+ * Add relation specified by two qgroup ids.
+ *
+ * Must be called with qgroup_lock held.
+ *
+ * Return: 0        on success
+ *         -ENOENT  if one of the ids does not exist
+ *         <0       other errors
+ */
+static int add_relation_rb(struct btrfs_fs_info *fs_info, u64 memberid, u64 parentid)
+{
+	struct btrfs_qgroup *member;
+	struct btrfs_qgroup *parent;
+
+	member = find_qgroup_rb(fs_info, memberid);
+	parent = find_qgroup_rb(fs_info, parentid);
+
+	return __add_relation_rb(member, parent);
+}
+
+/* Must be called with qgroup_lock held */
 static int del_relation_rb(struct btrfs_fs_info *fs_info,
 			   u64 memberid, u64 parentid)
 {
@@ -948,6 +959,12 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info)
 	 */
 	lockdep_assert_held_write(&fs_info->subvol_sem);

+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "qgroups are currently unsupported in extent tree v2");
+		return -EINVAL;
+	}
+
 	mutex_lock(&fs_info->qgroup_ioctl_lock);
 	if (fs_info->quota_root)
 		goto out;
@@ -1451,7 +1468,7 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
 	}

 	spin_lock(&fs_info->qgroup_lock);
-	ret = add_relation_rb(fs_info, src, dst);
+	ret = __add_relation_rb(member, parent);
 	if (ret < 0) {
 		spin_unlock(&fs_info->qgroup_lock);
 		goto out;
@@ -3268,7 +3285,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans,
 static bool rescan_should_stop(struct btrfs_fs_info *fs_info)
 {
 	return btrfs_fs_closing(fs_info) ||
-		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state);
+		test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state) ||
+		!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
 }

 static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
@@ -3298,11 +3316,9 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 			err = PTR_ERR(trans);
 			break;
 		}
-		if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) {
-			err = -EINTR;
-		} else {
-			err = qgroup_rescan_leaf(trans, path);
-		}
+
+		err = qgroup_rescan_leaf(trans, path);
+
 		if (err > 0)
 			btrfs_commit_transaction(trans);
 		else
@@ -3316,7 +3332,7 @@ static void btrfs_qgroup_rescan_worker(struct btrfs_work *work)
 	if (err > 0 &&
 	    fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT) {
 		fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
-	} else if (err < 0) {
+	} else if (err < 0 || stopped) {
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
 	}
 	mutex_unlock(&fs_info->qgroup_rescan_lock);

----------------------------------------------------------------------

@@ -277,7 +277,7 @@ static int clone_copy_inline_extent(struct inode *dst,
 					    path->slots[0]),
 			    size);
 		btrfs_update_inode_bytes(BTRFS_I(dst), datal, drop_args.bytes_found);
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(dst)->runtime_flags);
+		btrfs_set_inode_full_sync(BTRFS_I(dst));
 		ret = btrfs_inode_set_file_extent_range(BTRFS_I(dst), 0, aligned_end);
 out:
 	if (!ret && !trans) {
@@ -494,7 +494,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 						       &clone_info, &trans);
 			if (ret)
 				goto out;
-		} else if (type == BTRFS_FILE_EXTENT_INLINE) {
+		} else {
+			ASSERT(type == BTRFS_FILE_EXTENT_INLINE);
 			/*
 			 * Inline extents always have to start at file offset 0
 			 * and can never be bigger than the sector size. We can
@@ -505,8 +506,12 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 			 */
 			ASSERT(key.offset == 0);
 			ASSERT(datal <= fs_info->sectorsize);
-			if (key.offset != 0 || datal > fs_info->sectorsize)
-				return -EUCLEAN;
+			if (WARN_ON(type != BTRFS_FILE_EXTENT_INLINE) ||
+			    WARN_ON(key.offset != 0) ||
+			    WARN_ON(datal > fs_info->sectorsize)) {
+				ret = -EUCLEAN;
+				goto out;
+			}

 			ret = clone_copy_inline_extent(inode, path, &new_key,
 						       drop_start, datal, size,
@@ -518,17 +523,22 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 		btrfs_release_path(path);

 		/*
-		 * If this is a new extent update the last_reflink_trans of both
-		 * inodes. This is used by fsync to make sure it does not log
-		 * multiple checksum items with overlapping ranges. For older
-		 * extents we don't need to do it since inode logging skips the
-		 * checksums for older extents. Also ignore holes and inline
-		 * extents because they don't have checksums in the csum tree.
+		 * Whenever we share an extent we update the last_reflink_trans
+		 * of each inode to the current transaction. This is needed to
+		 * make sure fsync does not log multiple checksum items with
+		 * overlapping ranges (because some extent items might refer
+		 * only to sections of the original extent). For the destination
+		 * inode we do this regardless of the generation of the extents
+		 * or even if they are inline extents or explicit holes, to make
+		 * sure a full fsync does not skip them. For the source inode,
+		 * we only need to update last_reflink_trans in case it's a new
+		 * extent that is not a hole or an inline extent, to deal with
+		 * the checksums problem on fsync.
 		 */
-		if (extent_gen == trans->transid && disko > 0) {
+		if (extent_gen == trans->transid && disko > 0)
 			BTRFS_I(src)->last_reflink_trans = trans->transid;
-			BTRFS_I(inode)->last_reflink_trans = trans->transid;
-		}
+
+		BTRFS_I(inode)->last_reflink_trans = trans->transid;

 		last_dest_end = ALIGN(new_key.offset + datal,
 				      fs_info->sectorsize);
@@ -575,8 +585,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,
 	 * replaced file extent items.
 	 */
 	if (last_dest_end >= i_size_read(inode))
-		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
-			&BTRFS_I(inode)->runtime_flags);
+		btrfs_set_inode_full_sync(BTRFS_I(inode));

 	ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
 			last_dest_end, destoff + len - 1, NULL, &trans);
@@ -772,9 +781,7 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
 		if (btrfs_root_readonly(root_out))
 			return -EROFS;

-		if (file_in->f_path.mnt != file_out->f_path.mnt ||
-		    inode_in->i_sb != inode_out->i_sb)
-			return -EXDEV;
+		ASSERT(inode_in->i_sb == inode_out->i_sb);
 	}

 	/* Don't make the dst file partly checksummed */
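
Dropping the mount comparison is what allows reflink between two mounts of the same filesystem; a user-space sketch of the now-permitted case (paths hypothetical, error handling trimmed):

	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>	/* FICLONE */

	int main(void)
	{
		/* the same filesystem mounted twice, e.g. two subvolume mounts */
		int src = open("/mnt/a/file", O_RDONLY);
		int dst = open("/mnt/b/clone", O_WRONLY | O_CREAT, 0644);

		if (src < 0 || dst < 0)
			return 1;
		/* previously failed with -EXDEV; now shares extents across mounts */
		if (ioctl(dst, FICLONE, src))
			return 1;
		close(src);
		close(dst);
		return 0;
	}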

----------------------------------------------------------------------

@@ -2599,9 +2599,9 @@ static int get_tree_block_key(struct btrfs_fs_info *fs_info,
 	eb = read_tree_block(fs_info, block->bytenr, block->owner,
 			     block->key.offset, block->level, NULL);
-	if (IS_ERR(eb)) {
+	if (IS_ERR(eb))
 		return PTR_ERR(eb);
-	} else if (!extent_buffer_uptodate(eb)) {
+	if (!extent_buffer_uptodate(eb)) {
 		free_extent_buffer(eb);
 		return -EIO;
 	}
@@ -2997,7 +2997,7 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
 		/* Reserve metadata for this range */
 		ret = btrfs_delalloc_reserve_metadata(BTRFS_I(inode),
-						      clamped_len);
+						      clamped_len, clamped_len);
 		if (ret)
 			goto release_page;
@@ -4123,9 +4123,8 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)
  * this function resumes merging reloc trees with corresponding fs trees.
  * this is important for keeping the sharing of tree blocks
  */
-int btrfs_recover_relocation(struct btrfs_root *root)
+int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
 {
-	struct btrfs_fs_info *fs_info = root->fs_info;
 	LIST_HEAD(reloc_roots);
 	struct btrfs_key key;
 	struct btrfs_root *fs_root;
@@ -4166,7 +4165,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
 		    key.type != BTRFS_ROOT_ITEM_KEY)
 			break;

-		reloc_root = btrfs_read_tree_root(root, &key);
+		reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
 		if (IS_ERR(reloc_root)) {
 			err = PTR_ERR(reloc_root);
 			goto out;

----------------------------------------------------------------------

@@ -3190,7 +3190,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
 	u64 generation;
 	int mirror_num;
 	struct btrfs_key key;
-	u64 increment = map->stripe_len;
+	u64 increment;
 	u64 offset;
 	u64 extent_logical;
 	u64 extent_physical;

----------------------------------------------------------------------

@@ -528,17 +528,12 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p,

 static int fs_path_copy(struct fs_path *p, struct fs_path *from)
 {
-	int ret;
-
 	p->reversed = from->reversed;
 	fs_path_reset(p);

-	ret = fs_path_add_path(p, from);
-
-	return ret;
+	return fs_path_add_path(p, from);
 }

 static void fs_path_unreverse(struct fs_path *p)
 {
 	char *tmp;
@@ -7477,10 +7472,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
 		  root->root_key.objectid, root->dedupe_in_progress);
 }

-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
 {
 	int ret = 0;
-	struct btrfs_root *send_root = BTRFS_I(file_inode(mnt_file))->root;
+	struct btrfs_root *send_root = BTRFS_I(inode)->root;
 	struct btrfs_fs_info *fs_info = send_root->fs_info;
 	struct btrfs_root *clone_root;
 	struct send_ctx *sctx = NULL;

----------------------------------------------------------------------

@@ -126,7 +126,7 @@ enum {
 #define BTRFS_SEND_A_MAX (__BTRFS_SEND_A_MAX - 1)

 #ifdef __KERNEL__
-long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg);
+long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
 #endif

 #endif

----------------------------------------------------------------------

@@ -737,6 +737,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
 	u64 thresh = div_factor_fine(space_info->total_bytes, 90);
 	u64 used;

+	lockdep_assert_held(&space_info->lock);
+
 	/* If we're just plain full then async reclaim just slows us down. */
 	if ((space_info->bytes_used + space_info->bytes_reserved +
 	     global_rsv_size) >= thresh)
@@ -1061,7 +1063,6 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 				 trans_rsv->reserved;
 		if (block_rsv_size < space_info->bytes_may_use)
 			delalloc_size = space_info->bytes_may_use - block_rsv_size;
-		spin_unlock(&space_info->lock);

 		/*
 		 * We don't want to include the global_rsv in our calculation,
@@ -1092,6 +1093,8 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
 			flush = FLUSH_DELAYED_REFS_NR;
 		}

+		spin_unlock(&space_info->lock);
+
 		/*
 		 * We don't want to reclaim everything, just a portion, so scale
 		 * down the to_reclaim by 1/4. If it takes us down to 0,

----------------------------------------------------------------------

@@ -66,6 +66,52 @@ static struct file_system_type btrfs_root_fs_type;

 static int btrfs_remount(struct super_block *sb, int *flags, char *data);

+#ifdef CONFIG_PRINTK
+
+#define STATE_STRING_PREFACE	": state "
+#define STATE_STRING_BUF_LEN	(sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
+
+/*
+ * Characters to print to indicate error conditions or uncommon filesystem state.
+ * RO is not an error.
+ */
+static const char fs_state_chars[] = {
+	[BTRFS_FS_STATE_ERROR]			= 'E',
+	[BTRFS_FS_STATE_REMOUNTING]		= 'M',
+	[BTRFS_FS_STATE_RO]			= 0,
+	[BTRFS_FS_STATE_TRANS_ABORTED]		= 'A',
+	[BTRFS_FS_STATE_DEV_REPLACING]		= 'R',
+	[BTRFS_FS_STATE_DUMMY_FS_INFO]		= 0,
+	[BTRFS_FS_STATE_NO_CSUMS]		= 'C',
+	[BTRFS_FS_STATE_LOG_CLEANUP_ERROR]	= 'L',
+};
+
+static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
+{
+	unsigned int bit;
+	bool states_printed = false;
+	unsigned long fs_state = READ_ONCE(info->fs_state);
+	char *curr = buf;
+
+	memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
+	curr += sizeof(STATE_STRING_PREFACE) - 1;
+
+	for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
+		WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
+		if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
+			*curr++ = fs_state_chars[bit];
+			states_printed = true;
+		}
+	}
+
+	/* If no states were printed, reset the buffer */
+	if (!states_printed)
+		curr = buf;
+
+	*curr++ = 0;
+}
+#endif
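
The visible effect is a state suffix after the device name. Illustrative console lines (device name and line number made up; 'EA' would mean error plus aborted transaction, 'M' a remount in progress):

	BTRFS: error (device sda1: state EA) in btrfs_commit_transaction:2340: errno=-5 IO failure
	BTRFS info (device sda1: state M): disk space caching is enabled

A healthy read-write mount prints no suffix at all, since the buffer is reset whenever none of the flagged bits map to a character.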
 /*
  * Generally the error codes correspond to their respective errors, but there
  * are a few special cases.
@@ -128,6 +174,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 {
 	struct super_block *sb = fs_info->sb;
 #ifdef CONFIG_PRINTK
+	char statestr[STATE_STRING_BUF_LEN];
 	const char *errstr;
 #endif
@@ -140,6 +187,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function

 #ifdef CONFIG_PRINTK
 	errstr = btrfs_decode_error(errno);
+	btrfs_state_to_string(fs_info, statestr);
 	if (fmt) {
 		struct va_format vaf;
 		va_list args;
@@ -148,12 +196,12 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
 		vaf.fmt = fmt;
 		vaf.va = &args;

-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s (%pV)\n",
-			sb->s_id, function, line, errno, errstr, &vaf);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s (%pV)\n",
+			sb->s_id, statestr, function, line, errno, errstr, &vaf);
 		va_end(args);
 	} else {
-		pr_crit("BTRFS: error (device %s) in %s:%d: errno=%d %s\n",
-			sb->s_id, function, line, errno, errstr);
+		pr_crit("BTRFS: error (device %s%s) in %s:%d: errno=%d %s\n",
+			sb->s_id, statestr, function, line, errno, errstr);
 	}
 #endif
@@ -240,11 +288,15 @@ void __cold btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 		vaf.va = &args;

 		if (__ratelimit(ratelimit)) {
-			if (fs_info)
-				printk("%sBTRFS %s (device %s): %pV\n", lvl, type,
-					fs_info->sb->s_id, &vaf);
-			else
+			if (fs_info) {
+				char statestr[STATE_STRING_BUF_LEN];
+
+				btrfs_state_to_string(fs_info, statestr);
+				printk("%sBTRFS %s (device %s%s): %pV\n", lvl, type,
+					fs_info->sb->s_id, statestr, &vaf);
+			} else {
 				printk("%sBTRFS %s: %pV\n", lvl, type, &vaf);
+			}
 		}

 	va_end(args);
@@ -861,6 +913,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			break;
 		case Opt_space_cache:
 		case Opt_space_cache_version:
+			/*
+			 * We already set FREE_SPACE_TREE above because we have
+			 * compat_ro(FREE_SPACE_TREE) set, and we aren't going
+			 * to allow v1 to be set for extent tree v2, simply
+			 * ignore this setting if we're extent tree v2.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			if (token == Opt_space_cache ||
 			    strcmp(args[0].from, "v1") == 0) {
 				btrfs_clear_opt(info->mount_opt,
@@ -881,6 +941,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
 			break;
 		case Opt_no_space_cache:
+			/*
+			 * We cannot operate without the free space tree with
+			 * extent tree v2, ignore this option.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			if (btrfs_test_opt(info, SPACE_CACHE)) {
 				btrfs_clear_and_info(info, SPACE_CACHE,
 					     "disabling disk space caching");
@@ -896,6 +962,12 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 	"the 'inode_cache' option is deprecated and has no effect since 5.11");
 			break;
 		case Opt_clear_cache:
+			/*
+			 * We cannot clear the free space tree with extent tree
+			 * v2, ignore this option.
+			 */
+			if (btrfs_fs_incompat(info, EXTENT_TREE_V2))
+				break;
 			btrfs_set_and_info(info, CLEAR_CACHE,
 					   "force clearing of disk cache");
 			break;
@@ -2383,6 +2455,7 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 {
 	struct btrfs_ioctl_vol_args *vol;
 	struct btrfs_device *device = NULL;
+	dev_t devt = 0;
 	int ret = -ENOTTY;

 	if (!capable(CAP_SYS_ADMIN))
@@ -2402,7 +2475,12 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
 		mutex_unlock(&uuid_mutex);
 		break;
 	case BTRFS_IOC_FORGET_DEV:
-		ret = btrfs_forget_devices(vol->name);
+		if (vol->name[0] != 0) {
+			ret = lookup_bdev(vol->name, &devt);
+			if (ret)
+				break;
+		}
+
+		ret = btrfs_forget_devices(devt);
 		break;
 	case BTRFS_IOC_DEVICES_READY:
 		mutex_lock(&uuid_mutex);
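
For reference, user space reaches this path through the control device rather than a mounted file. A hedged sketch of what a forget request boils down to (helper name and paths illustrative):

	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Drop one cached, unmounted device from the module's list; with an
	 * empty name the kernel now passes devt == 0 and forgets all of them. */
	static int forget_device(const char *path)
	{
		struct btrfs_ioctl_vol_args vol;
		int fd = open("/dev/btrfs-control", O_RDWR);
		int ret;

		if (fd < 0)
			return -1;
		memset(&vol, 0, sizeof(vol));
		if (path)
			strncpy(vol.name, path, BTRFS_PATH_NAME_MAX);
		ret = ioctl(fd, BTRFS_IOC_FORGET_DEV, &vol);
		close(fd);
		return ret;
	}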

----------------------------------------------------------------------

@@ -283,9 +283,11 @@ BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES);
 BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID);
 BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE);
 BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34);
-/* Remove once support for zoned allocation is feature complete */
 #ifdef CONFIG_BTRFS_DEBUG
+/* Remove once support for zoned allocation is feature complete */
 BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED);
+/* Remove once support for extent tree v2 is feature complete */
+BTRFS_FEAT_ATTR_INCOMPAT(extent_tree_v2, EXTENT_TREE_V2);
 #endif
 #ifdef CONFIG_FS_VERITY
 BTRFS_FEAT_ATTR_COMPAT_RO(verity, VERITY);
@@ -314,6 +316,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(raid1c34),
 #ifdef CONFIG_BTRFS_DEBUG
 	BTRFS_FEAT_ATTR_PTR(zoned),
+	BTRFS_FEAT_ATTR_PTR(extent_tree_v2),
 #endif
 #ifdef CONFIG_FS_VERITY
 	BTRFS_FEAT_ATTR_PTR(verity),
@@ -1104,6 +1107,11 @@ static inline struct btrfs_fs_info *to_fs_info(struct kobject *kobj)
 static char btrfs_unknown_feature_names[FEAT_MAX][NUM_FEATURE_BITS][BTRFS_FEATURE_NAME_MAX];
 static struct btrfs_feature_attr btrfs_feature_attrs[FEAT_MAX][NUM_FEATURE_BITS];

+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names) ==
+	      ARRAY_SIZE(btrfs_feature_attrs));
+static_assert(ARRAY_SIZE(btrfs_unknown_feature_names[0]) ==
+	      ARRAY_SIZE(btrfs_feature_attrs[0]));
+
 static const u64 supported_feature_masks[FEAT_MAX] = {
 	[FEAT_COMPAT]    = BTRFS_FEATURE_COMPAT_SUPP,
 	[FEAT_COMPAT_RO] = BTRFS_FEATURE_COMPAT_RO_SUPP,
@@ -1272,11 +1280,6 @@ static void init_feature_attrs(void)
 	struct btrfs_feature_attr *fa;
 	int set, i;

-	BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names) !=
-		     ARRAY_SIZE(btrfs_feature_attrs));
-	BUILD_BUG_ON(ARRAY_SIZE(btrfs_unknown_feature_names[0]) !=
-		     ARRAY_SIZE(btrfs_feature_attrs[0]));
-
 	memset(btrfs_feature_attrs, 0, sizeof(btrfs_feature_attrs));
 	memset(btrfs_unknown_feature_names, 0,
 	       sizeof(btrfs_unknown_feature_names));

----------------------------------------------------------------------

@@ -15,6 +15,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
 	struct extent_map *em;
 	struct rb_node *node;

+	write_lock(&em_tree->lock);
 	while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
 		node = rb_first_cached(&em_tree->map);
 		em = rb_entry(node, struct extent_map, rb_node);
@@ -32,6 +33,7 @@ static void free_extent_map_tree(struct extent_map_tree *em_tree)
 #endif
 		free_extent_map(em);
 	}
+	write_unlock(&em_tree->lock);
 }

 /*

----------------------------------------------------------------------

@@ -1911,6 +1911,14 @@ static void update_super_roots(struct btrfs_fs_info *fs_info)
 		super->cache_generation = 0;
 	if (test_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags))
 		super->uuid_tree_generation = root_item->generation;
+
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		root_item = &fs_info->block_group_root->root_item;
+
+		super->block_group_root = root_item->bytenr;
+		super->block_group_root_generation = root_item->generation;
+		super->block_group_root_level = root_item->level;
+	}
 }

 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -2362,6 +2370,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
 	list_add_tail(&fs_info->chunk_root->dirty_list,
 		      &cur_trans->switch_commits);

+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_set_root_node(&fs_info->block_group_root->root_item,
+				    fs_info->block_group_root->node);
+		list_add_tail(&fs_info->block_group_root->dirty_list,
+			      &cur_trans->switch_commits);
+	}
+
 	switch_commit_roots(trans);

 	ASSERT(list_empty(&cur_trans->dirty_bgs));
@@ -2490,10 +2505,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
  * because btrfs_commit_super will poke cleaner thread and it will process it a
  * few seconds later.
  */
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info)
 {
+	struct btrfs_root *root;
 	int ret;
-	struct btrfs_fs_info *fs_info = root->fs_info;

 	spin_lock(&fs_info->trans_lock);
 	if (list_empty(&fs_info->dead_roots)) {

----------------------------------------------------------------------

@@ -217,7 +217,7 @@ int btrfs_wait_for_commit(struct btrfs_fs_info *fs_info, u64 transid);
 void btrfs_add_dead_root(struct btrfs_root *root);
 int btrfs_defrag_root(struct btrfs_root *root);
 void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
-int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root);
+int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
 void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);

----------------------------------------------------------------------

@@ -639,8 +639,10 @@ static void block_group_err(const struct extent_buffer *eb, int slot,
 static int check_block_group_item(struct extent_buffer *leaf,
 				  struct btrfs_key *key, int slot)
 {
+	struct btrfs_fs_info *fs_info = leaf->fs_info;
 	struct btrfs_block_group_item bgi;
 	u32 item_size = btrfs_item_size(leaf, slot);
+	u64 chunk_objectid;
 	u64 flags;
 	u64 type;
@@ -663,8 +665,23 @@ static int check_block_group_item(struct extent_buffer *leaf,
 	read_extent_buffer(leaf, &bgi, btrfs_item_ptr_offset(leaf, slot),
 			   sizeof(bgi));
-	if (unlikely(btrfs_stack_block_group_chunk_objectid(&bgi) !=
-		     BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
+	chunk_objectid = btrfs_stack_block_group_chunk_objectid(&bgi);
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		/*
+		 * We don't init the nr_global_roots until we load the global
+		 * roots, so this could be 0 at mount time. If it's 0 we'll
+		 * just assume we're fine, and later we'll check against our
+		 * actual value.
+		 */
+		if (unlikely(fs_info->nr_global_roots &&
+			     chunk_objectid >= fs_info->nr_global_roots)) {
+			block_group_err(leaf, slot,
+	"invalid block group global root id, have %llu, needs to be <= %llu",
+					chunk_objectid,
+					fs_info->nr_global_roots);
+			return -EUCLEAN;
+		}
+	} else if (unlikely(chunk_objectid != BTRFS_FIRST_CHUNK_TREE_OBJECTID)) {
 		block_group_err(leaf, slot,
 		"invalid block group chunk objectid, have %llu expect %llu",
 				btrfs_stack_block_group_chunk_objectid(&bgi),
@@ -1648,7 +1665,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 		/* These trees must never be empty */
 		if (unlikely(owner == BTRFS_ROOT_TREE_OBJECTID ||
 			     owner == BTRFS_CHUNK_TREE_OBJECTID ||
-			     owner == BTRFS_EXTENT_TREE_OBJECTID ||
 			     owner == BTRFS_DEV_TREE_OBJECTID ||
 			     owner == BTRFS_FS_TREE_OBJECTID ||
 			     owner == BTRFS_DATA_RELOC_TREE_OBJECTID)) {
@@ -1657,12 +1673,25 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
 				    owner);
 			return -EUCLEAN;
 		}
+
 		/* Unknown tree */
 		if (unlikely(owner == 0)) {
 			generic_err(leaf, 0,
 				"invalid owner, root 0 is not defined");
 			return -EUCLEAN;
 		}
+
+		/* EXTENT_TREE_V2 can have empty extent trees. */
+		if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
+			return 0;
+
+		if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
+			generic_err(leaf, 0,
+			"invalid root, root %llu must never be empty",
+				    owner);
+			return -EUCLEAN;
+		}
+
 		return 0;
 	}

----------------------------------------------------------------------

(file diff suppressed because it is too large)

----------------------------------------------------------------------

@@ -17,6 +17,8 @@ struct btrfs_log_ctx {
 	int log_transid;
 	bool log_new_dentries;
 	bool logging_new_name;
+	/* Indicate if the inode being logged was logged before. */
+	bool logged_before;
 	/* Tracks the last logged dir item/index key offset. */
 	u64 last_dir_item_offset;
 	struct inode *inode;
@@ -32,6 +34,7 @@ static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx,
 	ctx->log_transid = 0;
 	ctx->log_new_dentries = false;
 	ctx->logging_new_name = false;
+	ctx->logged_before = false;
 	ctx->inode = inode;
 	INIT_LIST_HEAD(&ctx->list);
 	INIT_LIST_HEAD(&ctx->ordered_extents);
@@ -86,7 +89,7 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
 void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
 				   struct btrfs_inode *dir);
 void btrfs_log_new_name(struct btrfs_trans_handle *trans,
-			struct btrfs_inode *inode, struct btrfs_inode *old_dir,
-			struct dentry *parent);
+			struct dentry *old_dentry, struct btrfs_inode *old_dir,
+			u64 old_dir_index, struct dentry *parent);
 #endif

View File

@ -534,30 +534,20 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
return ret; return ret;
} }
static bool device_path_matched(const char *path, struct btrfs_device *device) /**
{ * Search and remove all stale devices (which are not mounted).
int found;
rcu_read_lock();
found = strcmp(rcu_str_deref(device->name), path);
rcu_read_unlock();
return found == 0;
}
/*
* Search and remove all stale (devices which are not mounted) devices.
* When both inputs are NULL, it will search and release all stale devices. * When both inputs are NULL, it will search and release all stale devices.
* path: Optional. When provided will it release all unmounted devices *
* matching this path only. * @devt: Optional. When provided will it release all unmounted devices
* skip_dev: Optional. Will skip this device when searching for the stale * matching this devt only.
* @skip_device: Optional. Will skip this device when searching for the stale
* devices. * devices.
* Return: 0 for success or if @path is NULL. *
* -EBUSY if @path is a mounted device. * Return: 0 for success or if @devt is 0.
* -ENOENT if @path does not match any device in the list. * -EBUSY if @devt is a mounted device.
* -ENOENT if @devt does not match any device in the list.
*/ */
static int btrfs_free_stale_devices(const char *path, static int btrfs_free_stale_devices(dev_t devt, struct btrfs_device *skip_device)
struct btrfs_device *skip_device)
{ {
struct btrfs_fs_devices *fs_devices, *tmp_fs_devices; struct btrfs_fs_devices *fs_devices, *tmp_fs_devices;
struct btrfs_device *device, *tmp_device; struct btrfs_device *device, *tmp_device;
@ -565,7 +555,7 @@ static int btrfs_free_stale_devices(const char *path,
lockdep_assert_held(&uuid_mutex); lockdep_assert_held(&uuid_mutex);
if (path) if (devt)
ret = -ENOENT; ret = -ENOENT;
list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) { list_for_each_entry_safe(fs_devices, tmp_fs_devices, &fs_uuids, fs_list) {
@ -575,13 +565,11 @@ static int btrfs_free_stale_devices(const char *path,
&fs_devices->devices, dev_list) { &fs_devices->devices, dev_list) {
if (skip_device && skip_device == device) if (skip_device && skip_device == device)
continue; continue;
if (path && !device->name) if (devt && devt != device->devt)
continue;
if (path && !device_path_matched(path, device))
continue; continue;
if (fs_devices->opened) { if (fs_devices->opened) {
/* for an already deleted device return 0 */ /* for an already deleted device return 0 */
if (path && ret != 0) if (devt && ret != 0)
ret = -EBUSY; ret = -EBUSY;
break; break;
} }
@ -614,7 +602,6 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
struct btrfs_device *device, fmode_t flags, struct btrfs_device *device, fmode_t flags,
void *holder) void *holder)
{ {
struct request_queue *q;
struct block_device *bdev; struct block_device *bdev;
struct btrfs_super_block *disk_super; struct btrfs_super_block *disk_super;
u64 devid; u64 devid;
@@ -656,8 +643,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
 		set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	}
 
-	q = bdev_get_queue(bdev);
-	if (!blk_queue_nonrot(q))
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
 		fs_devices->rotating = true;
 
 	device->bdev = bdev;
@@ -781,11 +767,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 	struct rcu_string *name;
 	u64 found_transid = btrfs_super_generation(disk_super);
 	u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
+	dev_t path_devt;
+	int error;
 	bool has_metadata_uuid = (btrfs_super_incompat_flags(disk_super) &
		BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
 	bool fsid_change_in_progress = (btrfs_super_flags(disk_super) &
					BTRFS_SUPER_FLAG_CHANGING_FSID_V2);
 
+	error = lookup_bdev(path, &path_devt);
+	if (error)
+		return ERR_PTR(error);
+
 	if (fsid_change_in_progress) {
 		if (!has_metadata_uuid)
 			fs_devices = find_fsid_inprogress(disk_super);
@@ -868,6 +860,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			return ERR_PTR(-ENOMEM);
 		}
 		rcu_assign_pointer(device->name, name);
+		device->devt = path_devt;
 
 		list_add_rcu(&device->dev_list, &fs_devices->devices);
 		fs_devices->num_devices++;
@@ -928,25 +921,15 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 		/*
 		 * We are going to replace the device path for a given devid,
 		 * make sure it's the same device if the device is mounted
+		 *
+		 * NOTE: the device->fs_info may not be reliable here so pass
+		 * in a NULL to message helpers instead. This avoids a possible
+		 * use-after-free when the fs_info and fs_info->sb are already
+		 * torn down.
 		 */
 		if (device->bdev) {
-			int error;
-			dev_t path_dev;
-
-			error = lookup_bdev(path, &path_dev);
-			if (error) {
-				mutex_unlock(&fs_devices->device_list_mutex);
-				return ERR_PTR(error);
-			}
-
-			if (device->bdev->bd_dev != path_dev) {
+			if (device->devt != path_devt) {
 				mutex_unlock(&fs_devices->device_list_mutex);
-				/*
-				 * device->fs_info may not be reliable here, so
-				 * pass in a NULL instead. This avoids a
-				 * possible use-after-free when the fs_info and
-				 * fs_info->sb are already torn down.
-				 */
 				btrfs_warn_in_rcu(NULL,
	"duplicate device %s devid %llu generation %llu scanned by %s (%d)",
					path, devid, found_transid,
@@ -954,7 +937,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
					task_pid_nr(current));
 				return ERR_PTR(-EEXIST);
 			}
-			btrfs_info_in_rcu(device->fs_info,
+			btrfs_info_in_rcu(NULL,
	"devid %llu device path %s changed to %s scanned by %s (%d)",
					devid, rcu_str_deref(device->name),
					path, current->comm,
@@ -972,6 +955,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
 			fs_devices->missing_devices--;
 			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
 		}
+		device->devt = path_devt;
 	}
 
@@ -1331,12 +1315,12 @@ static struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev
 	return disk_super;
 }
 
-int btrfs_forget_devices(const char *path)
+int btrfs_forget_devices(dev_t devt)
 {
 	int ret;
 
 	mutex_lock(&uuid_mutex);
-	ret = btrfs_free_stale_devices(strlen(path) ? path : NULL, NULL);
+	ret = btrfs_free_stale_devices(devt, NULL);
 	mutex_unlock(&uuid_mutex);
 
 	return ret;
@@ -1385,10 +1369,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, fmode_t flags,
 	}
 
 	device = device_list_add(path, disk_super, &new_device_added);
-	if (!IS_ERR(device)) {
-		if (new_device_added)
-			btrfs_free_stale_devices(path, device);
-	}
+	if (!IS_ERR(device) && new_device_added)
+		btrfs_free_stale_devices(device->devt, device);
 
 	btrfs_release_disk_super(disk_super);
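
Throughout this part of the series the device list is keyed by dev_t rather than by path string, so two different paths naming the same node (say /dev/sda1 and a /dev/disk/by-id symlink behind device mapper) compare equal. A minimal sketch of the comparison, assuming only lookup_bdev() and the new devt member; the helper name is illustrative, not from the series:

	/* Sketch only: resolve a user-supplied path to a dev_t and compare it
	 * against a cached btrfs_device, as device_list_add() now does. */
	static int devt_matches(const char *path, const struct btrfs_device *device)
	{
		dev_t devt;
		int ret;

		ret = lookup_bdev(path, &devt);	/* resolves the path via the VFS */
		if (ret)
			return ret;

		return device->devt == devt;
	}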
@@ -2102,6 +2084,11 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info,
 	u64 num_devices;
 	int ret = 0;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info, "device remove not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	/*
 	 * The device list in fs_devices is accessed without locks (neither
 	 * uuid_mutex nor device_list_mutex) as it won't change on a mounted
@@ -2606,7 +2593,6 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
 {
 	struct btrfs_root *root = fs_info->dev_root;
-	struct request_queue *q;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_device *device;
 	struct block_device *bdev;
@@ -2668,6 +2654,9 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	device->fs_info = fs_info;
 	device->bdev = bdev;
+	ret = lookup_bdev(device_path, &device->devt);
+	if (ret)
+		goto error_free_device;
 
 	ret = btrfs_get_dev_zone_info(device, false);
 	if (ret)
@@ -2679,7 +2668,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 		goto error_free_zone;
 	}
 
-	q = bdev_get_queue(bdev);
 	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
 	device->generation = trans->transid;
 	device->io_width = fs_info->sectorsize;
@@ -2727,7 +2715,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
 
-	if (!blk_queue_nonrot(q))
+	if (!blk_queue_nonrot(bdev_get_queue(bdev)))
 		fs_devices->rotating = true;
 
 	orig_super_total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
@@ -2814,7 +2802,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	 * We can ignore the return value as it typically returns -EINVAL and
 	 * only succeeds if the device was an alien.
 	 */
-	btrfs_forget_devices(device_path);
+	btrfs_forget_devices(device->devt);
 
 	/* Update ctime/mtime for blkid or udev */
 	update_dev_time(device_path);
@@ -3251,6 +3239,12 @@ int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
 	u64 length;
 	int ret;
 
+	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
+		btrfs_err(fs_info,
+			  "relocate: not supported on extent tree v2 yet");
+		return -EINVAL;
+	}
+
 	/*
 	 * Prevent races with automatic removal of unused block groups.
 	 * After we relocate and before we remove the chunk with offset
@@ -7060,6 +7054,27 @@ static void warn_32bit_meta_chunk(struct btrfs_fs_info *fs_info,
 }
 #endif
 
+static struct btrfs_device *handle_missing_device(struct btrfs_fs_info *fs_info,
+						  u64 devid, u8 *uuid)
+{
+	struct btrfs_device *dev;
+
+	if (!btrfs_test_opt(fs_info, DEGRADED)) {
+		btrfs_report_missing_device(fs_info, devid, uuid, true);
+		return ERR_PTR(-ENOENT);
+	}
+
+	dev = add_missing_dev(fs_info->fs_devices, devid, uuid);
+	if (IS_ERR(dev)) {
+		btrfs_err(fs_info, "failed to init missing device %llu: %ld",
+			  devid, PTR_ERR(dev));
+		return dev;
+	}
+
+	btrfs_report_missing_device(fs_info, devid, uuid, false);
+
+	return dev;
+}
+
 static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
 			  struct btrfs_chunk *chunk)
 {
@@ -7147,28 +7162,17 @@ static int read_one_chunk(struct btrfs_key *key, struct extent_buffer *leaf,
				   BTRFS_UUID_SIZE);
 		args.uuid = uuid;
 		map->stripes[i].dev = btrfs_find_device(fs_info->fs_devices, &args);
-		if (!map->stripes[i].dev &&
-		    !btrfs_test_opt(fs_info, DEGRADED)) {
-			free_extent_map(em);
-			btrfs_report_missing_device(fs_info, devid, uuid, true);
-			return -ENOENT;
-		}
 		if (!map->stripes[i].dev) {
-			map->stripes[i].dev =
-				add_missing_dev(fs_info->fs_devices, devid,
-						uuid);
+			map->stripes[i].dev = handle_missing_device(fs_info,
+								    devid, uuid);
 			if (IS_ERR(map->stripes[i].dev)) {
 				free_extent_map(em);
-				btrfs_err(fs_info,
-					"failed to init missing dev %llu: %ld",
-					devid, PTR_ERR(map->stripes[i].dev));
 				return PTR_ERR(map->stripes[i].dev);
 			}
-			btrfs_report_missing_device(fs_info, devid, uuid, false);
 		}
+
 		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
				&(map->stripes[i].dev->dev_state));
 
@@ -8299,10 +8303,12 @@ static int relocating_repair_kthread(void *data)
 	target = cache->start;
 	btrfs_put_block_group(cache);
 
+	sb_start_write(fs_info->sb);
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE)) {
 		btrfs_info(fs_info,
			   "zoned: skip relocating block group %llu to repair: EBUSY",
			   target);
+		sb_end_write(fs_info->sb);
 		return -EBUSY;
 	}
@@ -8330,6 +8336,7 @@ static int relocating_repair_kthread(void *data)
 		btrfs_put_block_group(cache);
 	mutex_unlock(&fs_info->reclaim_bgs_lock);
 	btrfs_exclop_finish(fs_info);
+	sb_end_write(fs_info->sb);
 
 	return ret;
 }
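
The repair thread issues writes from kernel context, outside any syscall, so it has to take the superblock write (freeze) reference around the exclusive operation itself. The bracket pattern in isolation, with do_relocation_work() purely illustrative:

	/* Sketch only: in-kernel writers must hold freeze protection,
	 * mirroring what relocating_repair_kthread() does above. */
	sb_start_write(sb);		/* waits if the fs is being frozen */
	ret = do_relocation_work();	/* hypothetical worker */
	sb_end_write(sb);		/* lets a pending freeze proceed */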

diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
@@ -72,6 +72,11 @@ struct btrfs_device {
 	/* the mode sent to blkdev_get */
 	fmode_t mode;
 
+	/*
+	 * Device's major-minor number. Must be set even if the device is not
+	 * opened (bdev == NULL), unless the device is missing.
+	 */
+	dev_t devt;
 	unsigned long dev_state;
 	blk_status_t last_flush_error;
@@ -505,7 +510,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder);
 struct btrfs_device *btrfs_scan_one_device(const char *path,
					   fmode_t flags, void *holder);
-int btrfs_forget_devices(const char *path);
+int btrfs_forget_devices(dev_t devt);
 void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
 void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
 void btrfs_assign_next_active_device(struct btrfs_device *device,

diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
@@ -652,8 +652,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 		if (model == BLK_ZONED_HM ||
		    (model == BLK_ZONED_HA && incompat_zoned) ||
		    (model == BLK_ZONED_NONE && incompat_zoned)) {
-			struct btrfs_zoned_device_info *zone_info =
-				device->zone_info;
+			struct btrfs_zoned_device_info *zone_info;
 
+			zone_info = device->zone_info;
 			zoned_devices++;
@@ -1215,12 +1214,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 	struct btrfs_device *device;
 	u64 logical = cache->start;
 	u64 length = cache->length;
-	u64 physical = 0;
 	int ret;
 	int i;
 	unsigned int nofs_flag;
 	u64 *alloc_offsets = NULL;
 	u64 *caps = NULL;
+	u64 *physical = NULL;
 	unsigned long *active = NULL;
 	u64 last_alloc = 0;
 	u32 num_sequential = 0, num_conventional = 0;
@@ -1264,6 +1263,12 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		goto out;
 	}
 
+	physical = kcalloc(map->num_stripes, sizeof(*physical), GFP_NOFS);
+	if (!physical) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	active = bitmap_zalloc(map->num_stripes, GFP_NOFS);
 	if (!active) {
 		ret = -ENOMEM;
@@ -1277,14 +1282,14 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		int dev_replace_is_ongoing = 0;
 
 		device = map->stripes[i].dev;
-		physical = map->stripes[i].physical;
+		physical[i] = map->stripes[i].physical;
 
 		if (device->bdev == NULL) {
 			alloc_offsets[i] = WP_MISSING_DEV;
 			continue;
 		}
 
-		is_sequential = btrfs_dev_is_sequential(device, physical);
+		is_sequential = btrfs_dev_is_sequential(device, physical[i]);
 		if (is_sequential)
 			num_sequential++;
 		else
@@ -1299,21 +1304,21 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
		 * This zone will be used for allocation, so mark this zone
		 * non-empty.
		 */
-		btrfs_dev_clear_zone_empty(device, physical);
+		btrfs_dev_clear_zone_empty(device, physical[i]);
 
 		down_read(&dev_replace->rwsem);
 		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
 		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
-			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
+			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical[i]);
 		up_read(&dev_replace->rwsem);
 
		/*
		 * The group is mapped to a sequential zone. Get the zone write
		 * pointer to determine the allocation offset within the zone.
		 */
-		WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
+		WARN_ON(!IS_ALIGNED(physical[i], fs_info->zone_size));
 		nofs_flag = memalloc_nofs_save();
-		ret = btrfs_get_dev_zone(device, physical, &zone);
+		ret = btrfs_get_dev_zone(device, physical[i], &zone);
 		memalloc_nofs_restore(nofs_flag);
 		if (ret == -EIO || ret == -EOPNOTSUPP) {
 			ret = 0;
@@ -1339,7 +1344,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		case BLK_ZONE_COND_READONLY:
 			btrfs_err(fs_info,
		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
-				  physical >> device->zone_info->zone_size_shift,
+				  physical[i] >> device->zone_info->zone_size_shift,
				  rcu_str_deref(device->name), device->devid);
 			alloc_offsets[i] = WP_MISSING_DEV;
 			break;
@@ -1404,7 +1409,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		if (alloc_offsets[0] == WP_MISSING_DEV) {
 			btrfs_err(fs_info,
			"zoned: cannot recover write pointer for zone %llu",
-				physical);
+				physical[0]);
 			ret = -EIO;
 			goto out;
 		}
@@ -1413,6 +1418,42 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		cache->zone_is_active = test_bit(0, active);
 		break;
 	case BTRFS_BLOCK_GROUP_DUP:
+		if (map->type & BTRFS_BLOCK_GROUP_DATA) {
+			btrfs_err(fs_info, "zoned: profile DUP not yet supported on data bg");
+			ret = -EINVAL;
+			goto out;
+		}
+		if (alloc_offsets[0] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				  physical[0]);
+			ret = -EIO;
+			goto out;
+		}
+		if (alloc_offsets[1] == WP_MISSING_DEV) {
+			btrfs_err(fs_info,
+			"zoned: cannot recover write pointer for zone %llu",
+				  physical[1]);
+			ret = -EIO;
+			goto out;
+		}
+		if (alloc_offsets[0] != alloc_offsets[1]) {
+			btrfs_err(fs_info,
+			"zoned: write pointer offset mismatch of zones in DUP profile");
+			ret = -EIO;
+			goto out;
+		}
+		if (test_bit(0, active) != test_bit(1, active)) {
+			if (!btrfs_zone_activate(cache)) {
+				ret = -EIO;
+				goto out;
+			}
+		} else {
+			cache->zone_is_active = test_bit(0, active);
+		}
+		cache->alloc_offset = alloc_offsets[0];
+		cache->zone_capacity = min(caps[0], caps[1]);
+		break;
 	case BTRFS_BLOCK_GROUP_RAID1:
 	case BTRFS_BLOCK_GROUP_RAID0:
 	case BTRFS_BLOCK_GROUP_RAID10:
@@ -1465,6 +1506,7 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
 		cache->physical_map = NULL;
 	}
 	bitmap_free(active);
+	kfree(physical);
 	kfree(caps);
 	kfree(alloc_offsets);
 	free_extent_map(em);
@@ -1781,50 +1823,55 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
 	struct btrfs_device *device;
 	u64 physical;
 	bool ret;
+	int i;
 
 	if (!btrfs_is_zoned(block_group->fs_info))
 		return true;
 
 	map = block_group->physical_map;
-	/* Currently support SINGLE profile only */
-	ASSERT(map->num_stripes == 1);
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (device->zone_info->max_active_zones == 0)
-		return true;
 
 	spin_lock(&block_group->lock);
 	if (block_group->zone_is_active) {
 		ret = true;
 		goto out_unlock;
 	}
 
-	/* No space left */
-	if (block_group->alloc_offset == block_group->zone_capacity) {
-		ret = false;
-		goto out_unlock;
-	}
+	for (i = 0; i < map->num_stripes; i++) {
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
 
-	if (!btrfs_dev_set_active_zone(device, physical)) {
-		/* Cannot activate the zone */
-		ret = false;
-		goto out_unlock;
-	}
+		if (device->zone_info->max_active_zones == 0)
+			continue;
 
-	/* Successfully activated all the zones */
-	block_group->zone_is_active = 1;
+		/* No space left */
+		if (block_group->alloc_offset == block_group->zone_capacity) {
+			ret = false;
+			goto out_unlock;
+		}
+
+		if (!btrfs_dev_set_active_zone(device, physical)) {
+			/* Cannot activate the zone */
+			ret = false;
+			goto out_unlock;
+		}
+
+		/* Successfully activated all the zones */
+		if (i == map->num_stripes - 1)
+			block_group->zone_is_active = 1;
+	}
 	spin_unlock(&block_group->lock);
 
-	/* For the active block group list */
-	btrfs_get_block_group(block_group);
+	if (block_group->zone_is_active) {
+		/* For the active block group list */
+		btrfs_get_block_group(block_group);
 
-	spin_lock(&fs_info->zone_active_bgs_lock);
-	ASSERT(list_empty(&block_group->active_bg_list));
-	list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs);
-	spin_unlock(&fs_info->zone_active_bgs_lock);
+		spin_lock(&fs_info->zone_active_bgs_lock);
+		list_add_tail(&block_group->active_bg_list,
+			      &fs_info->zone_active_bgs);
+		spin_unlock(&fs_info->zone_active_bgs_lock);
+	}
 
 	return true;
@@ -1840,19 +1887,12 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	struct btrfs_device *device;
 	u64 physical;
 	int ret = 0;
+	int i;
 
 	if (!btrfs_is_zoned(fs_info))
 		return 0;
 
 	map = block_group->physical_map;
-	/* Currently support SINGLE profile only */
-	ASSERT(map->num_stripes == 1);
-
-	device = map->stripes[0].dev;
-	physical = map->stripes[0].physical;
-
-	if (device->zone_info->max_active_zones == 0)
-		return 0;
 
 	spin_lock(&block_group->lock);
 	if (!block_group->zone_is_active) {
@@ -1904,25 +1944,34 @@ int btrfs_zone_finish(struct btrfs_block_group *block_group)
 	btrfs_clear_data_reloc_bg(block_group);
 	spin_unlock(&block_group->lock);
 
-	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
-			       physical >> SECTOR_SHIFT,
-			       device->zone_info->zone_size >> SECTOR_SHIFT,
-			       GFP_NOFS);
+	for (i = 0; i < map->num_stripes; i++) {
+		device = map->stripes[i].dev;
+		physical = map->stripes[i].physical;
 
-	btrfs_dec_block_group_ro(block_group);
+		if (device->zone_info->max_active_zones == 0)
+			continue;
 
-	if (!ret) {
-		btrfs_dev_clear_active_zone(device, physical);
+		ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+				       physical >> SECTOR_SHIFT,
+				       device->zone_info->zone_size >> SECTOR_SHIFT,
+				       GFP_NOFS);
+		if (ret)
+			return ret;
 
-		spin_lock(&fs_info->zone_active_bgs_lock);
-		ASSERT(!list_empty(&block_group->active_bg_list));
-		list_del_init(&block_group->active_bg_list);
-		spin_unlock(&fs_info->zone_active_bgs_lock);
+		btrfs_dev_clear_active_zone(device, physical);
+	}
 
-		/* For active_bg_list */
-		btrfs_put_block_group(block_group);
-	}
+	btrfs_dec_block_group_ro(block_group);
 
-	return ret;
+	spin_lock(&fs_info->zone_active_bgs_lock);
+	ASSERT(!list_empty(&block_group->active_bg_list));
+	list_del_init(&block_group->active_bg_list);
+	spin_unlock(&fs_info->zone_active_bgs_lock);
+
+	/* For active_bg_list */
+	btrfs_put_block_group(block_group);
+
+	return 0;
 }
 
 bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)

diff --git a/fs/internal.h b/fs/internal.h
@@ -157,11 +157,6 @@ extern char *simple_dname(struct dentry *, char *, int);
 extern void dput_to_list(struct dentry *, struct list_head *);
 extern void shrink_dentry_list(struct list_head *);
 
-/*
- * read_write.c
- */
-extern int rw_verify_area(int, struct file *, const loff_t *, size_t);
-
 /*
  * pipe.c
  */

diff --git a/fs/ioctl.c b/fs/ioctl.c
@@ -236,9 +236,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 	if (!src_file.file)
 		return -EBADF;
 
-	ret = -EXDEV;
-	if (src_file.file->f_path.mnt != dst_file->f_path.mnt)
-		goto fdput;
-
 	cloned = vfs_clone_file_range(src_file.file, off, dst_file, destoff,
				      olen, 0);
 	if (cloned < 0)
@@ -247,7 +244,6 @@ static long ioctl_file_clone(struct file *dst_file, unsigned long srcfd,
 		ret = -EINVAL;
 	else
 		ret = 0;
-fdput:
 	fdput(src_file);
 	return ret;
 }
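
With the per-mount check gone, FICLONE requires the source and destination only to be on the same filesystem, so a reflink across two mounts (for example bind mounts) of one btrfs can now succeed instead of failing with EXDEV. A hedged userspace sketch, error handling trimmed:

	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>

	/* Sketch only: whole-file reflink between two mounts of the same fs.
	 * Before this series the kernel returned -EXDEV for this case. */
	int reflink(const char *src_path, const char *dst_path)
	{
		int src = open(src_path, O_RDONLY);
		int dst = open(dst_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);

		if (src < 0 || dst < 0)
			return -1;
		return ioctl(dst, FICLONE, src);	/* shares extents, no data copy */
	}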

diff --git a/fs/read_write.c b/fs/read_write.c
@@ -385,6 +385,7 @@ int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t
 	return security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
 }
+EXPORT_SYMBOL(rw_verify_area);
 
 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
 {
@@ -1617,24 +1618,16 @@ int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count)
 	return 0;
 }
 
-/*
- * Performs necessary checks before doing a write
- *
- * Can adjust writing position or amount of bytes to write.
- * Returns appropriate error code that caller should return or
- * zero in case that write should be allowed.
- */
-ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+/* Like generic_write_checks(), but takes size of write instead of iter. */
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	loff_t count;
-	int ret;
 
 	if (IS_SWAPFILE(inode))
 		return -ETXTBSY;
 
-	if (!iov_iter_count(from))
+	if (!*count)
 		return 0;
 
 	/* FIXME: this is for backwards compatibility with 2.4 */
@@ -1644,8 +1637,23 @@ ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
 	if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
 		return -EINVAL;
 
-	count = iov_iter_count(from);
-	ret = generic_write_check_limits(file, iocb->ki_pos, &count);
+	return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count);
+}
+EXPORT_SYMBOL(generic_write_checks_count);
+
+/*
+ * Performs necessary checks before doing a write
+ *
+ * Can adjust writing position or amount of bytes to write.
+ * Returns appropriate error code that caller should return or
+ * zero in case that write should be allowed.
+ */
+ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	loff_t count = iov_iter_count(from);
+	int ret;
+
+	ret = generic_write_checks_count(iocb, &count);
 	if (ret)
 		return ret;
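
generic_write_checks_count() splits the iov_iter-independent checks out of generic_write_checks(), so a caller that only knows a byte count, such as the btrfs encoded write ioctl, can reuse them. A sketch of such a caller; the function name is illustrative, not from the series:

	/* Sketch only: run the standard pre-write checks when all we have is
	 * a byte count. The count may be clamped by the fs limits. */
	static ssize_t example_check_encoded_write(struct kiocb *iocb, size_t len)
	{
		loff_t count = len;
		int ret;

		ret = generic_write_checks_count(iocb, &count);
		if (ret)
			return ret;

		return count;	/* number of bytes the write may cover */
	}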

diff --git a/fs/remap_range.c b/fs/remap_range.c
@@ -362,11 +362,6 @@ loff_t do_clone_file_range(struct file *file_in, loff_t pos_in,
 
 	WARN_ON_ONCE(remap_flags & REMAP_FILE_DEDUP);
 
-	/*
-	 * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
-	 * the same mount. Practically, they only need to be on the same file
-	 * system.
-	 */
 	if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb)
 		return -EXDEV;
 
@@ -458,7 +453,7 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 		goto out_drop_write;
 
 	ret = -EXDEV;
-	if (src_file->f_path.mnt != dst_file->f_path.mnt)
+	if (file_inode(src_file)->i_sb != file_inode(dst_file)->i_sb)
 		goto out_drop_write;
 
 	ret = -EISDIR;

diff --git a/include/linux/fs.h b/include/linux/fs.h
@@ -3130,6 +3130,7 @@ extern int sb_min_blocksize(struct super_block *, int);
 extern int generic_file_mmap(struct file *, struct vm_area_struct *);
 extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *);
 extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
+int generic_write_checks_count(struct kiocb *iocb, loff_t *count);
 extern int generic_write_check_limits(struct file *file, loff_t pos,
		loff_t *count);
 extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
@@ -3173,6 +3174,7 @@ extern loff_t fixed_size_llseek(struct file *file, loff_t offset,
		int whence, loff_t size);
 extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
 extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
+int rw_verify_area(int, struct file *, const loff_t *, size_t);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
 extern int stream_open(struct inode * inode, struct file * filp);

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
@@ -53,6 +53,7 @@ struct btrfs_space_info;
 		{ BTRFS_TREE_RELOC_OBJECTID,	"TREE_RELOC"	},	\
 		{ BTRFS_UUID_TREE_OBJECTID,	"UUID_TREE"	},	\
 		{ BTRFS_FREE_SPACE_TREE_OBJECTID, "FREE_SPACE_TREE" },	\
+		{ BTRFS_BLOCK_GROUP_TREE_OBJECTID, "BLOCK_GROUP_TREE" },\
 		{ BTRFS_DATA_RELOC_TREE_OBJECTID, "DATA_RELOC_TREE" })
 
 #define show_root_type(obj)						\

diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
@@ -309,6 +309,7 @@ struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_METADATA_UUID	(1ULL << 10)
 #define BTRFS_FEATURE_INCOMPAT_RAID1C34		(1ULL << 11)
 #define BTRFS_FEATURE_INCOMPAT_ZONED		(1ULL << 12)
+#define BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2	(1ULL << 13)
 
 struct btrfs_ioctl_feature_flags {
 	__u64 compat_flags;
@@ -868,6 +869,134 @@ struct btrfs_ioctl_get_subvol_rootref_args {
 	__u8 align[7];
 };
 
+/*
+ * Data and metadata for an encoded read or write.
+ *
+ * Encoded I/O bypasses any encoding automatically done by the filesystem (e.g.,
+ * compression). This can be used to read the compressed contents of a file or
+ * write pre-compressed data directly to a file.
+ *
+ * BTRFS_IOC_ENCODED_READ and BTRFS_IOC_ENCODED_WRITE are essentially
+ * preadv/pwritev with additional metadata about how the data is encoded and the
+ * size of the unencoded data.
+ *
+ * BTRFS_IOC_ENCODED_READ fills the given iovecs with the encoded data, fills
+ * the metadata fields, and returns the size of the encoded data. It reads one
+ * extent per call. It can also read data which is not encoded.
+ *
+ * BTRFS_IOC_ENCODED_WRITE uses the metadata fields, writes the encoded data
+ * from the iovecs, and returns the size of the encoded data. Note that the
+ * encoded data is not validated when it is written; if it is not valid (e.g.,
+ * it cannot be decompressed), then a subsequent read may return an error.
+ *
+ * Since the filesystem page cache contains decoded data, encoded I/O bypasses
+ * the page cache. Encoded I/O requires CAP_SYS_ADMIN.
+ */
+struct btrfs_ioctl_encoded_io_args {
+	/* Input parameters for both reads and writes. */
+
+	/*
+	 * iovecs containing encoded data.
+	 *
+	 * For reads, if the size of the encoded data is larger than the sum of
+	 * iov[n].iov_len for 0 <= n < iovcnt, then the ioctl fails with
+	 * ENOBUFS.
+	 *
+	 * For writes, the size of the encoded data is the sum of iov[n].iov_len
+	 * for 0 <= n < iovcnt. This must be less than 128 KiB (this limit may
+	 * increase in the future). This must also be less than or equal to
+	 * unencoded_len.
+	 */
+	const struct iovec __user *iov;
+	/* Number of iovecs. */
+	unsigned long iovcnt;
+	/*
+	 * Offset in file.
+	 *
+	 * For writes, must be aligned to the sector size of the filesystem.
+	 */
+	__s64 offset;
+	/* Currently must be zero. */
+	__u64 flags;
+
+	/*
+	 * For reads, the following members are output parameters that will
+	 * contain the returned metadata for the encoded data.
+	 * For writes, the following members must be set to the metadata for the
+	 * encoded data.
+	 */
+
+	/*
+	 * Length of the data in the file.
+	 *
+	 * Must be less than or equal to unencoded_len - unencoded_offset. For
+	 * writes, must be aligned to the sector size of the filesystem unless
+	 * the data ends at or beyond the current end of the file.
+	 */
+	__u64 len;
+	/*
+	 * Length of the unencoded (i.e., decrypted and decompressed) data.
+	 *
+	 * For writes, must be no more than 128 KiB (this limit may increase in
+	 * the future). If the unencoded data is actually longer than
+	 * unencoded_len, then it is truncated; if it is shorter, then it is
+	 * extended with zeroes.
+	 */
+	__u64 unencoded_len;
+	/*
+	 * Offset from the first byte of the unencoded data to the first byte of
+	 * logical data in the file.
+	 *
+	 * Must be less than unencoded_len.
+	 */
+	__u64 unencoded_offset;
+	/*
+	 * BTRFS_ENCODED_IO_COMPRESSION_* type.
+	 *
+	 * For writes, must not be BTRFS_ENCODED_IO_COMPRESSION_NONE.
+	 */
+	__u32 compression;
+	/* Currently always BTRFS_ENCODED_IO_ENCRYPTION_NONE. */
+	__u32 encryption;
+	/*
+	 * Reserved for future expansion.
+	 *
+	 * For reads, always returned as zero. Users should check for non-zero
+	 * bytes. If there are any, then the kernel has a newer version of this
+	 * structure with additional information that the user definition is
+	 * missing.
+	 *
+	 * For writes, must be zeroed.
+	 */
+	__u8 reserved[64];
+};
+
+/* Data is not compressed. */
+#define BTRFS_ENCODED_IO_COMPRESSION_NONE 0
+/* Data is compressed as a single zlib stream. */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZLIB 1
+/*
+ * Data is compressed as a single zstd frame with the windowLog compression
+ * parameter set to no more than 17.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_ZSTD 2
+/*
+ * Data is compressed sector by sector (using the sector size indicated by the
+ * name of the constant) with LZO1X and wrapped in the format documented in
+ * fs/btrfs/lzo.c. For writes, the compression sector size must match the
+ * filesystem sector size.
+ */
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_4K 3
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_8K 4
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_16K 5
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_32K 6
+#define BTRFS_ENCODED_IO_COMPRESSION_LZO_64K 7
+#define BTRFS_ENCODED_IO_COMPRESSION_TYPES 8
+
+/* Data is not encrypted. */
+#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
+#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1
+
 /* Error codes as returned by the kernel */
 enum btrfs_err_code {
 	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -996,5 +1125,9 @@ enum btrfs_err_code {
					struct btrfs_ioctl_ino_lookup_user_args)
 #define BTRFS_IOC_SNAP_DESTROY_V2 _IOW(BTRFS_IOCTL_MAGIC, 63, \
					struct btrfs_ioctl_vol_args_v2)
+#define BTRFS_IOC_ENCODED_READ _IOR(BTRFS_IOCTL_MAGIC, 64, \
+				    struct btrfs_ioctl_encoded_io_args)
+#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
+				     struct btrfs_ioctl_encoded_io_args)
 
 #endif /* _UAPI_LINUX_BTRFS_H */
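
A hedged userspace sketch of the read side of the new interface: one call returns a single extent's encoded (possibly compressed) bytes plus the metadata needed to decode them. Requires CAP_SYS_ADMIN; the wrapper name is illustrative, error handling trimmed:

	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/uio.h>
	#include <linux/btrfs.h>

	/* Sketch only: read one extent's encoded bytes at 'offset' into 'buf'. */
	ssize_t encoded_read(int fd, void *buf, size_t buf_len, off_t offset,
			     struct btrfs_ioctl_encoded_io_args *args)
	{
		struct iovec iov = { .iov_base = buf, .iov_len = buf_len };

		memset(args, 0, sizeof(*args));
		args->iov = &iov;
		args->iovcnt = 1;
		args->offset = offset;

		/* On success the return value is the encoded length; args->len,
		 * unencoded_len, unencoded_offset and compression describe how
		 * to decode the bytes. */
		return ioctl(fd, BTRFS_IOC_ENCODED_READ, args);
	}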

diff --git a/include/uapi/linux/btrfs_tree.h b/include/uapi/linux/btrfs_tree.h
@@ -53,6 +53,9 @@
 /* tracks free space in block groups. */
 #define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
 
+/* Holds the block group items for extent tree v2. */
+#define BTRFS_BLOCK_GROUP_TREE_OBJECTID 11ULL
+
 /* device stats in the device tree */
 #define BTRFS_DEV_STATS_OBJECTID 0ULL
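
The new block group tree root belongs to the experimental extent tree v2 on-disk format, gated by the incompat bit added above. A userspace sketch that checks whether a mounted filesystem advertises the bit, via the existing feature-flags ioctl:

	#include <sys/ioctl.h>
	#include <linux/btrfs.h>

	/* Sketch only: 'fd' is an open fd anywhere on the mounted btrfs. */
	int has_extent_tree_v2(int fd)
	{
		struct btrfs_ioctl_feature_flags flags;

		if (ioctl(fd, BTRFS_IOC_GET_FEATURES, &flags) < 0)
			return -1;
		return !!(flags.incompat_flags & BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2);
	}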