for-6.13-tag

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmc0zT4ACgkQxWXV+ddt
WDtThRAAhzSSiHcJqTfCL5nHh7w85MNEVw28o1ETgXSYJmx0JOWLE7Znlp2FV7jj
IbYkFfF2gXJzYvRZkcXB/TAHV9KJG5yZIBZfccbM+9db9f8xkImVKMuqQRXPU41R
ppSCmqZTeujtt8ucsaJkMpm6pzECKJCJaGOsMJ8fiqKpo89dKO3eGAVboSbpPF4C
r0YmppiBwSP/cCXQCqWxZRbqPGN+lUgZpIGNRi157kehfmRHlVVJTO1pgqK8PCXb
uIT09Kulppfez8+1A10CPcniDTyinLik/qLTNlzdWoDBL4iNJMg0A0wsA04AJVf0
PdOS0REusiv3QcEIO6PefuRFRRfXcSLPpPDUceltJT5O0uM2gUqf2C7dEHXUGU3o
TdgYlbQpsJWpZ7VGWQDZeGGV04lOPQvu0LGLPgEerUQd5H9ABa0dX8Fn0sPhKsa8
whpAcdfE4rdNxB2OJFnqQeFq0z3cSjP/rvKlluCmAj97QYI+kiu3QyhemcT1YSC9
U7n5Ya9IzIYCN3ml54q3hEgyD0IVGGG20GuUmqC9XSP9mrQRC8I1g7v26AiOTrrk
VhgSdtMmphDxXudifsnYMaQ0Z1QqiUrW1SM/prAEOnBYCo75+HDsTgrq9ithgHoI
4xz4YXJyMRs18qfTJctXC1wmGuz5plTdQrwarHdNsELN5HEyqX4=
=aAcf
-----END PGP SIGNATURE-----

Merge tag 'for-6.13-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Changes outside of btrfs: add io_uring command flag to track a dying
  task (the rest will go via the block git tree).

  User visible changes:

   - wire encoded read (ioctl) to io_uring commands, this can be used on
     itself, in the future this will allow 'send' to be asynchronous. As
     a consequence, the encoded read ioctl can also work in non-blocking
     mode

   - new ioctl to wait for cleaned subvolumes, no need to use the
     generic and root-only SEARCH_TREE ioctl, will be used by "btrfs
     subvol sync"

   - recognize different paths/symlinks for the same devices and don't
     report them during rescanning, this can be observed with LVM or DM

   - seeding device use case change, the sprout device (the one capturing
     new writes) will not clear the read-only status of the super block;
     this prevents accumulating space from deleted snapshots

  Performance improvements:

   - reduce lock contention when traversing extent buffers

   - reduce extent tree lock contention when searching for inline backref

   - switch from rb-trees to xarray for delayed ref tracking,
     improvements due to better cache locality, branching factors and
     more compact data structures

   - enable extent map shrinker again (prevent memory exhaustion under
     some types of IO load), reworked to run in a single worker thread
     (there used to be problems causing long stalls under memory pressure)

  Core changes:

   - raid-stripe-tree feature updates:
       - make device replace and scrub work
       - implement partial deletion of stripe extents
       - new selftests

   - split the config option BTRFS_DEBUG and add EXPERIMENTAL for
     features that are experimental or with known problems so we don't
     misuse debugging config for that

   - subpage mode updates (sector < page):
       - update compression implementations
       - update writepage, writeback

   - continued folio API conversions:
       - buffered writes

   - make buffered write copy one page at a time, preparatory work for
     future integration with large folios, may cause performance drop

   - proper locking of root item regarding starting send

   - error handling improvements

   - code cleanups and refactoring:
       - dead code removal
       - unused parameter reduction
       - lockdep assertions"

* tag 'for-6.13-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (119 commits)
  btrfs: send: check for read-only send root under critical section
  btrfs: send: check for dead send root under critical section
  btrfs: remove check for NULL fs_info at btrfs_folio_end_lock_bitmap()
  btrfs: fix warning on PTR_ERR() against NULL device at btrfs_control_ioctl()
  btrfs: fix a typo in btrfs_use_zone_append
  btrfs: avoid superfluous calls to free_extent_map() in btrfs_encoded_read()
  btrfs: simplify logic to decrement snapshot counter at btrfs_mksnapshot()
  btrfs: remove hole from struct btrfs_delayed_node
  btrfs: update stale comment for struct btrfs_delayed_ref_node::add_list
  btrfs: add new ioctl to wait for cleaned subvolumes
  btrfs: simplify range tracking in cow_file_range()
  btrfs: remove conditional path allocation in btrfs_read_locked_inode()
  btrfs: push cleanup into btrfs_read_locked_inode()
  io_uring/cmd: let cmds to know about dying task
  btrfs: add struct io_btrfs_cmd as type for io_uring_cmd_to_pdu()
  btrfs: add io_uring command for encoded reads (ENCODED_READ ioctl)
  btrfs: move priv off stack in btrfs_encoded_read_regular_fill_pages()
  btrfs: don't sleep in btrfs_encoded_read() if IOCB_NOWAIT is set
  btrfs: change btrfs_encoded_read() so that reading of extent is done by caller
  btrfs: remove pointless iocb::ki_pos addition in btrfs_encoded_read()
  ...
commit c14a8a4c04
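
The encoded read wiring described above is easiest to see from user space. Below is a minimal, illustrative sketch (not code from this pull) that drives the pre-existing BTRFS_IOC_ENCODED_READ ioctl; the struct fields follow the btrfs uapi header, the ioctl is privileged, and the EAGAIN retry only becomes relevant with the non-blocking behaviour mentioned in the pull message.

/*
 * Hedged sketch: read one encoded extent with BTRFS_IOC_ENCODED_READ.
 * Struct fields follow include/uapi/linux/btrfs.h; the EAGAIN handling
 * is only illustrative of the new non-blocking behaviour, and a real
 * caller would decide how to back off instead of spinning.
 */
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	char buf[128 * 1024];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct btrfs_ioctl_encoded_io_args args;
	int fd, ret;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <file on btrfs>\n", argv[0]);
		return 1;
	}
	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	memset(&args, 0, sizeof(args));
	args.iov = &iov;
	args.iovcnt = 1;
	args.offset = 0;	/* file offset to read from */

	do {
		ret = ioctl(fd, BTRFS_IOC_ENCODED_READ, &args);
	} while (ret < 0 && errno == EAGAIN);	/* non-blocking path may ask us to retry */

	if (ret < 0)
		perror("BTRFS_IOC_ENCODED_READ");
	else
		printf("read %d encoded bytes, compression type %u\n",
		       ret, args.compression);
	close(fd);
	return ret < 0;
}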
@@ -78,6 +78,32 @@ config BTRFS_ASSERT

 	  If unsure, say N.

+config BTRFS_EXPERIMENTAL
+	bool "Btrfs experimental features"
+	depends on BTRFS_FS
+	default n
+	help
+	  Enable experimental features. These features may not be stable enough
+	  for end users. This is meant for btrfs developers or users who wish
+	  to test the functionality and report problems.
+
+	  Current list:
+
+	  - extent map shrinker - performance problems with too frequent shrinks
+
+	  - send stream protocol v3 - fs-verity support
+
+	  - checksum offload mode - sysfs knob to affect when checksums are
+				    calculated (at IO time, or in a thread)
+
+	  - raid-stripe-tree - additional mapping of extents to devices to
+			       support RAID1* profiles on zoned devices,
+			       RAID56 not yet supported
+
+	  - extent tree v2 - complex rework of extent tracking
+
+	  If unsure, say N.
+
 config BTRFS_FS_REF_VERIFY
 	bool "Btrfs with the ref verify tool compiled in"
 	depends on BTRFS_FS
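
The new option is a plain compile-time switch. As a rough stand-alone model (not btrfs source) of how code selects an experimental path, compile the snippet below with and without -DCONFIG_BTRFS_EXPERIMENTAL; in the kernel the macro comes from this Kconfig entry instead, and the should_async_write() hunk further down moves its checksum-offload knob from CONFIG_BTRFS_DEBUG to this new symbol.

/*
 * Illustrative model of CONFIG_BTRFS_EXPERIMENTAL gating, compiled as
 * plain userspace C:  cc -DCONFIG_BTRFS_EXPERIMENTAL demo.c
 * All names here are invented for the example.
 */
#include <stdio.h>
#include <stdbool.h>

static bool offload_csum_available(void)
{
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	return true;	/* experimental feature compiled in */
#else
	return false;	/* default build leaves it out */
#endif
}

int main(void)
{
	printf("checksum offload mode available: %s\n",
	       offload_csum_available() ? "yes" : "no");
	return 0;
}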
@@ -43,4 +43,5 @@ btrfs-$(CONFIG_FS_VERITY) += verity.o
 btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o \
 	tests/extent-buffer-tests.o tests/btrfs-tests.o \
 	tests/extent-io-tests.o tests/inode-tests.o tests/qgroup-tests.o \
-	tests/free-space-tree-tests.o tests/extent-map-tests.o
+	tests/free-space-tree-tests.o tests/extent-map-tests.o \
+	tests/raid-stripe-tree-tests.o
@@ -1442,7 +1442,8 @@ static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx,
 	 */
 	delayed_refs = &ctx->trans->transaction->delayed_refs;
 	spin_lock(&delayed_refs->lock);
-	head = btrfs_find_delayed_ref_head(delayed_refs, ctx->bytenr);
+	head = btrfs_find_delayed_ref_head(ctx->fs_info, delayed_refs,
+					   ctx->bytenr);
 	if (head) {
 		if (!mutex_trylock(&head->mutex)) {
 			refcount_inc(&head->refs);
@@ -587,7 +587,7 @@ static bool should_async_write(struct btrfs_bio *bbio)
 {
 	bool auto_csum_mode = true;
 
-#ifdef CONFIG_BTRFS_DEBUG
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
 	struct btrfs_fs_devices *fs_devices = bbio->fs_info->fs_devices;
 	enum btrfs_offload_csum_mode csum_mode = READ_ONCE(fs_devices->offload_csum_mode);
 
@@ -2797,7 +2797,7 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans)
 	 * uncompressed data size, because the compression is only done
 	 * when writeback triggered and we don't know how much space we
 	 * are actually going to need, so we reserve the uncompressed
-	 * size because the data may be uncompressible in the worst case.
+	 * size because the data may be incompressible in the worst case.
 	 */
 	if (ret == 0) {
 		bool used;
 
@@ -577,7 +577,6 @@ void btrfs_merge_delalloc_extent(struct btrfs_inode *inode, struct extent_state
 						 struct extent_state *other);
 void btrfs_split_delalloc_extent(struct btrfs_inode *inode,
 				 struct extent_state *orig, u64 split);
-void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end);
 void btrfs_evict_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
 void btrfs_destroy_inode(struct inode *inode);
@@ -613,11 +612,17 @@ int btrfs_writepage_cow_fixup(struct folio *folio);
 int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
 					     int compress_type);
 int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
-					  u64 file_offset, u64 disk_bytenr,
-					  u64 disk_io_size,
-					  struct page **pages);
+					  u64 disk_bytenr, u64 disk_io_size,
+					  struct page **pages, void *uring_ctx);
 ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
-			   struct btrfs_ioctl_encoded_io_args *encoded);
+			   struct btrfs_ioctl_encoded_io_args *encoded,
+			   struct extent_state **cached_state,
+			   u64 *disk_bytenr, u64 *disk_io_size);
+ssize_t btrfs_encoded_read_regular(struct kiocb *iocb, struct iov_iter *iter,
+				   u64 start, u64 lockend,
+				   struct extent_state **cached_state,
+				   u64 disk_bytenr, u64 disk_io_size,
+				   size_t count, bool compressed, bool *unlocked);
 ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
 			       const struct btrfs_ioctl_encoded_io_args *encoded);
 
@@ -453,7 +453,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		if (pg_index > end_index)
 			break;
 
-		folio = __filemap_get_folio(mapping, pg_index, 0, 0);
+		folio = filemap_get_folio(mapping, pg_index);
 		if (!IS_ERR(folio)) {
 			u64 folio_sz = folio_size(folio);
 			u64 offset = offset_in_folio(folio, cur);
@@ -545,8 +545,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		 * subpage::readers and to unlock the page.
 		 */
-		if (fs_info->sectorsize < PAGE_SIZE)
-			btrfs_subpage_start_reader(fs_info, folio, cur,
-						   add_size);
+		btrfs_folio_set_lock(fs_info, folio, cur, add_size);
 		folio_put(folio);
 		cur += add_size;
 	}
@@ -702,7 +701,7 @@ static void free_heuristic_ws(struct list_head *ws)
 	kfree(workspace);
 }
 
-static struct list_head *alloc_heuristic_ws(unsigned int level)
+static struct list_head *alloc_heuristic_ws(void)
 {
 	struct heuristic_ws *ws;
 
@@ -744,9 +743,9 @@ static const struct btrfs_compress_op * const btrfs_compress_op[] = {
 static struct list_head *alloc_workspace(int type, unsigned int level)
 {
 	switch (type) {
-	case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws(level);
+	case BTRFS_COMPRESS_NONE: return alloc_heuristic_ws();
 	case BTRFS_COMPRESS_ZLIB: return zlib_alloc_workspace(level);
-	case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace(level);
+	case BTRFS_COMPRESS_LZO: return lzo_alloc_workspace();
 	case BTRFS_COMPRESS_ZSTD: return zstd_alloc_workspace(level);
 	default:
 		/*
@@ -1030,6 +1029,7 @@ int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping
 {
 	int type = btrfs_compress_type(type_level);
 	int level = btrfs_compress_level(type_level);
+	const unsigned long orig_len = *total_out;
 	struct list_head *workspace;
 	int ret;
 
@@ -1037,6 +1037,8 @@ int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping
 	workspace = get_workspace(type, level);
 	ret = compression_compress_pages(type, workspace, mapping, start, folios,
 					 out_folios, total_in, total_out);
+	/* The total read-in bytes should be no larger than the input. */
+	ASSERT(*total_in <= orig_len);
 	put_workspace(type, workspace);
 	return ret;
 }
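
The alloc_workspace() change above removes the level argument from the paths that never used it. A small stand-alone model of that dispatch pattern, with invented names, is:

/*
 * Stand-alone model of the per-type workspace dispatch after the cleanup:
 * only the compressors that actually honour a level (zlib, zstd) still
 * receive it. All names are invented for the illustration.
 */
#include <stdio.h>
#include <stdlib.h>

enum demo_compress_type { DEMO_NONE, DEMO_ZLIB, DEMO_LZO, DEMO_ZSTD };

struct demo_workspace { enum demo_compress_type type; unsigned int level; };

static struct demo_workspace *demo_alloc(enum demo_compress_type type, unsigned int level)
{
	struct demo_workspace *ws = calloc(1, sizeof(*ws));

	if (!ws)
		return NULL;
	ws->type = type;
	switch (type) {
	case DEMO_NONE:		/* heuristic workspace: no level */
	case DEMO_LZO:		/* LZO has a single fixed level */
		ws->level = 0;
		break;
	case DEMO_ZLIB:
	case DEMO_ZSTD:		/* level-aware compressors keep it */
		ws->level = level;
		break;
	}
	return ws;
}

int main(void)
{
	struct demo_workspace *ws = demo_alloc(DEMO_ZSTD, 3);

	if (ws)
		printf("type %d, level %u\n", ws->type, ws->level);
	free(ws);
	return 0;
}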
@@ -175,7 +175,7 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb);
 int lzo_decompress(struct list_head *ws, const u8 *data_in,
 		struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen,
 		size_t destlen);
-struct list_head *lzo_alloc_workspace(unsigned int level);
+struct list_head *lzo_alloc_workspace(void);
 void lzo_free_workspace(struct list_head *ws);
 
 int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,

fs/btrfs/ctree.c | 148 lines changed
@ -1508,26 +1508,26 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
|
||||
*/
|
||||
static int
|
||||
read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
struct extent_buffer **eb_ret, int level, int slot,
|
||||
struct extent_buffer **eb_ret, int slot,
|
||||
const struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
u64 blocknr;
|
||||
u64 gen;
|
||||
struct extent_buffer *tmp;
|
||||
int ret;
|
||||
struct extent_buffer *tmp = NULL;
|
||||
int ret = 0;
|
||||
int parent_level;
|
||||
bool unlock_up;
|
||||
int err;
|
||||
bool read_tmp = false;
|
||||
bool tmp_locked = false;
|
||||
bool path_released = false;
|
||||
|
||||
unlock_up = ((level + 1 < BTRFS_MAX_LEVEL) && p->locks[level + 1]);
|
||||
blocknr = btrfs_node_blockptr(*eb_ret, slot);
|
||||
gen = btrfs_node_ptr_generation(*eb_ret, slot);
|
||||
parent_level = btrfs_header_level(*eb_ret);
|
||||
btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot);
|
||||
check.has_first_key = true;
|
||||
check.level = parent_level - 1;
|
||||
check.transid = gen;
|
||||
check.transid = btrfs_node_ptr_generation(*eb_ret, slot);
|
||||
check.owner_root = btrfs_root_id(root);
|
||||
|
||||
/*
|
||||
@ -1540,80 +1540,116 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
tmp = find_extent_buffer(fs_info, blocknr);
|
||||
if (tmp) {
|
||||
if (p->reada == READA_FORWARD_ALWAYS)
|
||||
reada_for_search(fs_info, p, level, slot, key->objectid);
|
||||
reada_for_search(fs_info, p, parent_level, slot, key->objectid);
|
||||
|
||||
/* first we do an atomic uptodate check */
|
||||
if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
|
||||
if (btrfs_buffer_uptodate(tmp, check.transid, 1) > 0) {
|
||||
/*
|
||||
* Do extra check for first_key, eb can be stale due to
|
||||
* being cached, read from scrub, or have multiple
|
||||
* parents (shared tree blocks).
|
||||
*/
|
||||
if (btrfs_verify_level_key(tmp,
|
||||
parent_level - 1, &check.first_key, gen)) {
|
||||
free_extent_buffer(tmp);
|
||||
return -EUCLEAN;
|
||||
if (btrfs_verify_level_key(tmp, &check)) {
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
*eb_ret = tmp;
|
||||
return 0;
|
||||
tmp = NULL;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (p->nowait) {
|
||||
free_extent_buffer(tmp);
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
|
||||
/* now we're allowed to do a blocking uptodate check */
|
||||
ret = btrfs_read_extent_buffer(tmp, &check);
|
||||
if (ret) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!p->skip_locking) {
|
||||
btrfs_unlock_up_safe(p, parent_level + 1);
|
||||
tmp_locked = true;
|
||||
btrfs_tree_read_lock(tmp);
|
||||
btrfs_release_path(p);
|
||||
ret = -EAGAIN;
|
||||
path_released = true;
|
||||
}
|
||||
|
||||
/* Now we're allowed to do a blocking uptodate check. */
|
||||
err = btrfs_read_extent_buffer(tmp, &check);
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
ASSERT(!tmp_locked);
|
||||
*eb_ret = tmp;
|
||||
tmp = NULL;
|
||||
}
|
||||
goto out;
|
||||
} else if (p->nowait) {
|
||||
return -EAGAIN;
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (unlock_up) {
|
||||
btrfs_unlock_up_safe(p, level + 1);
|
||||
if (!p->skip_locking) {
|
||||
btrfs_unlock_up_safe(p, parent_level + 1);
|
||||
ret = -EAGAIN;
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
if (p->reada != READA_NONE)
|
||||
reada_for_search(fs_info, p, level, slot, key->objectid);
|
||||
reada_for_search(fs_info, p, parent_level, slot, key->objectid);
|
||||
|
||||
tmp = read_tree_block(fs_info, blocknr, &check);
|
||||
tmp = btrfs_find_create_tree_block(fs_info, blocknr, check.owner_root, check.level);
|
||||
if (IS_ERR(tmp)) {
|
||||
btrfs_release_path(p);
|
||||
return PTR_ERR(tmp);
|
||||
ret = PTR_ERR(tmp);
|
||||
tmp = NULL;
|
||||
goto out;
|
||||
}
|
||||
read_tmp = true;
|
||||
|
||||
if (!p->skip_locking) {
|
||||
ASSERT(ret == -EAGAIN);
|
||||
tmp_locked = true;
|
||||
btrfs_tree_read_lock(tmp);
|
||||
btrfs_release_path(p);
|
||||
path_released = true;
|
||||
}
|
||||
|
||||
/* Now we're allowed to do a blocking uptodate check. */
|
||||
err = btrfs_read_extent_buffer(tmp, &check);
|
||||
if (err) {
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the read above didn't mark this buffer up to date,
|
||||
* it will never end up being up to date. Set ret to EIO now
|
||||
* and give up so that our caller doesn't loop forever
|
||||
* on our EAGAINs.
|
||||
*/
|
||||
if (!extent_buffer_uptodate(tmp))
|
||||
if (!extent_buffer_uptodate(tmp)) {
|
||||
ret = -EIO;
|
||||
|
||||
out:
|
||||
if (ret == 0) {
|
||||
*eb_ret = tmp;
|
||||
} else {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
ASSERT(!tmp_locked);
|
||||
*eb_ret = tmp;
|
||||
tmp = NULL;
|
||||
}
|
||||
out:
|
||||
if (tmp) {
|
||||
if (tmp_locked)
|
||||
btrfs_tree_read_unlock(tmp);
|
||||
if (read_tmp && ret && ret != -EAGAIN)
|
||||
free_extent_buffer_stale(tmp);
|
||||
else
|
||||
free_extent_buffer(tmp);
|
||||
}
|
||||
if (ret && !path_released)
|
||||
btrfs_release_path(p);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2197,8 +2233,8 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = read_block_for_search(root, p, &b, level, slot, key);
|
||||
if (err == -EAGAIN)
|
||||
err = read_block_for_search(root, p, &b, slot, key);
|
||||
if (err == -EAGAIN && !p->nowait)
|
||||
goto again;
|
||||
if (err) {
|
||||
ret = err;
|
||||
@ -2324,8 +2360,8 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
|
||||
goto done;
|
||||
}
|
||||
|
||||
err = read_block_for_search(root, p, &b, level, slot, key);
|
||||
if (err == -EAGAIN)
|
||||
err = read_block_for_search(root, p, &b, slot, key);
|
||||
if (err == -EAGAIN && !p->nowait)
|
||||
goto again;
|
||||
if (err) {
|
||||
ret = err;
|
||||
@ -2334,7 +2370,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
|
||||
|
||||
level = btrfs_header_level(b);
|
||||
btrfs_tree_read_lock(b);
|
||||
b = btrfs_tree_mod_log_rewind(fs_info, p, b, time_seq);
|
||||
b = btrfs_tree_mod_log_rewind(fs_info, b, time_seq);
|
||||
if (!b) {
|
||||
ret = -ENOMEM;
|
||||
goto done;
|
||||
@ -4930,8 +4966,7 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
||||
}
|
||||
|
||||
next = c;
|
||||
ret = read_block_for_search(root, path, &next, level,
|
||||
slot, &key);
|
||||
ret = read_block_for_search(root, path, &next, slot, &key);
|
||||
if (ret == -EAGAIN && !path->nowait)
|
||||
goto again;
|
||||
|
||||
@ -4974,8 +5009,7 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
|
||||
if (!level)
|
||||
break;
|
||||
|
||||
ret = read_block_for_search(root, path, &next, level,
|
||||
0, &key);
|
||||
ret = read_block_for_search(root, path, &next, 0, &key);
|
||||
if (ret == -EAGAIN && !path->nowait)
|
||||
goto again;
|
||||
|
||||
|
@@ -64,9 +64,9 @@ struct btrfs_delayed_node {
 	struct mutex mutex;
 	struct btrfs_inode_item inode_item;
 	refcount_t refs;
+	int count;
 	u64 index_cnt;
 	unsigned long flags;
-	int count;
 	/*
 	 * The size of the next batch of dir index items to insert (if this
 	 * node is from a directory inode). Protected by @mutex.
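
The btrfs_delayed_node hunk above is purely a packing fix: moving the 4-byte count next to refcount_t refs closes a 4-byte hole. A reduced stand-alone model of the effect (simplified field set, not the real struct) prints the two sizes:

/*
 * Reduced model of the padding fix on an LP64 target: a lone 4-byte field
 * sandwiched between 8-byte fields forces 4 bytes of padding; pairing two
 * 4-byte fields avoids it. Field names are simplified placeholders.
 */
#include <stdio.h>
#include <stdint.h>

struct before_layout {
	uint32_t refs;		/* 4-byte hole follows */
	uint64_t index_cnt;
	unsigned long flags;
	int count;
	uint32_t batch_size;
	uint32_t item_leaves;	/* 36 bytes used -> padded to 40 */
};

struct after_layout {
	uint32_t refs;
	int count;		/* fills the hole after refs */
	uint64_t index_cnt;
	unsigned long flags;
	uint32_t batch_size;
	uint32_t item_leaves;	/* 32 bytes, no padding */
};

int main(void)
{
	printf("before: %zu bytes\nafter:  %zu bytes\n",
	       sizeof(struct before_layout), sizeof(struct after_layout));
	return 0;
}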
@ -9,6 +9,7 @@
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "delayed-ref.h"
|
||||
#include "extent-tree.h"
|
||||
#include "transaction.h"
|
||||
#include "qgroup.h"
|
||||
#include "space-info.h"
|
||||
@ -313,39 +314,6 @@ static int comp_refs(struct btrfs_delayed_ref_node *ref1,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* insert a new ref to head ref rbtree */
|
||||
static struct btrfs_delayed_ref_head *htree_insert(struct rb_root_cached *root,
|
||||
struct rb_node *node)
|
||||
{
|
||||
struct rb_node **p = &root->rb_root.rb_node;
|
||||
struct rb_node *parent_node = NULL;
|
||||
struct btrfs_delayed_ref_head *entry;
|
||||
struct btrfs_delayed_ref_head *ins;
|
||||
u64 bytenr;
|
||||
bool leftmost = true;
|
||||
|
||||
ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
|
||||
bytenr = ins->bytenr;
|
||||
while (*p) {
|
||||
parent_node = *p;
|
||||
entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
|
||||
if (bytenr < entry->bytenr) {
|
||||
p = &(*p)->rb_left;
|
||||
} else if (bytenr > entry->bytenr) {
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(node, parent_node, p);
|
||||
rb_insert_color_cached(node, root, leftmost);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
|
||||
struct btrfs_delayed_ref_node *ins)
|
||||
{
|
||||
@ -380,75 +348,32 @@ static struct btrfs_delayed_ref_node* tree_insert(struct rb_root_cached *root,
|
||||
static struct btrfs_delayed_ref_head *find_first_ref_head(
|
||||
struct btrfs_delayed_ref_root *dr)
|
||||
{
|
||||
struct rb_node *n;
|
||||
struct btrfs_delayed_ref_head *entry;
|
||||
unsigned long from = 0;
|
||||
|
||||
n = rb_first_cached(&dr->href_root);
|
||||
if (!n)
|
||||
return NULL;
|
||||
lockdep_assert_held(&dr->lock);
|
||||
|
||||
entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
|
||||
|
||||
return entry;
|
||||
return xa_find(&dr->head_refs, &from, ULONG_MAX, XA_PRESENT);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find a head entry based on bytenr. This returns the delayed ref head if it
|
||||
* was able to find one, or NULL if nothing was in that spot. If return_bigger
|
||||
* is given, the next bigger entry is returned if no exact match is found.
|
||||
*/
|
||||
static struct btrfs_delayed_ref_head *find_ref_head(
|
||||
struct btrfs_delayed_ref_root *dr, u64 bytenr,
|
||||
bool return_bigger)
|
||||
{
|
||||
struct rb_root *root = &dr->href_root.rb_root;
|
||||
struct rb_node *n;
|
||||
struct btrfs_delayed_ref_head *entry;
|
||||
|
||||
n = root->rb_node;
|
||||
entry = NULL;
|
||||
while (n) {
|
||||
entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
|
||||
|
||||
if (bytenr < entry->bytenr)
|
||||
n = n->rb_left;
|
||||
else if (bytenr > entry->bytenr)
|
||||
n = n->rb_right;
|
||||
else
|
||||
return entry;
|
||||
}
|
||||
if (entry && return_bigger) {
|
||||
if (bytenr > entry->bytenr) {
|
||||
n = rb_next(&entry->href_node);
|
||||
if (!n)
|
||||
return NULL;
|
||||
entry = rb_entry(n, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
}
|
||||
return entry;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
static bool btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
if (mutex_trylock(&head->mutex))
|
||||
return 0;
|
||||
return true;
|
||||
|
||||
refcount_inc(&head->refs);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
mutex_lock(&head->mutex);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
if (RB_EMPTY_NODE(&head->href_node)) {
|
||||
if (!head->tracked) {
|
||||
mutex_unlock(&head->mutex);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
return -EAGAIN;
|
||||
return false;
|
||||
}
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
return 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
|
||||
@ -462,7 +387,6 @@ static inline void drop_delayed_ref(struct btrfs_fs_info *fs_info,
|
||||
if (!list_empty(&ref->add_list))
|
||||
list_del(&ref->add_list);
|
||||
btrfs_put_delayed_ref(ref);
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
|
||||
}
|
||||
|
||||
@ -558,33 +482,31 @@ int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq)
|
||||
}
|
||||
|
||||
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
|
||||
const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs)
|
||||
{
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
unsigned long start_index;
|
||||
unsigned long found_index;
|
||||
bool found_head = false;
|
||||
bool locked;
|
||||
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
spin_lock(&delayed_refs->lock);
|
||||
again:
|
||||
head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
|
||||
true);
|
||||
if (!head && delayed_refs->run_delayed_start != 0) {
|
||||
delayed_refs->run_delayed_start = 0;
|
||||
head = find_first_ref_head(delayed_refs);
|
||||
}
|
||||
if (!head)
|
||||
return NULL;
|
||||
|
||||
while (head->processing) {
|
||||
struct rb_node *node;
|
||||
|
||||
node = rb_next(&head->href_node);
|
||||
if (!node) {
|
||||
if (delayed_refs->run_delayed_start == 0)
|
||||
return NULL;
|
||||
delayed_refs->run_delayed_start = 0;
|
||||
goto again;
|
||||
start_index = (delayed_refs->run_delayed_start >> fs_info->sectorsize_bits);
|
||||
xa_for_each_start(&delayed_refs->head_refs, found_index, head, start_index) {
|
||||
if (!head->processing) {
|
||||
found_head = true;
|
||||
break;
|
||||
}
|
||||
head = rb_entry(node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
}
|
||||
if (!found_head) {
|
||||
if (delayed_refs->run_delayed_start == 0) {
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
return NULL;
|
||||
}
|
||||
delayed_refs->run_delayed_start = 0;
|
||||
goto again;
|
||||
}
|
||||
|
||||
head->processing = true;
|
||||
@ -592,18 +514,42 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
|
||||
delayed_refs->num_heads_ready--;
|
||||
delayed_refs->run_delayed_start = head->bytenr +
|
||||
head->num_bytes;
|
||||
|
||||
locked = btrfs_delayed_ref_lock(delayed_refs, head);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
/*
|
||||
* We may have dropped the spin lock to get the head mutex lock, and
|
||||
* that might have given someone else time to free the head. If that's
|
||||
* true, it has been removed from our list and we can move on.
|
||||
*/
|
||||
if (!locked)
|
||||
return ERR_PTR(-EAGAIN);
|
||||
|
||||
return head;
|
||||
}
|
||||
|
||||
void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
spin_lock(&delayed_refs->lock);
|
||||
head->processing = false;
|
||||
delayed_refs->num_heads_ready++;
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
btrfs_delayed_ref_unlock(head);
|
||||
}
|
||||
|
||||
void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
const unsigned long index = (head->bytenr >> fs_info->sectorsize_bits);
|
||||
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
lockdep_assert_held(&head->lock);
|
||||
|
||||
rb_erase_cached(&head->href_node, &delayed_refs->href_root);
|
||||
RB_CLEAR_NODE(&head->href_node);
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
xa_erase(&delayed_refs->head_refs, index);
|
||||
head->tracked = false;
|
||||
delayed_refs->num_heads--;
|
||||
if (!head->processing)
|
||||
delayed_refs->num_heads_ready--;
|
||||
@ -629,7 +575,6 @@ static bool insert_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
if (!exist) {
|
||||
if (ref->action == BTRFS_ADD_DELAYED_REF)
|
||||
list_add_tail(&ref->add_list, &href->ref_add_list);
|
||||
atomic_inc(&root->num_entries);
|
||||
spin_unlock(&href->lock);
|
||||
trans->delayed_ref_updates++;
|
||||
return false;
|
||||
@ -813,7 +758,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
|
||||
head_ref->is_system = (generic_ref->ref_root == BTRFS_CHUNK_TREE_OBJECTID);
|
||||
head_ref->ref_tree = RB_ROOT_CACHED;
|
||||
INIT_LIST_HEAD(&head_ref->ref_add_list);
|
||||
RB_CLEAR_NODE(&head_ref->href_node);
|
||||
head_ref->tracked = false;
|
||||
head_ref->processing = false;
|
||||
head_ref->total_ref_mod = count_mod;
|
||||
spin_lock_init(&head_ref->lock);
|
||||
@ -830,7 +775,6 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
|
||||
qrecord->data_rsv = reserved;
|
||||
qrecord->data_rsv_refroot = generic_ref->ref_root;
|
||||
}
|
||||
qrecord->bytenr = generic_ref->bytenr;
|
||||
qrecord->num_bytes = generic_ref->num_bytes;
|
||||
qrecord->old_roots = NULL;
|
||||
}
|
||||
@ -852,19 +796,33 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_delayed_ref_head *existing;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
const unsigned long index = (head_ref->bytenr >> fs_info->sectorsize_bits);
|
||||
bool qrecord_inserted = false;
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
if (head_ref->bytenr >= MAX_LFS_FILESIZE) {
|
||||
if (qrecord)
|
||||
xa_release(&delayed_refs->dirty_extents, index);
|
||||
btrfs_err_rl(fs_info,
|
||||
"delayed ref head %llu is beyond 32bit page cache and xarray index limit",
|
||||
head_ref->bytenr);
|
||||
btrfs_err_32bit_limit(fs_info);
|
||||
return ERR_PTR(-EOVERFLOW);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Record qgroup extent info if provided */
|
||||
if (qrecord) {
|
||||
int ret;
|
||||
|
||||
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord);
|
||||
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, qrecord,
|
||||
head_ref->bytenr);
|
||||
if (ret) {
|
||||
/* Clean up if insertion fails or item exists. */
|
||||
xa_release(&delayed_refs->dirty_extents,
|
||||
qrecord->bytenr >> fs_info->sectorsize_bits);
|
||||
xa_release(&delayed_refs->dirty_extents, index);
|
||||
/* Caller responsible for freeing qrecord on error. */
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
@ -876,8 +834,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
|
||||
|
||||
trace_add_delayed_ref_head(fs_info, head_ref, action);
|
||||
|
||||
existing = htree_insert(&delayed_refs->href_root,
|
||||
&head_ref->href_node);
|
||||
existing = xa_load(&delayed_refs->head_refs, index);
|
||||
if (existing) {
|
||||
update_existing_head_ref(trans, existing, head_ref);
|
||||
/*
|
||||
@ -887,6 +844,19 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
|
||||
head_ref = existing;
|
||||
} else {
|
||||
existing = xa_store(&delayed_refs->head_refs, index, head_ref, GFP_ATOMIC);
|
||||
if (xa_is_err(existing)) {
|
||||
/* Memory was preallocated by the caller. */
|
||||
ASSERT(xa_err(existing) != -ENOMEM);
|
||||
return ERR_PTR(xa_err(existing));
|
||||
} else if (WARN_ON(existing)) {
|
||||
/*
|
||||
* Shouldn't happen we just did a lookup before under
|
||||
* delayed_refs->lock.
|
||||
*/
|
||||
return ERR_PTR(-EEXIST);
|
||||
}
|
||||
head_ref->tracked = true;
|
||||
/*
|
||||
* We reserve the amount of bytes needed to delete csums when
|
||||
* adding the ref head and not when adding individual drop refs
|
||||
@ -900,7 +870,6 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
delayed_refs->num_heads++;
|
||||
delayed_refs->num_heads_ready++;
|
||||
atomic_inc(&delayed_refs->num_entries);
|
||||
}
|
||||
if (qrecord_inserted_ret)
|
||||
*qrecord_inserted_ret = qrecord_inserted;
|
||||
@ -1008,6 +977,8 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_delayed_ref_head *new_head_ref;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct btrfs_qgroup_extent_record *record = NULL;
|
||||
const unsigned long index = (generic_ref->bytenr >> fs_info->sectorsize_bits);
|
||||
bool qrecord_reserved = false;
|
||||
bool qrecord_inserted;
|
||||
int action = generic_ref->action;
|
||||
bool merged;
|
||||
@ -1023,25 +994,32 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
goto free_node;
|
||||
}
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
|
||||
if (btrfs_qgroup_full_accounting(fs_info) && !generic_ref->skip_qgroup) {
|
||||
record = kzalloc(sizeof(*record), GFP_NOFS);
|
||||
if (!record) {
|
||||
ret = -ENOMEM;
|
||||
goto free_head_ref;
|
||||
}
|
||||
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents,
|
||||
generic_ref->bytenr >> fs_info->sectorsize_bits,
|
||||
GFP_NOFS)) {
|
||||
if (xa_reserve(&delayed_refs->dirty_extents, index, GFP_NOFS)) {
|
||||
ret = -ENOMEM;
|
||||
goto free_record;
|
||||
}
|
||||
qrecord_reserved = true;
|
||||
}
|
||||
|
||||
ret = xa_reserve(&delayed_refs->head_refs, index, GFP_NOFS);
|
||||
if (ret) {
|
||||
if (qrecord_reserved)
|
||||
xa_release(&delayed_refs->dirty_extents, index);
|
||||
goto free_record;
|
||||
}
|
||||
|
||||
init_delayed_ref_common(fs_info, node, generic_ref);
|
||||
init_delayed_ref_head(head_ref, generic_ref, record, reserved);
|
||||
head_ref->extent_op = extent_op;
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
spin_lock(&delayed_refs->lock);
|
||||
|
||||
/*
|
||||
@ -1051,6 +1029,7 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
new_head_ref = add_delayed_ref_head(trans, head_ref, record,
|
||||
action, &qrecord_inserted);
|
||||
if (IS_ERR(new_head_ref)) {
|
||||
xa_release(&delayed_refs->head_refs, index);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
ret = PTR_ERR(new_head_ref);
|
||||
goto free_record;
|
||||
@ -1074,7 +1053,7 @@ static int add_delayed_ref(struct btrfs_trans_handle *trans,
|
||||
kmem_cache_free(btrfs_delayed_ref_node_cachep, node);
|
||||
|
||||
if (qrecord_inserted)
|
||||
return btrfs_qgroup_trace_extent_post(trans, record);
|
||||
return btrfs_qgroup_trace_extent_post(trans, record, generic_ref->bytenr);
|
||||
return 0;
|
||||
|
||||
free_record:
|
||||
@ -1113,6 +1092,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes, u8 level,
|
||||
struct btrfs_delayed_extent_op *extent_op)
|
||||
{
|
||||
const unsigned long index = (bytenr >> trans->fs_info->sectorsize_bits);
|
||||
struct btrfs_delayed_ref_head *head_ref;
|
||||
struct btrfs_delayed_ref_head *head_ref_ret;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
@ -1123,6 +1103,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
.num_bytes = num_bytes,
|
||||
.tree_ref.level = level,
|
||||
};
|
||||
int ret;
|
||||
|
||||
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
|
||||
if (!head_ref)
|
||||
@ -1132,16 +1113,23 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
head_ref->extent_op = extent_op;
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
spin_lock(&delayed_refs->lock);
|
||||
|
||||
ret = xa_reserve(&delayed_refs->head_refs, index, GFP_NOFS);
|
||||
if (ret) {
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
|
||||
return ret;
|
||||
}
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
head_ref_ret = add_delayed_ref_head(trans, head_ref, NULL,
|
||||
BTRFS_UPDATE_DELAYED_HEAD, NULL);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
if (IS_ERR(head_ref_ret)) {
|
||||
xa_release(&delayed_refs->head_refs, index);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
kmem_cache_free(btrfs_delayed_ref_head_cachep, head_ref);
|
||||
return PTR_ERR(head_ref_ret);
|
||||
}
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
|
||||
/*
|
||||
* Need to update the delayed_refs_rsv with any changes we may have
|
||||
@ -1164,11 +1152,15 @@ void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
|
||||
* head node if found, or NULL if not.
|
||||
*/
|
||||
struct btrfs_delayed_ref_head *
|
||||
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 bytenr)
|
||||
btrfs_find_delayed_ref_head(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
u64 bytenr)
|
||||
{
|
||||
const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
|
||||
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
|
||||
return find_ref_head(delayed_refs, bytenr, false);
|
||||
return xa_load(&delayed_refs->head_refs, index);
|
||||
}
|
||||
|
||||
static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent)
|
||||
@ -1238,6 +1230,81 @@ bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
|
||||
return found;
|
||||
}
|
||||
|
||||
void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans)
|
||||
{
|
||||
struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
while (true) {
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
struct rb_node *n;
|
||||
bool pin_bytes = false;
|
||||
|
||||
head = find_first_ref_head(delayed_refs);
|
||||
if (!head)
|
||||
break;
|
||||
|
||||
if (!btrfs_delayed_ref_lock(delayed_refs, head))
|
||||
continue;
|
||||
|
||||
spin_lock(&head->lock);
|
||||
while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
ref = rb_entry(n, struct btrfs_delayed_ref_node, ref_node);
|
||||
drop_delayed_ref(fs_info, delayed_refs, head, ref);
|
||||
}
|
||||
if (head->must_insert_reserved)
|
||||
pin_bytes = true;
|
||||
btrfs_free_delayed_extent_op(head->extent_op);
|
||||
btrfs_delete_ref_head(fs_info, delayed_refs, head);
|
||||
spin_unlock(&head->lock);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
mutex_unlock(&head->mutex);
|
||||
|
||||
if (pin_bytes) {
|
||||
struct btrfs_block_group *bg;
|
||||
|
||||
bg = btrfs_lookup_block_group(fs_info, head->bytenr);
|
||||
if (WARN_ON_ONCE(bg == NULL)) {
|
||||
/*
|
||||
* Unexpected and there's nothing we can do here
|
||||
* because we are in a transaction abort path,
|
||||
* so any errors can only be ignored or reported
|
||||
* while attempting to cleanup all resources.
|
||||
*/
|
||||
btrfs_err(fs_info,
|
||||
"block group for delayed ref at %llu was not found while destroying ref head",
|
||||
head->bytenr);
|
||||
} else {
|
||||
spin_lock(&bg->space_info->lock);
|
||||
spin_lock(&bg->lock);
|
||||
bg->pinned += head->num_bytes;
|
||||
btrfs_space_info_update_bytes_pinned(fs_info,
|
||||
bg->space_info,
|
||||
head->num_bytes);
|
||||
bg->reserved -= head->num_bytes;
|
||||
bg->space_info->bytes_reserved -= head->num_bytes;
|
||||
spin_unlock(&bg->lock);
|
||||
spin_unlock(&bg->space_info->lock);
|
||||
|
||||
btrfs_put_block_group(bg);
|
||||
}
|
||||
|
||||
btrfs_error_unpin_extent_range(fs_info, head->bytenr,
|
||||
head->bytenr + head->num_bytes - 1);
|
||||
}
|
||||
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
cond_resched();
|
||||
spin_lock(&delayed_refs->lock);
|
||||
}
|
||||
btrfs_qgroup_destroy_extent_records(trans);
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
}
|
||||
|
||||
void __cold btrfs_delayed_ref_exit(void)
|
||||
{
|
||||
kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
|
||||
|
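
The delayed-ref hunks in this area replace the bytenr-keyed rbtree of ref heads with an xarray indexed by bytenr >> sectorsize_bits, as described in the pull message. A condensed sketch of the resulting lookup pattern, with locking and refcounting elided and the helper names invented (this assumes the btrfs headers of this series and is not the literal kernel code), is:

/*
 * Condensed sketch, not standalone: delayed ref heads live in an xarray
 * keyed by the extent's logical address right-shifted by sectorsize_bits,
 * which keeps the index dense and within 'unsigned long' on 32-bit hosts.
 */
static unsigned long head_index(const struct btrfs_fs_info *fs_info, u64 bytenr)
{
	return (unsigned long)(bytenr >> fs_info->sectorsize_bits);
}

/* Exact-match lookup, replacing the old rbtree walk in find_ref_head(). */
static struct btrfs_delayed_ref_head *
sketch_find_head(struct btrfs_fs_info *fs_info,
		 struct btrfs_delayed_ref_root *dr, u64 bytenr)
{
	return xa_load(&dr->head_refs, head_index(fs_info, bytenr));
}

/* "First head at or after a start bytenr", replacing rb_first/rb_next. */
static struct btrfs_delayed_ref_head *
sketch_first_head_from(struct btrfs_fs_info *fs_info,
		       struct btrfs_delayed_ref_root *dr, u64 start_bytenr)
{
	unsigned long index = head_index(fs_info, start_bytenr);

	return xa_find(&dr->head_refs, &index, ULONG_MAX, XA_PRESENT);
}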
@ -61,7 +61,8 @@ struct btrfs_delayed_ref_node {
|
||||
/*
|
||||
* If action is BTRFS_ADD_DELAYED_REF, also link this node to
|
||||
* ref_head->ref_add_list, then we do not need to iterate the
|
||||
* whole ref_head->ref_list to find BTRFS_ADD_DELAYED_REF nodes.
|
||||
* refs rbtree in the corresponding delayed ref head
|
||||
* (struct btrfs_delayed_ref_head::ref_tree).
|
||||
*/
|
||||
struct list_head add_list;
|
||||
|
||||
@ -122,12 +123,6 @@ struct btrfs_delayed_extent_op {
|
||||
struct btrfs_delayed_ref_head {
|
||||
u64 bytenr;
|
||||
u64 num_bytes;
|
||||
/*
|
||||
* For insertion into struct btrfs_delayed_ref_root::href_root.
|
||||
* Keep it in the same cache line as 'bytenr' for more efficient
|
||||
* searches in the rbtree.
|
||||
*/
|
||||
struct rb_node href_node;
|
||||
/*
|
||||
* the mutex is held while running the refs, and it is also
|
||||
* held when checking the sum of reference modifications.
|
||||
@ -191,6 +186,11 @@ struct btrfs_delayed_ref_head {
|
||||
bool is_data;
|
||||
bool is_system;
|
||||
bool processing;
|
||||
/*
|
||||
* Indicate if it's currently in the data structure that tracks head
|
||||
* refs (struct btrfs_delayed_ref_root::head_refs).
|
||||
*/
|
||||
bool tracked;
|
||||
};
|
||||
|
||||
enum btrfs_delayed_ref_flags {
|
||||
@ -199,38 +199,52 @@ enum btrfs_delayed_ref_flags {
|
||||
};
|
||||
|
||||
struct btrfs_delayed_ref_root {
|
||||
/* head ref rbtree */
|
||||
struct rb_root_cached href_root;
|
||||
|
||||
/*
|
||||
* Track dirty extent records.
|
||||
* Track head references.
|
||||
* The keys correspond to the logical address of the extent ("bytenr")
|
||||
* right shifted by fs_info->sectorsize_bits. This is both to get a more
|
||||
* dense index space (optimizes xarray structure) and because indexes in
|
||||
* xarrays are of "unsigned long" type, meaning they are 32 bits wide on
|
||||
* 32 bits platforms, limiting the extent range to 4G which is too low
|
||||
* and makes it unusable (truncated index values) on 32 bits platforms.
|
||||
* Protected by the spinlock 'lock' defined below.
|
||||
*/
|
||||
struct xarray head_refs;
|
||||
|
||||
/*
|
||||
* Track dirty extent records.
|
||||
* The keys correspond to the logical address of the extent ("bytenr")
|
||||
* right shifted by fs_info->sectorsize_bits, for same reasons as above.
|
||||
*/
|
||||
struct xarray dirty_extents;
|
||||
|
||||
/* this spin lock protects the rbtree and the entries inside */
|
||||
/*
|
||||
* Protects the xarray head_refs, its entries and the following fields:
|
||||
* num_heads, num_heads_ready, pending_csums and run_delayed_start.
|
||||
*/
|
||||
spinlock_t lock;
|
||||
|
||||
/* how many delayed ref updates we've queued, used by the
|
||||
* throttling code
|
||||
*/
|
||||
atomic_t num_entries;
|
||||
|
||||
/* total number of head nodes in tree */
|
||||
/* Total number of head refs, protected by the spinlock 'lock'. */
|
||||
unsigned long num_heads;
|
||||
|
||||
/* total number of head nodes ready for processing */
|
||||
/*
|
||||
* Total number of head refs ready for processing, protected by the
|
||||
* spinlock 'lock'.
|
||||
*/
|
||||
unsigned long num_heads_ready;
|
||||
|
||||
/*
|
||||
* Track space reserved for deleting csums of data extents.
|
||||
* Protected by the spinlock 'lock'.
|
||||
*/
|
||||
u64 pending_csums;
|
||||
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Track from which bytenr to start searching ref heads.
|
||||
* Protected by the spinlock 'lock'.
|
||||
*/
|
||||
u64 run_delayed_start;
|
||||
|
||||
/*
|
||||
@ -372,19 +386,22 @@ void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
|
||||
struct btrfs_delayed_ref_head *
|
||||
btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
btrfs_find_delayed_ref_head(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
u64 bytenr);
|
||||
int btrfs_delayed_ref_lock(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
static inline void btrfs_delayed_ref_unlock(struct btrfs_delayed_ref_head *head)
|
||||
{
|
||||
mutex_unlock(&head->mutex);
|
||||
}
|
||||
void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
void btrfs_delete_ref_head(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
|
||||
struct btrfs_delayed_ref_head *btrfs_select_ref_head(
|
||||
const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs);
|
||||
void btrfs_unselect_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_delayed_ref_head *head);
|
||||
|
||||
int btrfs_check_delayed_seq(struct btrfs_fs_info *fs_info, u64 seq);
|
||||
|
||||
@ -399,6 +416,7 @@ int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
||||
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head,
|
||||
u64 root, u64 parent);
|
||||
void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans);
|
||||
|
||||
static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
|
||||
{
|
||||
|
@@ -45,7 +45,7 @@
  *
  * - Copy existing extents
  *
- *   This happens by re-using scrub facility, as scrub also iterates through
+ *   This happens by reusing scrub facility, as scrub also iterates through
  *   existing extents from commit root.
  *
  *   Location: scrub_write_block_to_dev_replace() from
@@ -641,6 +641,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
 		return ret;
 
 	down_write(&dev_replace->rwsem);
+	dev_replace->replace_task = current;
 	switch (dev_replace->replace_state) {
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
 	case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -994,6 +995,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
 	list_add(&tgt_device->dev_alloc_list, &fs_devices->alloc_list);
 	fs_devices->rw_devices++;
 
+	dev_replace->replace_task = NULL;
 	up_write(&dev_replace->rwsem);
 	btrfs_rm_dev_replace_blocked(fs_info);
 
@ -27,7 +27,6 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
||||
const char *name,
|
||||
int name_len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
int ret;
|
||||
char *ptr;
|
||||
struct extent_buffer *leaf;
|
||||
@ -35,7 +34,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
||||
ret = btrfs_insert_empty_item(trans, root, path, cpu_key, data_size);
|
||||
if (ret == -EEXIST) {
|
||||
struct btrfs_dir_item *di;
|
||||
di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
|
||||
di = btrfs_match_dir_item_name(path, name, name_len);
|
||||
if (di)
|
||||
return ERR_PTR(-EEXIST);
|
||||
btrfs_extend_item(trans, path, data_size);
|
||||
@ -190,7 +189,7 @@ static struct btrfs_dir_item *btrfs_lookup_match_dir(
|
||||
if (ret > 0)
|
||||
return ERR_PTR(-ENOENT);
|
||||
|
||||
return btrfs_match_dir_item_name(root->fs_info, path, name, name_len);
|
||||
return btrfs_match_dir_item_name(path, name, name_len);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -341,8 +340,7 @@ btrfs_search_dir_index_item(struct btrfs_root *root, struct btrfs_path *path,
|
||||
if (key.objectid != dirid || key.type != BTRFS_DIR_INDEX_KEY)
|
||||
break;
|
||||
|
||||
di = btrfs_match_dir_item_name(root->fs_info, path,
|
||||
name->name, name->len);
|
||||
di = btrfs_match_dir_item_name(path, name->name, name->len);
|
||||
if (di)
|
||||
return di;
|
||||
}
|
||||
@ -378,8 +376,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||
* this walks through all the entries in a dir item and finds one
|
||||
* for a specific name.
|
||||
*/
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_path *path,
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(const struct btrfs_path *path,
|
||||
const char *name, int name_len)
|
||||
{
|
||||
struct btrfs_dir_item *dir_item;
|
||||
|
@ -44,8 +44,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
const char *name, u16 name_len,
|
||||
int mod);
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_path *path,
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(const struct btrfs_path *path,
|
||||
const char *name,
|
||||
int name_len);
|
||||
|
||||
|
@@ -834,7 +834,7 @@ ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
 		return ret;
 	}
 
-	ret = btrfs_write_check(iocb, from, ret);
+	ret = btrfs_write_check(iocb, ret);
 	if (ret < 0) {
 		btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
 		goto out;
@ -917,8 +917,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_fs_info *fs_info)
|
||||
static struct btrfs_root *alloc_log_tree(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
|
||||
@ -966,7 +965,7 @@ int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_root *log_root;
|
||||
|
||||
log_root = alloc_log_tree(trans, fs_info);
|
||||
log_root = alloc_log_tree(fs_info);
|
||||
if (IS_ERR(log_root))
|
||||
return PTR_ERR(log_root);
|
||||
|
||||
@ -992,7 +991,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode_item *inode_item;
|
||||
int ret;
|
||||
|
||||
log_root = alloc_log_tree(trans, fs_info);
|
||||
log_root = alloc_log_tree(fs_info);
|
||||
if (IS_ERR(log_root))
|
||||
return PTR_ERR(log_root);
|
||||
|
||||
@ -2786,6 +2785,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
btrfs_init_scrub(fs_info);
|
||||
btrfs_init_balance(fs_info);
|
||||
btrfs_init_async_reclaim_work(fs_info);
|
||||
btrfs_init_extent_map_shrinker_work(fs_info);
|
||||
|
||||
rwlock_init(&fs_info->block_group_cache_lock);
|
||||
fs_info->block_group_cache_tree = RB_ROOT_CACHED;
|
||||
@ -2852,8 +2852,6 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
spin_lock_init(&fs_info->extent_map_shrinker_lock);
|
||||
|
||||
ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -3202,8 +3200,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
|
||||
const char *options)
|
||||
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices)
|
||||
{
|
||||
u32 sectorsize;
|
||||
u32 nodesize;
|
||||
@ -4186,7 +4183,7 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
|
||||
btrfs_warn(fs_info,
|
||||
"transaction %llu (with %llu dirty metadata bytes) is not committed",
|
||||
trans->transid, dirty_bytes);
|
||||
btrfs_cleanup_one_transaction(trans, fs_info);
|
||||
btrfs_cleanup_one_transaction(trans);
|
||||
|
||||
if (trans == fs_info->running_transaction)
|
||||
fs_info->running_transaction = NULL;
|
||||
@ -4294,6 +4291,7 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
cancel_work_sync(&fs_info->async_reclaim_work);
|
||||
cancel_work_sync(&fs_info->async_data_reclaim_work);
|
||||
cancel_work_sync(&fs_info->preempt_reclaim_work);
|
||||
cancel_work_sync(&fs_info->em_shrinker_work);
|
||||
|
||||
/* Cancel or finish ongoing discard work */
|
||||
btrfs_discard_cleanup(fs_info);
|
||||
@ -4531,75 +4529,6 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
}
|
||||
|
||||
static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct rb_node *node;
|
||||
struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs;
|
||||
struct btrfs_delayed_ref_node *ref;
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
|
||||
struct btrfs_delayed_ref_head *head;
|
||||
struct rb_node *n;
|
||||
bool pin_bytes = false;
|
||||
|
||||
head = rb_entry(node, struct btrfs_delayed_ref_head,
|
||||
href_node);
|
||||
if (btrfs_delayed_ref_lock(delayed_refs, head))
|
||||
continue;
|
||||
|
||||
spin_lock(&head->lock);
|
||||
while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
|
||||
ref = rb_entry(n, struct btrfs_delayed_ref_node,
|
||||
ref_node);
|
||||
rb_erase_cached(&ref->ref_node, &head->ref_tree);
|
||||
RB_CLEAR_NODE(&ref->ref_node);
|
||||
if (!list_empty(&ref->add_list))
|
||||
list_del(&ref->add_list);
|
||||
atomic_dec(&delayed_refs->num_entries);
|
||||
btrfs_put_delayed_ref(ref);
|
||||
btrfs_delayed_refs_rsv_release(fs_info, 1, 0);
|
||||
}
|
||||
if (head->must_insert_reserved)
|
||||
pin_bytes = true;
|
||||
btrfs_free_delayed_extent_op(head->extent_op);
|
||||
btrfs_delete_ref_head(delayed_refs, head);
|
||||
spin_unlock(&head->lock);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
mutex_unlock(&head->mutex);
|
||||
|
||||
if (pin_bytes) {
|
||||
struct btrfs_block_group *cache;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, head->bytenr);
|
||||
BUG_ON(!cache);
|
||||
|
||||
spin_lock(&cache->space_info->lock);
|
||||
spin_lock(&cache->lock);
|
||||
cache->pinned += head->num_bytes;
|
||||
btrfs_space_info_update_bytes_pinned(fs_info,
|
||||
cache->space_info, head->num_bytes);
|
||||
cache->reserved -= head->num_bytes;
|
||||
cache->space_info->bytes_reserved -= head->num_bytes;
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&cache->space_info->lock);
|
||||
|
||||
btrfs_put_block_group(cache);
|
||||
|
||||
btrfs_error_unpin_extent_range(fs_info, head->bytenr,
|
||||
head->bytenr + head->num_bytes - 1);
|
||||
}
|
||||
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
|
||||
btrfs_put_delayed_ref_head(head);
|
||||
cond_resched();
|
||||
spin_lock(&delayed_refs->lock);
|
||||
}
|
||||
btrfs_qgroup_destroy_extent_records(trans);
|
||||
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
}
|
||||
|
||||
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_inode *btrfs_inode;
|
||||
@ -4805,9 +4734,9 @@ static void btrfs_free_all_qgroup_pertrans(struct btrfs_fs_info *fs_info)
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
}
|
||||
|
||||
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
|
||||
struct btrfs_fs_info *fs_info)
|
||||
void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cur_trans->fs_info;
|
||||
struct btrfs_device *dev, *tmp;
|
||||
|
||||
btrfs_cleanup_dirty_bgs(cur_trans, fs_info);
|
||||
@ -4819,7 +4748,7 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
|
||||
list_del_init(&dev->post_commit_list);
|
||||
}
|
||||
|
||||
btrfs_destroy_delayed_refs(cur_trans, fs_info);
|
||||
btrfs_destroy_delayed_refs(cur_trans);
|
||||
|
||||
cur_trans->state = TRANS_STATE_COMMIT_START;
|
||||
wake_up(&fs_info->transaction_blocked_wait);
|
||||
@ -4865,7 +4794,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
|
||||
} else {
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
}
|
||||
btrfs_cleanup_one_transaction(t, fs_info);
|
||||
btrfs_cleanup_one_transaction(t);
|
||||
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (t == fs_info->running_transaction)
|
@@ -52,8 +52,7 @@ struct extent_buffer *btrfs_find_create_tree_block(
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *disk_sb);
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
const char *options);
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices);
void __cold close_ctree(struct btrfs_fs_info *fs_info);
int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
const struct btrfs_super_block *sb, int mirror_num);
@@ -127,8 +126,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
void btrfs_cleanup_one_transaction(struct btrfs_transaction *trans);
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
u64 objectid);
int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags);
@@ -182,7 +182,7 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,

delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
head = btrfs_find_delayed_ref_head(fs_info, delayed_refs, bytenr);
if (head) {
if (!mutex_trylock(&head->mutex)) {
refcount_inc(&head->refs);
@@ -795,7 +795,6 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
if (insert) {
extra_size = btrfs_extent_inline_ref_size(want);
path->search_for_extension = 1;
path->keep_locks = 1;
} else
extra_size = -1;

@@ -946,6 +945,25 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
ret = -EAGAIN;
goto out;
}

if (path->slots[0] + 1 < btrfs_header_nritems(path->nodes[0])) {
struct btrfs_key tmp_key;

btrfs_item_key_to_cpu(path->nodes[0], &tmp_key, path->slots[0] + 1);
if (tmp_key.objectid == bytenr &&
tmp_key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
ret = -EAGAIN;
goto out;
}
goto out_no_entry;
}

if (!path->keep_locks) {
btrfs_release_path(path);
path->keep_locks = 1;
goto again;
}

/*
* To add new inline back ref, we have to make sure
* there is no corresponding back ref item.
@@ -959,13 +977,15 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
goto out;
}
}
out_no_entry:
*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
if (insert) {
if (path->keep_locks) {
path->keep_locks = 0;
path->search_for_extension = 0;
btrfs_unlock_up_safe(path, 1);
}
if (insert)
path->search_for_extension = 0;
return ret;
}

@@ -1807,16 +1827,6 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
return ref;
}

static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
struct btrfs_delayed_ref_head *head)
{
spin_lock(&delayed_refs->lock);
head->processing = false;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(head);
}

static struct btrfs_delayed_extent_op *cleanup_extent_op(
struct btrfs_delayed_ref_head *head)
{
@@ -1891,7 +1901,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,

ret = run_and_cleanup_extent_op(trans, head);
if (ret < 0) {
unselect_delayed_ref_head(delayed_refs, head);
btrfs_unselect_ref_head(delayed_refs, head);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
return ret;
} else if (ret) {
@@ -1910,7 +1920,7 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
spin_unlock(&delayed_refs->lock);
return 1;
}
btrfs_delete_ref_head(delayed_refs, head);
btrfs_delete_ref_head(fs_info, delayed_refs, head);
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);

@@ -1933,39 +1943,6 @@ static int cleanup_ref_head(struct btrfs_trans_handle *trans,
return ret;
}

static struct btrfs_delayed_ref_head *btrfs_obtain_ref_head(
struct btrfs_trans_handle *trans)
{
struct btrfs_delayed_ref_root *delayed_refs =
&trans->transaction->delayed_refs;
struct btrfs_delayed_ref_head *head = NULL;
int ret;

spin_lock(&delayed_refs->lock);
head = btrfs_select_ref_head(delayed_refs);
if (!head) {
spin_unlock(&delayed_refs->lock);
return head;
}

/*
* Grab the lock that says we are going to process all the refs for
* this head
*/
ret = btrfs_delayed_ref_lock(delayed_refs, head);
spin_unlock(&delayed_refs->lock);

/*
* We may have dropped the spin lock to get the head mutex lock, and
* that might have given someone else time to free the head. If that's
* true, it has been removed from our list and we can move on.
*/
if (ret == -EAGAIN)
head = ERR_PTR(-EAGAIN);

return head;
}

static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *locked_ref,
u64 *bytes_released)
@@ -1986,7 +1963,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
if (ref->seq &&
btrfs_check_delayed_seq(fs_info, ref->seq)) {
spin_unlock(&locked_ref->lock);
unselect_delayed_ref_head(delayed_refs, locked_ref);
btrfs_unselect_ref_head(delayed_refs, locked_ref);
return -EAGAIN;
}

@@ -2009,7 +1986,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
default:
WARN_ON(1);
}
atomic_dec(&delayed_refs->num_entries);

/*
* Record the must_insert_reserved flag before we drop the
@@ -2035,7 +2011,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,

btrfs_free_delayed_extent_op(extent_op);
if (ret) {
unselect_delayed_ref_head(delayed_refs, locked_ref);
btrfs_unselect_ref_head(delayed_refs, locked_ref);
btrfs_put_delayed_ref(ref);
return ret;
}
@@ -2073,7 +2049,7 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,

do {
if (!locked_ref) {
locked_ref = btrfs_obtain_ref_head(trans);
locked_ref = btrfs_select_ref_head(fs_info, delayed_refs);
if (IS_ERR_OR_NULL(locked_ref)) {
if (PTR_ERR(locked_ref) == -EAGAIN) {
continue;
@@ -2220,7 +2196,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, u64 min_bytes)
btrfs_create_pending_block_groups(trans);

spin_lock(&delayed_refs->lock);
if (RB_EMPTY_ROOT(&delayed_refs->href_root.rb_root)) {
if (xa_empty(&delayed_refs->head_refs)) {
spin_unlock(&delayed_refs->lock);
return 0;
}
@@ -2275,7 +2251,7 @@ static noinline int check_delayed_ref(struct btrfs_root *root,

delayed_refs = &cur_trans->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
head = btrfs_find_delayed_ref_head(root->fs_info, delayed_refs, bytenr);
if (!head) {
spin_unlock(&delayed_refs->lock);
btrfs_put_transaction(cur_trans);
@@ -3144,7 +3120,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
break;
}

/* Quick path didn't find the EXTEMT/METADATA_ITEM */
/* Quick path didn't find the EXTENT/METADATA_ITEM */
if (path->slots[0] - extent_slot > 5)
break;
extent_slot--;
@@ -3377,13 +3353,14 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
u64 bytenr)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
struct btrfs_delayed_ref_head *head;
struct btrfs_delayed_ref_root *delayed_refs;
int ret = 0;

delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
head = btrfs_find_delayed_ref_head(fs_info, delayed_refs, bytenr);
if (!head)
goto out_delayed_unlock;

@@ -3401,7 +3378,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (!mutex_trylock(&head->mutex))
goto out;

btrfs_delete_ref_head(delayed_refs, head);
btrfs_delete_ref_head(fs_info, delayed_refs, head);
head->processing = false;

spin_unlock(&head->lock);
@@ -3411,7 +3388,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
if (head->must_insert_reserved)
ret = 1;

btrfs_cleanup_ref_head_accounting(trans->fs_info, delayed_refs, head);
btrfs_cleanup_ref_head_accounting(fs_info, delayed_refs, head);
mutex_unlock(&head->mutex);
btrfs_put_delayed_ref_head(head);
return ret;
@@ -5270,7 +5247,7 @@ struct walk_control {
* corrupted file systems must have been caught before calling this function.
*/
static bool visit_node_for_delete(struct btrfs_root *root, struct walk_control *wc,
struct extent_buffer *eb, u64 refs, u64 flags, int slot)
struct extent_buffer *eb, u64 flags, int slot)
{
struct btrfs_key key;
u64 generation;
@@ -5384,7 +5361,7 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
continue;

/* If we don't need to visit this node don't reada. */
if (!visit_node_for_delete(root, wc, eb, refs, flags, slot))
if (!visit_node_for_delete(root, wc, eb, flags, slot))
continue;
reada:
btrfs_readahead_node_child(eb, slot);
@@ -5518,7 +5495,7 @@ static int check_ref_exists(struct btrfs_trans_handle *trans,
*/
delayed_refs = &trans->transaction->delayed_refs;
spin_lock(&delayed_refs->lock);
head = btrfs_find_delayed_ref_head(delayed_refs, bytenr);
head = btrfs_find_delayed_ref_head(root->fs_info, delayed_refs, bytenr);
if (!head)
goto out;
if (!mutex_trylock(&head->mutex)) {
@@ -5737,8 +5714,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,

/* If we don't have to walk into this node skip it. */
if (!visit_node_for_delete(root, wc, path->nodes[level],
wc->refs[level - 1], wc->flags[level - 1],
path->slots[level]))
wc->flags[level - 1], path->slots[level]))
goto skip;

/*
@@ -190,7 +190,7 @@ static void process_one_folio(struct btrfs_fs_info *fs_info,
btrfs_folio_clamp_clear_writeback(fs_info, folio, start, len);

if (folio != locked_folio && (page_ops & PAGE_UNLOCK))
btrfs_folio_end_writer_lock(fs_info, folio, start, len);
btrfs_folio_end_lock(fs_info, folio, start, len);
}

static void __process_folios_contig(struct address_space *mapping,
@@ -276,7 +276,7 @@ static noinline int lock_delalloc_folios(struct inode *inode,
range_start = max_t(u64, folio_pos(folio), start);
range_len = min_t(u64, folio_pos(folio) + folio_size(folio),
end + 1) - range_start;
btrfs_folio_set_writer_lock(fs_info, folio, range_start, range_len);
btrfs_folio_set_lock(fs_info, folio, range_start, range_len);

processed_end = range_start + range_len - 1;
}
@@ -438,7 +438,7 @@ static void end_folio_read(struct folio *folio, bool uptodate, u64 start, u32 le
if (!btrfs_is_subpage(fs_info, folio->mapping))
folio_unlock(folio);
else
btrfs_subpage_end_reader(fs_info, folio, start, len);
btrfs_folio_end_lock(fs_info, folio, start, len);
}

/*
@@ -495,7 +495,7 @@ static void begin_folio_read(struct btrfs_fs_info *fs_info, struct folio *folio)
return;

ASSERT(folio_test_private(folio));
btrfs_subpage_start_reader(fs_info, folio, folio_pos(folio), PAGE_SIZE);
btrfs_folio_set_lock(fs_info, folio, folio_pos(folio), PAGE_SIZE);
}

/*
@@ -1102,6 +1102,45 @@ int btrfs_read_folio(struct file *file, struct folio *folio)
return ret;
}

static void set_delalloc_bitmap(struct folio *folio, unsigned long *delalloc_bitmap,
u64 start, u32 len)
{
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
const u64 folio_start = folio_pos(folio);
unsigned int start_bit;
unsigned int nbits;

ASSERT(start >= folio_start && start + len <= folio_start + PAGE_SIZE);
start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
nbits = len >> fs_info->sectorsize_bits;
ASSERT(bitmap_test_range_all_zero(delalloc_bitmap, start_bit, nbits));
bitmap_set(delalloc_bitmap, start_bit, nbits);
}

static bool find_next_delalloc_bitmap(struct folio *folio,
unsigned long *delalloc_bitmap, u64 start,
u64 *found_start, u32 *found_len)
{
struct btrfs_fs_info *fs_info = folio_to_fs_info(folio);
const u64 folio_start = folio_pos(folio);
const unsigned int bitmap_size = fs_info->sectors_per_page;
unsigned int start_bit;
unsigned int first_zero;
unsigned int first_set;

ASSERT(start >= folio_start && start < folio_start + PAGE_SIZE);

start_bit = (start - folio_start) >> fs_info->sectorsize_bits;
first_set = find_next_bit(delalloc_bitmap, bitmap_size, start_bit);
if (first_set >= bitmap_size)
return false;

*found_start = folio_start + (first_set << fs_info->sectorsize_bits);
first_zero = find_next_zero_bit(delalloc_bitmap, bitmap_size, first_set);
*found_len = (first_zero - first_set) << fs_info->sectorsize_bits;
return true;
}

/*
* helper for extent_writepage(), doing all of the delayed allocation setup.
*
@@ -1121,6 +1160,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
const bool is_subpage = btrfs_is_subpage(fs_info, folio->mapping);
const u64 page_start = folio_pos(folio);
const u64 page_end = page_start + folio_size(folio) - 1;
unsigned long delalloc_bitmap = 0;
/*
* Save the last found delalloc end. As the delalloc end can go beyond
* page boundary, thus we cannot rely on subpage bitmap to locate the
@@ -1131,6 +1171,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
u64 delalloc_end = page_end;
u64 delalloc_to_write = 0;
int ret = 0;
int bit;

/* Save the dirty bitmap as our submission bitmap will be a subset of it. */
if (btrfs_is_subpage(fs_info, inode->vfs_inode.i_mapping)) {
@@ -1140,6 +1181,12 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
bio_ctrl->submit_bitmap = 1;
}

for_each_set_bit(bit, &bio_ctrl->submit_bitmap, fs_info->sectors_per_page) {
u64 start = page_start + (bit << fs_info->sectorsize_bits);

btrfs_folio_set_lock(fs_info, folio, start, fs_info->sectorsize);
}

/* Lock all (subpage) delalloc ranges inside the folio first. */
while (delalloc_start < page_end) {
delalloc_end = page_end;
@@ -1148,9 +1195,8 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
delalloc_start = delalloc_end + 1;
continue;
}
btrfs_folio_set_writer_lock(fs_info, folio, delalloc_start,
min(delalloc_end, page_end) + 1 -
delalloc_start);
set_delalloc_bitmap(folio, &delalloc_bitmap, delalloc_start,
min(delalloc_end, page_end) + 1 - delalloc_start);
last_delalloc_end = delalloc_end;
delalloc_start = delalloc_end + 1;
}
@@ -1175,7 +1221,7 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
found_len = last_delalloc_end + 1 - found_start;
found = true;
} else {
found = btrfs_subpage_find_writer_locked(fs_info, folio,
found = find_next_delalloc_bitmap(folio, &delalloc_bitmap,
delalloc_start, &found_start, &found_len);
}
if (!found)
@@ -1314,7 +1360,7 @@ static int submit_one_sector(struct btrfs_inode *inode,
* a folio for a range already written to disk.
*/
btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize);
btrfs_set_range_writeback(inode, filepos, filepos + sectorsize - 1);
btrfs_folio_set_writeback(fs_info, folio, filepos, sectorsize);
/*
* Above call should set the whole folio with writeback flag, even
* just for a single subpage sector.
@@ -1391,8 +1437,6 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
goto out;
submitted_io = true;
}

btrfs_folio_assert_not_dirty(fs_info, folio, start, len);
out:
/*
* If we didn't submitted any sector (>= i_size), folio dirty get
@@ -1476,7 +1520,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
* Only unlock ranges that are submitted. As there can be some async
* submitted ranges inside the folio.
*/
btrfs_folio_end_writer_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
btrfs_folio_end_lock_bitmap(fs_info, folio, bio_ctrl->submit_bitmap);
ASSERT(ret <= 0);
return ret;
}
@@ -2115,7 +2159,27 @@ static int extent_write_cache_pages(struct address_space *mapping,
continue;
}

if (wbc->sync_mode != WB_SYNC_NONE) {
/*
* For subpage case, compression can lead to mixed
* writeback and dirty flags, e.g:
* 0 32K 64K 96K 128K
* | |//////||/////| |//|
*
* In above case, [32K, 96K) is asynchronously submitted
* for compression, and [124K, 128K) needs to be written back.
*
* If we didn't wait wrtiteback for page 64K, [128K, 128K)
* won't be submitted as the page still has writeback flag
* and will be skipped in the next check.
*
* This mixed writeback and dirty case is only possible for
* subpage case.
*
* TODO: Remove this check after migrating compression to
* regular submission.
*/
if (wbc->sync_mode != WB_SYNC_NONE ||
btrfs_is_subpage(inode_to_fs_info(inode), mapping)) {
if (folio_test_writeback(folio))
submit_write_bio(bio_ctrl, 0);
folio_wait_writeback(folio);
@@ -2200,7 +2264,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
u32 cur_len = cur_end + 1 - cur;
struct folio *folio;

folio = __filemap_get_folio(mapping, cur >> PAGE_SHIFT, 0, 0);
folio = filemap_get_folio(mapping, cur >> PAGE_SHIFT);

/*
* This shouldn't happen, the pages are pinned and locked, this
@@ -2233,7 +2297,7 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
cur, cur_len, !ret);
mapping_set_error(mapping, ret);
}
btrfs_folio_end_writer_lock(fs_info, folio, cur, cur_len);
btrfs_folio_end_lock(fs_info, folio, cur, cur_len);
if (ret < 0)
found_error = true;
next_page:
@@ -2317,7 +2381,7 @@ int extent_invalidate_folio(struct extent_io_tree *tree,
* to drop the page.
*/
static bool try_release_extent_state(struct extent_io_tree *tree,
struct folio *folio, gfp_t mask)
struct folio *folio)
{
u64 start = folio_pos(folio);
u64 end = start + PAGE_SIZE - 1;
@@ -2428,7 +2492,7 @@ bool try_release_extent_mapping(struct folio *folio, gfp_t mask)
cond_resched();
}
}
return try_release_extent_state(io_tree, folio, mask);
return try_release_extent_state(io_tree, folio);
}

static void __free_extent_buffer(struct extent_buffer *eb)
@@ -2442,7 +2506,7 @@ static int extent_buffer_under_io(const struct extent_buffer *eb)
test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
}

static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *folio)
static bool folio_range_has_eb(struct folio *folio)
{
struct btrfs_subpage *subpage;

@@ -2452,12 +2516,6 @@ static bool folio_range_has_eb(struct btrfs_fs_info *fs_info, struct folio *foli
subpage = folio_get_private(folio);
if (atomic_read(&subpage->eb_refs))
return true;
/*
* Even there is no eb refs here, we may still have
* end_folio_read() call relying on page::private.
*/
if (atomic_read(&subpage->readers))
return true;
}
return false;
}
@@ -2516,7 +2574,7 @@ static void detach_extent_buffer_folio(const struct extent_buffer *eb, struct fo
* We can only detach the folio private if there are no other ebs in the
* page range and no unfinished IO.
*/
if (!folio_range_has_eb(fs_info, folio))
if (!folio_range_has_eb(folio))
btrfs_detach_subpage(fs_info, folio);

spin_unlock(&folio->mapping->i_private_lock);
@@ -3121,7 +3179,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
}
/*
* Now all pages of that extent buffer is unmapped, set UNMAPPED flag,
* so it can be cleaned up without utlizing page->mapping.
* so it can be cleaned up without utilizing page->mapping.
*/
set_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags);

@@ -4221,7 +4279,6 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
u64 bytenr, u64 owner_root, u64 gen, int level)
{
struct btrfs_tree_parent_check check = {
.has_first_key = 0,
.level = level,
.transid = gen
};
@@ -77,10 +77,13 @@ static u64 range_end(u64 start, u64 len)
return start + len;
}

static void dec_evictable_extent_maps(struct btrfs_inode *inode)
static void remove_em(struct btrfs_inode *inode, struct extent_map *em)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;

rb_erase(&em->rb_node, &inode->extent_tree.root);
RB_CLEAR_NODE(&em->rb_node);

if (!btrfs_is_testing(fs_info) && is_fstree(btrfs_root_id(inode->root)))
percpu_counter_dec(&fs_info->evictable_extent_maps);
}
@@ -339,7 +342,6 @@ static void validate_extent_map(struct btrfs_fs_info *fs_info, struct extent_map
static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *tree = &inode->extent_tree;
struct extent_map *merge = NULL;
struct rb_node *rb;

@@ -371,10 +373,8 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
em->flags |= EXTENT_FLAG_MERGED;

validate_extent_map(fs_info, em);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node);
remove_em(inode, merge);
free_extent_map(merge);
dec_evictable_extent_maps(inode);
}
}

@@ -386,12 +386,10 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
merge_ondisk_extents(em, merge, em);
validate_extent_map(fs_info, em);
rb_erase(&merge->rb_node, &tree->root);
RB_CLEAR_NODE(&merge->rb_node);
em->generation = max(em->generation, merge->generation);
em->flags |= EXTENT_FLAG_MERGED;
remove_em(inode, merge);
free_extent_map(merge);
dec_evictable_extent_maps(inode);
}
}

@@ -588,12 +586,10 @@ void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
lockdep_assert_held_write(&tree->lock);

WARN_ON(em->flags & EXTENT_FLAG_PINNED);
rb_erase(&em->rb_node, &tree->root);
if (!(em->flags & EXTENT_FLAG_LOGGING))
list_del_init(&em->list);
RB_CLEAR_NODE(&em->rb_node);

dec_evictable_extent_maps(inode);
remove_em(inode, em);
}

static void replace_extent_mapping(struct btrfs_inode *inode,
@@ -1122,13 +1118,12 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
struct btrfs_em_shrink_ctx {
long nr_to_scan;
long scanned;
u64 last_ino;
u64 last_root;
};

static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_ctx *ctx)
{
const u64 cur_fs_gen = btrfs_get_fs_generation(inode->root->fs_info);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
const u64 cur_fs_gen = btrfs_get_fs_generation(fs_info);
struct extent_map_tree *tree = &inode->extent_tree;
long nr_dropped = 0;
struct rb_node *node;
@@ -1201,7 +1196,8 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
* lock. This is to avoid slowing other tasks trying to take the
* lock.
*/
if (need_resched() || rwlock_needbreak(&tree->lock))
if (need_resched() || rwlock_needbreak(&tree->lock) ||
btrfs_fs_closing(fs_info))
break;
node = next;
}
@@ -1213,19 +1209,21 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c

static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx *ctx)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_inode *inode;
long nr_dropped = 0;
u64 min_ino = ctx->last_ino + 1;
u64 min_ino = fs_info->em_shrinker_last_ino + 1;

inode = btrfs_find_first_inode(root, min_ino);
while (inode) {
nr_dropped += btrfs_scan_inode(inode, ctx);

min_ino = btrfs_ino(inode) + 1;
ctx->last_ino = btrfs_ino(inode);
fs_info->em_shrinker_last_ino = btrfs_ino(inode);
btrfs_add_delayed_iput(inode);

if (ctx->scanned >= ctx->nr_to_scan)
if (ctx->scanned >= ctx->nr_to_scan ||
btrfs_fs_closing(inode->root->fs_info))
break;

cond_resched();
@@ -1241,52 +1239,43 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx
* inode if there is one or we will find out this was the last
* one and move to the next root.
*/
ctx->last_root = btrfs_root_id(root);
fs_info->em_shrinker_last_root = btrfs_root_id(root);
} else {
/*
* No more inodes in this root, set extent_map_shrinker_last_ino to 0 so
* that when processing the next root we start from its first inode.
*/
ctx->last_ino = 0;
ctx->last_root = btrfs_root_id(root) + 1;
fs_info->em_shrinker_last_ino = 0;
fs_info->em_shrinker_last_root = btrfs_root_id(root) + 1;
}

return nr_dropped;
}

long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
static void btrfs_extent_map_shrinker_worker(struct work_struct *work)
{
struct btrfs_fs_info *fs_info;
struct btrfs_em_shrink_ctx ctx;
u64 start_root_id;
u64 next_root_id;
bool cycled = false;
long nr_dropped = 0;

fs_info = container_of(work, struct btrfs_fs_info, em_shrinker_work);

ctx.scanned = 0;
ctx.nr_to_scan = nr_to_scan;
ctx.nr_to_scan = atomic64_read(&fs_info->em_shrinker_nr_to_scan);

/*
* In case we have multiple tasks running this shrinker, make the next
* one start from the next inode in case it starts before we finish.
*/
spin_lock(&fs_info->extent_map_shrinker_lock);
ctx.last_ino = fs_info->extent_map_shrinker_last_ino;
fs_info->extent_map_shrinker_last_ino++;
ctx.last_root = fs_info->extent_map_shrinker_last_root;
spin_unlock(&fs_info->extent_map_shrinker_lock);

start_root_id = ctx.last_root;
next_root_id = ctx.last_root;
start_root_id = fs_info->em_shrinker_last_root;
next_root_id = fs_info->em_shrinker_last_root;

if (trace_btrfs_extent_map_shrinker_scan_enter_enabled()) {
s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);

trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr_to_scan,
nr, ctx.last_root,
ctx.last_ino);
trace_btrfs_extent_map_shrinker_scan_enter(fs_info, nr);
}

while (ctx.scanned < ctx.nr_to_scan) {
while (ctx.scanned < ctx.nr_to_scan && !btrfs_fs_closing(fs_info)) {
struct btrfs_root *root;
unsigned long count;

@@ -1300,8 +1289,8 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
spin_unlock(&fs_info->fs_roots_radix_lock);
if (start_root_id > 0 && !cycled) {
next_root_id = 0;
ctx.last_root = 0;
ctx.last_ino = 0;
fs_info->em_shrinker_last_root = 0;
fs_info->em_shrinker_last_ino = 0;
cycled = true;
continue;
}
@@ -1320,29 +1309,40 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
btrfs_put_root(root);
}

/*
* In case of multiple tasks running this extent map shrinking code this
* isn't perfect but it's simple and silences things like KCSAN. It's
* not possible to know which task made more progress because we can
* cycle back to the first root and first inode if it's not the first
* time the shrinker ran, see the above logic. Also a task that started
* later may finish ealier than another task and made less progress. So
* make this simple and update to the progress of the last task that
* finished, with the occasional possiblity of having two consecutive
* runs of the shrinker process the same inodes.
*/
spin_lock(&fs_info->extent_map_shrinker_lock);
fs_info->extent_map_shrinker_last_ino = ctx.last_ino;
fs_info->extent_map_shrinker_last_root = ctx.last_root;
spin_unlock(&fs_info->extent_map_shrinker_lock);

if (trace_btrfs_extent_map_shrinker_scan_exit_enabled()) {
s64 nr = percpu_counter_sum_positive(&fs_info->evictable_extent_maps);

trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped,
nr, ctx.last_root,
ctx.last_ino);
trace_btrfs_extent_map_shrinker_scan_exit(fs_info, nr_dropped, nr);
}

return nr_dropped;
atomic64_set(&fs_info->em_shrinker_nr_to_scan, 0);
}

void btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan)
{
/*
* Do nothing if the shrinker is already running. In case of high memory
* pressure we can have a lot of tasks calling us and all passing the
* same nr_to_scan value, but in reality we may need only to free
* nr_to_scan extent maps (or less). In case we need to free more than
* that, we will be called again by the fs shrinker, so no worries about
* not doing enough work to reclaim memory from extent maps.
* We can also be repeatedly called with the same nr_to_scan value
* simply because the shrinker runs asynchronously and multiple calls
* to this function are made before the shrinker does enough progress.
*
* That's why we set the atomic counter to nr_to_scan only if its
* current value is zero, instead of incrementing the counter by
* nr_to_scan.
*/
if (atomic64_cmpxchg(&fs_info->em_shrinker_nr_to_scan, 0, nr_to_scan) != 0)
return;

queue_work(system_unbound_wq, &fs_info->em_shrinker_work);
}

void btrfs_init_extent_map_shrinker_work(struct btrfs_fs_info *fs_info)
{
atomic64_set(&fs_info->em_shrinker_nr_to_scan, 0);
INIT_WORK(&fs_info->em_shrinker_work, btrfs_extent_map_shrinker_worker);
}
@@ -189,6 +189,7 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode,
int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
struct extent_map *new_em,
bool modified);
long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan);
void btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan);
void btrfs_init_extent_map_shrinker_work(struct btrfs_fs_info *fs_info);

#endif
@@ -186,7 +186,7 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
* we have in the cache is the last delalloc range we
* found while the file extent item we found can be
* either for a whole delalloc range we previously
* emmitted or only a part of that range.
* emitted or only a part of that range.
*
* We have two cases here:
*
@@ -194,13 +194,13 @@ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
* cached extent's end. In this case just ignore the
* current file extent item because we don't want to
* overlap with previous ranges that may have been
* emmitted already;
* emitted already;
*
* 2) The file extent item starts behind the currently
* cached extent but its end offset goes beyond the
* end offset of the cached extent. We don't want to
* overlap with a previous range that may have been
* emmitted already, so we emit the currently cached
* emitted already, so we emit the currently cached
* extent and then partially store the current file
* extent item's range in the cache, for the subrange
* going the cached extent's end to the end of the

fs/btrfs/file.c
@@ -37,33 +37,30 @@
#include "file.h"
#include "super.h"

/* simple helper to fault in pages and copy. This should go away
* and be replaced with calls into generic code.
/*
* Helper to fault in page and copy. This should go away and be replaced with
* calls into generic code.
*/
static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
struct page **prepared_pages,
struct iov_iter *i)
struct folio *folio, struct iov_iter *i)
{
size_t copied = 0;
size_t total_copied = 0;
int pg = 0;
int offset = offset_in_page(pos);

while (write_bytes > 0) {
size_t count = min_t(size_t,
PAGE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg];
size_t count = min_t(size_t, PAGE_SIZE - offset, write_bytes);
/*
* Copy data from userspace to the current page
*/
copied = copy_page_from_iter_atomic(page, offset, count, i);
copied = copy_folio_from_iter_atomic(folio, offset, count, i);

/* Flush processor's dcache for this page */
flush_dcache_page(page);
flush_dcache_folio(folio);

/*
* if we get a partial write, we can end up with
* partially up to date pages. These add
* partially up to date page. These add
* a lot of complexity, so make sure they don't
* happen by forcing this copy to be retried.
*
@@ -71,7 +68,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
* back to page at a time copies after we return 0.
*/
if (unlikely(copied < count)) {
if (!PageUptodate(page)) {
if (!folio_test_uptodate(folio)) {
iov_iter_revert(i, copied);
copied = 0;
}
@@ -82,54 +79,44 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
write_bytes -= copied;
total_copied += copied;
offset += copied;
if (offset == PAGE_SIZE) {
pg++;
offset = 0;
}
}
return total_copied;
}

/*
* unlocks pages after btrfs_file_write is done with them
* Unlock folio after btrfs_file_write() is done with it.
*/
static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
struct page **pages, size_t num_pages,
static void btrfs_drop_folio(struct btrfs_fs_info *fs_info, struct folio *folio,
u64 pos, u64 copied)
{
size_t i;
u64 block_start = round_down(pos, fs_info->sectorsize);
u64 block_len = round_up(pos + copied, fs_info->sectorsize) - block_start;

ASSERT(block_len <= U32_MAX);
for (i = 0; i < num_pages; i++) {
/* page checked is some magic around finding pages that
* have been modified without going through btrfs_set_page_dirty
* clear it here. There should be no need to mark the pages
* accessed as prepare_pages should have marked them accessed
* in prepare_pages via find_or_create_page()
*/
btrfs_folio_clamp_clear_checked(fs_info, page_folio(pages[i]),
block_start, block_len);
unlock_page(pages[i]);
put_page(pages[i]);
}
/*
* Folio checked is some magic around finding folios that have been
* modified without going through btrfs_dirty_folio(). Clear it here.
* There should be no need to mark the pages accessed as
* prepare_one_folio() should have marked them accessed in
* prepare_one_folio() via find_or_create_page()
*/
btrfs_folio_clamp_clear_checked(fs_info, folio, block_start, block_len);
folio_unlock(folio);
folio_put(folio);
}

/*
* After btrfs_copy_from_user(), update the following things for delalloc:
* - Mark newly dirtied pages as DELALLOC in the io tree.
* - Mark newly dirtied folio as DELALLOC in the io tree.
* Used to advise which range is to be written back.
* - Mark modified pages as Uptodate/Dirty and not needing COW fixup
* - Mark modified folio as Uptodate/Dirty and not needing COW fixup
* - Update inode size for past EOF write
*/
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos, size_t write_bytes,
struct extent_state **cached, bool noreserve)
int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
size_t write_bytes, struct extent_state **cached, bool noreserve)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
int ret = 0;
int i;
u64 num_bytes;
u64 start_pos;
u64 end_of_last_block;
@@ -147,6 +134,8 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
num_bytes = round_up(write_bytes + pos - start_pos,
fs_info->sectorsize);
ASSERT(num_bytes <= U32_MAX);
ASSERT(folio_pos(folio) <= pos &&
folio_pos(folio) + folio_size(folio) >= pos + write_bytes);

end_of_last_block = start_pos + num_bytes - 1;

@@ -163,16 +152,9 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
if (ret)
return ret;

for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];

btrfs_folio_clamp_set_uptodate(fs_info, page_folio(p),
start_pos, num_bytes);
btrfs_folio_clamp_clear_checked(fs_info, page_folio(p),
start_pos, num_bytes);
btrfs_folio_clamp_set_dirty(fs_info, page_folio(p),
start_pos, num_bytes);
}
btrfs_folio_clamp_set_uptodate(fs_info, folio, start_pos, num_bytes);
btrfs_folio_clamp_clear_checked(fs_info, folio, start_pos, num_bytes);
btrfs_folio_clamp_set_dirty(fs_info, folio, start_pos, num_bytes);

/*
* we've only changed i_size in ram, and we haven't updated
@@ -851,55 +833,49 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
}

/*
* on error we return an unlocked page and the error value
* on success we return a locked page and 0
* On error return an unlocked folio and the error value
* On success return a locked folio and 0
*/
static int prepare_uptodate_page(struct inode *inode,
struct page *page, u64 pos,
bool force_uptodate)
static int prepare_uptodate_folio(struct inode *inode, struct folio *folio, u64 pos,
u64 len, bool force_uptodate)
{
struct folio *folio = page_folio(page);
u64 clamp_start = max_t(u64, pos, folio_pos(folio));
u64 clamp_end = min_t(u64, pos + len, folio_pos(folio) + folio_size(folio));
int ret = 0;

if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
!PageUptodate(page)) {
ret = btrfs_read_folio(NULL, folio);
if (ret)
return ret;
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
return -EIO;
}
if (folio_test_uptodate(folio))
return 0;

/*
* Since btrfs_read_folio() will unlock the folio before it
* returns, there is a window where btrfs_release_folio() can be
* called to release the page. Here we check both inode
* mapping and PagePrivate() to make sure the page was not
* released.
*
* The private flag check is essential for subpage as we need
* to store extra bitmap using folio private.
*/
if (page->mapping != inode->i_mapping || !folio_test_private(folio)) {
unlock_page(page);
return -EAGAIN;
}
if (!force_uptodate &&
IS_ALIGNED(clamp_start, PAGE_SIZE) &&
IS_ALIGNED(clamp_end, PAGE_SIZE))
return 0;

ret = btrfs_read_folio(NULL, folio);
if (ret)
return ret;
folio_lock(folio);
if (!folio_test_uptodate(folio)) {
folio_unlock(folio);
return -EIO;
}

/*
* Since btrfs_read_folio() will unlock the folio before it returns,
* there is a window where btrfs_release_folio() can be called to
* release the page. Here we check both inode mapping and page
* private to make sure the page was not released.
*
* The private flag check is essential for subpage as we need to store
* extra bitmap using folio private.
*/
if (folio->mapping != inode->i_mapping || !folio_test_private(folio)) {
folio_unlock(folio);
return -EAGAIN;
}
return 0;
}

static fgf_t get_prepare_fgp_flags(bool nowait)
{
fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT;

if (nowait)
fgp_flags |= FGP_NOWAIT;

return fgp_flags;
}

static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
{
gfp_t gfp;
@@ -914,89 +890,67 @@ static gfp_t get_prepare_gfp_flags(struct inode *inode, bool nowait)
}

/*
* this just gets pages into the page cache and locks them down.
* Get folio into the page cache and lock it.
*/
static noinline int prepare_pages(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos,
size_t write_bytes, bool force_uptodate,
bool nowait)
static noinline int prepare_one_folio(struct inode *inode, struct folio **folio_ret,
loff_t pos, size_t write_bytes,
bool force_uptodate, bool nowait)
{
int i;
unsigned long index = pos >> PAGE_SHIFT;
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
fgf_t fgp_flags = (nowait ? FGP_WRITEBEGIN | FGP_NOWAIT : FGP_WRITEBEGIN);
struct folio *folio;
int ret = 0;
int faili;

for (i = 0; i < num_pages; i++) {
again:
pages[i] = pagecache_get_page(inode->i_mapping, index + i,
fgp_flags, mask | __GFP_WRITE);
if (!pages[i]) {
faili = i - 1;
if (nowait)
ret = -EAGAIN;
else
ret = -ENOMEM;
goto fail;
}

ret = set_page_extent_mapped(pages[i]);
if (ret < 0) {
faili = i;
goto fail;
}

if (i == 0)
ret = prepare_uptodate_page(inode, pages[i], pos,
force_uptodate);
if (!ret && i == num_pages - 1)
ret = prepare_uptodate_page(inode, pages[i],
pos + write_bytes, false);
if (ret) {
put_page(pages[i]);
if (!nowait && ret == -EAGAIN) {
ret = 0;
goto again;
}
faili = i - 1;
goto fail;
}
wait_on_page_writeback(pages[i]);
folio = __filemap_get_folio(inode->i_mapping, index, fgp_flags, mask);
if (IS_ERR(folio)) {
if (nowait)
ret = -EAGAIN;
else
ret = PTR_ERR(folio);
return ret;
}

/* Only support page sized folio yet. */
ASSERT(folio_order(folio) == 0);
ret = set_folio_extent_mapped(folio);
if (ret < 0) {
folio_unlock(folio);
folio_put(folio);
return ret;
}
ret = prepare_uptodate_folio(inode, folio, pos, write_bytes, force_uptodate);
if (ret) {
/* The folio is already unlocked. */
folio_put(folio);
if (!nowait && ret == -EAGAIN) {
ret = 0;
goto again;
}
return ret;
}
*folio_ret = folio;
return 0;
fail:
while (faili >= 0) {
unlock_page(pages[faili]);
put_page(pages[faili]);
faili--;
}
return ret;

}

/*
* This function locks the extent and properly waits for data=ordered extents
* to finish before allowing the pages to be modified if need.
* Locks the extent and properly waits for data=ordered extents to finish
* before allowing the folios to be modified if need.
*
* The return value:
* Return:
* 1 - the extent is locked
* 0 - the extent is not locked, and everything is OK
* -EAGAIN - need re-prepare the pages
* the other < 0 number - Something wrong happens
* -EAGAIN - need to prepare the folios again
*/
static noinline int
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
size_t num_pages, loff_t pos,
size_t write_bytes,
lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct folio *folio,
loff_t pos, size_t write_bytes,
u64 *lockstart, u64 *lockend, bool nowait,
struct extent_state **cached_state)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
u64 start_pos;
u64 last_pos;
int i;
int ret = 0;

start_pos = round_down(pos, fs_info->sectorsize);
@@ -1008,12 +962,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
if (nowait) {
if (!try_lock_extent(&inode->io_tree, start_pos, last_pos,
cached_state)) {
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
pages[i] = NULL;
}

folio_unlock(folio);
folio_put(folio);
return -EAGAIN;
}
} else {
@@ -1027,10 +977,8 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
ordered->file_offset <= last_pos) {
unlock_extent(&inode->io_tree, start_pos, last_pos,
cached_state);
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
put_page(pages[i]);
}
folio_unlock(folio);
folio_put(folio);
btrfs_start_ordered_extent(ordered);
btrfs_put_ordered_extent(ordered);
return -EAGAIN;
@@ -1044,11 +992,10 @@ lock_and_cleanup_extent_if_need(struct btrfs_inode *inode, struct page **pages,
}

/*
* We should be called after prepare_pages() which should have locked
* We should be called after prepare_one_folio() which should have locked
* all pages in the range.
*/
for (i = 0; i < num_pages; i++)
WARN_ON(!PageLocked(pages[i]));
WARN_ON(!folio_test_locked(folio));

return ret;
}
@@ -1120,7 +1067,7 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode)
btrfs_drew_write_unlock(&inode->root->snapshot_lock);
}

int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
int btrfs_write_check(struct kiocb *iocb, size_t count)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
@@ -1175,20 +1122,17 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
loff_t pos;
struct inode *inode = file_inode(file);
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
u64 lockstart;
u64 lockend;
size_t num_written = 0;
int nrptrs;
ssize_t ret;
bool only_release_metadata = false;
bool force_page_uptodate = false;
loff_t old_isize = i_size_read(inode);
unsigned int ilock_flags = 0;
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
unsigned int bdp_flags = (nowait ? BDP_ASYNC : 0);
bool only_release_metadata = false;

if (nowait)
ilock_flags |= BTRFS_ILOCK_TRY;
@@ -1201,38 +1145,26 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
if (ret <= 0)
goto out;

ret = btrfs_write_check(iocb, i, ret);
ret = btrfs_write_check(iocb, ret);
if (ret < 0)
goto out;

pos = iocb->ki_pos;
nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
PAGE_SIZE / (sizeof(struct page *)));
nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
nrptrs = max(nrptrs, 8);
pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
if (!pages) {
ret = -ENOMEM;
goto out;
}

while (iov_iter_count(i) > 0) {
struct extent_state *cached_state = NULL;
size_t offset = offset_in_page(pos);
size_t sector_offset;
size_t write_bytes = min(iov_iter_count(i),
nrptrs * (size_t)PAGE_SIZE -
offset);
size_t num_pages;
size_t write_bytes = min(iov_iter_count(i), PAGE_SIZE - offset);
size_t reserve_bytes;
size_t dirty_pages;
size_t copied;
size_t dirty_sectors;
size_t num_sectors;
struct folio *folio = NULL;
int extents_locked;
bool force_page_uptodate = false;

/*
* Fault pages before locking them in prepare_pages
* Fault pages before locking them in prepare_one_folio()
* to avoid recursive lock
*/
if (unlikely(fault_in_iov_iter_readable(i, write_bytes))) {
@@ -1271,8 +1203,6 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
only_release_metadata = true;
}

num_pages = DIV_ROUND_UP(write_bytes + offset, PAGE_SIZE);
WARN_ON(num_pages > nrptrs);
reserve_bytes = round_up(write_bytes + sector_offset,
fs_info->sectorsize);
WARN_ON(reserve_bytes == 0);
@@ -1300,23 +1230,17 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
break;
}

/*
* This is going to setup the pages array with the number of
* pages we want, so we don't really need to worry about the
* contents of pages from loop to loop
*/
ret = prepare_pages(inode, pages, num_pages,
pos, write_bytes, force_page_uptodate, false);
ret = prepare_one_folio(inode, &folio, pos, write_bytes,
force_page_uptodate, false);
if (ret) {
btrfs_delalloc_release_extents(BTRFS_I(inode),
reserve_bytes);
break;
}

extents_locked = lock_and_cleanup_extent_if_need(
BTRFS_I(inode), pages,
num_pages, pos, write_bytes, &lockstart,
&lockend, nowait, &cached_state);
extents_locked = lock_and_cleanup_extent_if_need(BTRFS_I(inode),
folio, pos, write_bytes, &lockstart,
&lockend, nowait, &cached_state);
if (extents_locked < 0) {
if (!nowait && extents_locked == -EAGAIN)
goto again;
@@ -1327,28 +1251,18 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
break;
}

copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
copied = btrfs_copy_from_user(pos, write_bytes, folio, i);

num_sectors = BTRFS_BYTES_TO_BLKS(fs_info, reserve_bytes);
dirty_sectors = round_up(copied + sector_offset,
fs_info->sectorsize);
dirty_sectors = BTRFS_BYTES_TO_BLKS(fs_info, dirty_sectors);

/*
* if we have trouble faulting in the pages, fall
* back to one page at a time
*/
if (copied < write_bytes)
nrptrs = 1;

if (copied == 0) {
force_page_uptodate = true;
dirty_sectors = 0;
dirty_pages = 0;
} else {
force_page_uptodate = false;
dirty_pages = DIV_ROUND_UP(copied + offset,
PAGE_SIZE);
}

if (num_sectors > dirty_sectors) {
@@ -1358,13 +1272,10 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
btrfs_delalloc_release_metadata(BTRFS_I(inode),
release_bytes, true);
} else {
u64 __pos;

__pos = round_down(pos,
fs_info->sectorsize) +
(dirty_pages << PAGE_SHIFT);
u64 release_start = round_up(pos + copied,
fs_info->sectorsize);
btrfs_delalloc_release_space(BTRFS_I(inode),
data_reserved, __pos,
data_reserved, release_start,
release_bytes, true);
}
}
@@ -1372,15 +1283,14 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
release_bytes = round_up(copied + sector_offset,
fs_info->sectorsize);

ret = btrfs_dirty_pages(BTRFS_I(inode), pages,
dirty_pages, pos, copied,
ret = btrfs_dirty_folio(BTRFS_I(inode), folio, pos, copied,
&cached_state, only_release_metadata);

/*
* If we have not locked the extent range, because the range's
* start offset is >= i_size, we might still have a non-NULL
* cached extent state, acquired while marking the extent range
* as delalloc through btrfs_dirty_pages(). Therefore free any
* as delalloc through btrfs_dirty_page(). Therefore free any
* possible cached extent state to avoid a memory leak.
*/
if (extents_locked)
@@ -1391,7 +1301,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)

btrfs_delalloc_release_extents(BTRFS_I(inode), reserve_bytes);
if (ret) {
btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
btrfs_drop_folio(fs_info, folio, pos, copied);
break;
}

@@ -1399,7 +1309,7 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
if (only_release_metadata)
btrfs_check_nocow_unlock(BTRFS_I(inode));

btrfs_drop_pages(fs_info, pages, num_pages, pos, copied);
btrfs_drop_folio(fs_info, folio, pos, copied);

cond_resched();

@@ -1407,8 +1317,6 @@ ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
num_written += copied;
}

kfree(pages);

if (release_bytes) {
if (only_release_metadata) {
btrfs_check_nocow_unlock(BTRFS_I(inode));
@@ -1453,7 +1361,7 @@ static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
if (ret || encoded->len == 0)
goto out;

ret = btrfs_write_check(iocb, from, encoded->len);
ret = btrfs_write_check(iocb, encoded->len);
if (ret < 0)
goto out;

@@ -3785,6 +3693,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
.uring_cmd = btrfs_uring_cmd,
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};
||||
|
@ -34,9 +34,8 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
|
||||
ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
int btrfs_release_file(struct inode *inode, struct file *file);
|
||||
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
|
||||
size_t num_pages, loff_t pos, size_t write_bytes,
|
||||
struct extent_state **cached, bool noreserve);
|
||||
int btrfs_dirty_folio(struct btrfs_inode *inode, struct folio *folio, loff_t pos,
|
||||
size_t write_bytes, struct extent_state **cached, bool noreserve);
|
||||
int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
|
||||
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
||||
size_t *write_bytes, bool nowait);
|
||||
@ -44,7 +43,7 @@ void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
|
||||
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state,
|
||||
u64 *delalloc_start_ret, u64 *delalloc_end_ret);
|
||||
int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count);
|
||||
int btrfs_write_check(struct kiocb *iocb, size_t count);
|
||||
ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i);
|
||||
|
||||
#endif
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/string_choices.h>
|
||||
#include "ctree.h"
|
||||
#include "fs.h"
|
||||
#include "messages.h"
@@ -1387,6 +1388,7 @@ static int __btrfs_write_out_cache(struct inode *inode,
int bitmaps = 0;
int ret;
int must_iput = 0;
int i_size;

if (!i_size_read(inode))
return -EIO;
@@ -1457,11 +1459,16 @@ static int __btrfs_write_out_cache(struct inode *inode,
io_ctl_zero_remaining_pages(io_ctl);

/* Everything is written out, now we dirty the pages in the file. */
ret = btrfs_dirty_pages(BTRFS_I(inode), io_ctl->pages,
io_ctl->num_pages, 0, i_size_read(inode),
&cached_state, false);
if (ret)
goto out_nospc;
i_size = i_size_read(inode);
for (int i = 0; i < round_up(i_size, PAGE_SIZE) / PAGE_SIZE; i++) {
u64 dirty_start = i * PAGE_SIZE;
u64 dirty_len = min_t(u64, dirty_start + PAGE_SIZE, i_size) - dirty_start;

ret = btrfs_dirty_folio(BTRFS_I(inode), page_folio(io_ctl->pages[i]),
dirty_start, dirty_len, &cached_state, false);
if (ret < 0)
goto out_nospc;
}

if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA))
up_write(&block_group->data_rwsem);
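The replacement loop above dirties the free-space-cache file one folio at a time and clamps the final chunk to the inode size. A small userspace sketch of that chunking, assuming a 4 KiB page size and local helpers in place of round_up()/min_t():

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL  /* assumed page size */

static uint64_t round_up_u64(uint64_t x, uint64_t a) { return (x + a - 1) / a * a; }
static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

int main(void)
{
	uint64_t i_size = 10000;  /* example free-space-cache inode size */

	for (uint64_t i = 0; i < round_up_u64(i_size, PAGE_SIZE) / PAGE_SIZE; i++) {
		uint64_t dirty_start = i * PAGE_SIZE;
		uint64_t dirty_len = min_u64(dirty_start + PAGE_SIZE, i_size) - dirty_start;

		/* The kernel loop hands this range to btrfs_dirty_folio(). */
		printf("chunk %llu: start=%llu len=%llu\n",
		       (unsigned long long)i,
		       (unsigned long long)dirty_start,
		       (unsigned long long)dirty_len);
	}
	return 0;
}

For i_size = 10000 it prints three chunks of 4096, 4096 and 1808 bytes.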
@@ -2936,12 +2943,11 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
if (info->bytes >= bytes && !block_group->ro)
count++;
btrfs_crit(fs_info, "entry offset %llu, bytes %llu, bitmap %s",
info->offset, info->bytes,
(info->bitmap) ? "yes" : "no");
info->offset, info->bytes, str_yes_no(info->bitmap));
}
spin_unlock(&ctl->tree_lock);
btrfs_info(fs_info, "block group has cluster?: %s",
list_empty(&block_group->cluster_list) ? "no" : "yes");
str_no_yes(list_empty(&block_group->cluster_list)));
btrfs_info(fs_info,
"%d free space entries at or bigger than %llu bytes",
count, bytes);
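Both message sites above switch from open-coded ternaries to the str_yes_no()/str_no_yes() helpers that come with the new <linux/string_choices.h> include earlier in this file. A userspace approximation of what those helpers evaluate to, purely for reference; the authoritative definitions live in the kernel header:

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-ins for the helpers in <linux/string_choices.h>. */
static const char *str_yes_no(bool v) { return v ? "yes" : "no"; }
static const char *str_no_yes(bool v) { return v ? "no" : "yes"; }

int main(void)
{
	bool bitmap = true;
	bool cluster_list_empty = false;

	printf("bitmap %s\n", str_yes_no(bitmap));
	printf("block group has cluster?: %s\n", str_no_yes(cluster_list_empty));
	return 0;
}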
@@ -263,10 +263,10 @@ enum {
BTRFS_FEATURE_INCOMPAT_ZONED | \
BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA)

#ifdef CONFIG_BTRFS_DEBUG
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
* Features under developmen like Extent tree v2 support is enabled
* only under CONFIG_BTRFS_DEBUG.
* only under CONFIG_BTRFS_EXPERIMENTAL
*/
#define BTRFS_FEATURE_INCOMPAT_SUPP \
(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE | \
|
||||
@ -317,6 +317,8 @@ struct btrfs_dev_replace {
|
||||
|
||||
struct percpu_counter bio_counter;
|
||||
wait_queue_head_t replace_wait;
|
||||
|
||||
struct task_struct *replace_task;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -633,9 +635,10 @@ struct btrfs_fs_info {
|
||||
s32 delalloc_batch;
|
||||
|
||||
struct percpu_counter evictable_extent_maps;
|
||||
spinlock_t extent_map_shrinker_lock;
|
||||
u64 extent_map_shrinker_last_root;
|
||||
u64 extent_map_shrinker_last_ino;
|
||||
u64 em_shrinker_last_root;
|
||||
u64 em_shrinker_last_ino;
|
||||
atomic64_t em_shrinker_nr_to_scan;
|
||||
struct work_struct em_shrinker_work;
|
||||
|
||||
/* Protected by 'trans_lock'. */
|
||||
struct list_head dirty_cowonly_roots;
|
||||
@ -876,12 +879,9 @@ struct btrfs_fs_info {
|
||||
#endif
|
||||
};
|
||||
|
||||
#define page_to_inode(_page) (BTRFS_I(_Generic((_page), \
|
||||
struct page *: (_page))->mapping->host))
|
||||
#define folio_to_inode(_folio) (BTRFS_I(_Generic((_folio), \
|
||||
struct folio *: (_folio))->mapping->host))
|
||||
|
||||
#define page_to_fs_info(_page) (page_to_inode(_page)->root->fs_info)
|
||||
#define folio_to_fs_info(_folio) (folio_to_inode(_folio)->root->fs_info)
|
||||
|
||||
#define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \
|
||||
|
fs/btrfs/inode.c (495 lines changed)
@ -421,7 +421,7 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode,
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
folio = __filemap_get_folio(inode->vfs_inode.i_mapping, index, 0, 0);
|
||||
folio = filemap_get_folio(inode->vfs_inode.i_mapping, index);
|
||||
index++;
|
||||
if (IS_ERR(folio))
|
||||
continue;
|
||||
@ -556,8 +556,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
|
||||
} else {
|
||||
struct folio *folio;
|
||||
|
||||
folio = __filemap_get_folio(inode->vfs_inode.i_mapping,
|
||||
0, 0, 0);
|
||||
folio = filemap_get_folio(inode->vfs_inode.i_mapping, 0);
|
||||
ASSERT(!IS_ERR(folio));
|
||||
btrfs_set_file_extent_compression(leaf, ei, 0);
|
||||
kaddr = kmap_local_folio(folio, 0);
|
||||
@ -646,7 +645,7 @@ static bool can_cow_file_range_inline(struct btrfs_inode *inode,
|
||||
* If being used directly, you must have already checked we're allowed to cow
|
||||
* the range by getting true from can_cow_file_range_inline().
|
||||
*/
|
||||
static noinline int __cow_file_range_inline(struct btrfs_inode *inode, u64 offset,
|
||||
static noinline int __cow_file_range_inline(struct btrfs_inode *inode,
|
||||
u64 size, size_t compressed_size,
|
||||
int compress_type,
|
||||
struct folio *compressed_folio,
|
||||
@ -736,7 +735,7 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode,
|
||||
return 1;
|
||||
|
||||
lock_extent(&inode->io_tree, offset, end, &cached);
|
||||
ret = __cow_file_range_inline(inode, offset, size, compressed_size,
|
||||
ret = __cow_file_range_inline(inode, size, compressed_size,
|
||||
compress_type, compressed_folio,
|
||||
update_i_size);
|
||||
if (ret > 0) {
@@ -832,32 +831,16 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
return 0;
}
/*
* Special check for subpage.
* Only enable sector perfect compression for experimental builds.
*
* We lock the full page then run each delalloc range in the page, thus
* for the following case, we will hit some subpage specific corner case:
* This is a big feature change for subpage cases, and can hit
* different corner cases, so only limit this feature for
* experimental build for now.
*
* 0 32K 64K
* | |///////| |///////|
* \- A \- B
*
* In above case, both range A and range B will try to unlock the full
* page [0, 64K), causing the one finished later will have page
* unlocked already, triggering various page lock requirement BUG_ON()s.
*
* So here we add an artificial limit that subpage compression can only
* if the range is fully page aligned.
*
* In theory we only need to ensure the first page is fully covered, but
* the tailing partial page will be locked until the full compression
* finishes, delaying the write of other range.
*
* TODO: Make btrfs_run_delalloc_range() to lock all delalloc range
* first to prevent any submitted async extent to unlock the full page.
* By this, we can ensure for subpage case that only the last async_cow
* will unlock the full page.
* ETA for moving this out of experimental builds is 6.15.
*/
if (fs_info->sectorsize < PAGE_SIZE) {
if (fs_info->sectorsize < PAGE_SIZE &&
!IS_ENABLED(CONFIG_BTRFS_EXPERIMENTAL)) {
if (!PAGE_ALIGNED(start) ||
!PAGE_ALIGNED(end + 1))
return 0;
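On a non-experimental build the subpage case (sector size smaller than page size) keeps the old restriction: a range is only handed to compression when both its start and end + 1 are page aligned, which avoids the double page unlock described in the removed comment. A standalone sketch of that alignment check, assuming 64 KiB pages purely for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 65536ULL  /* assumed: 64 KiB pages, as on some arm64/ppc64 configs */

/* Stand-in for the kernel's PAGE_ALIGNED(). */
static bool page_aligned(uint64_t x) { return (x % PAGE_SIZE) == 0; }

static bool subpage_range_may_compress(uint64_t start, uint64_t end)
{
	/* Mirrors the non-experimental check: full page alignment required. */
	return page_aligned(start) && page_aligned(end + 1);
}

int main(void)
{
	/* Range A from the removed comment: [0, 32K) is rejected. */
	printf("[0, 32K): %d\n", subpage_range_may_compress(0, 32767));
	/* A fully page-aligned range: [0, 64K) is accepted. */
	printf("[0, 64K): %d\n", subpage_range_may_compress(0, 65535));
	return 0;
}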
|
||||
@ -896,13 +879,14 @@ static int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 e
|
||||
|
||||
for (unsigned long index = start >> PAGE_SHIFT;
|
||||
index <= end_index; index++) {
|
||||
folio = __filemap_get_folio(inode->i_mapping, index, 0, 0);
|
||||
folio = filemap_get_folio(inode->i_mapping, index);
|
||||
if (IS_ERR(folio)) {
|
||||
if (!ret)
|
||||
ret = PTR_ERR(folio);
|
||||
continue;
|
||||
}
|
||||
folio_clear_dirty_for_io(folio);
|
||||
btrfs_folio_clamp_clear_dirty(inode_to_fs_info(inode), folio, start,
|
||||
end + 1 - start);
|
||||
folio_put(folio);
|
||||
}
|
||||
return ret;
|
||||
@ -1001,17 +985,6 @@ static void compress_file_range(struct btrfs_work *work)
|
||||
(start > 0 || end + 1 < inode->disk_i_size))
|
||||
goto cleanup_and_bail_uncompressed;
|
||||
|
||||
/*
|
||||
* For subpage case, we require full page alignment for the sector
|
||||
* aligned range.
|
||||
* Thus we must also check against @actual_end, not just @end.
|
||||
*/
|
||||
if (blocksize < PAGE_SIZE) {
|
||||
if (!PAGE_ALIGNED(start) ||
|
||||
!PAGE_ALIGNED(round_up(actual_end, blocksize)))
|
||||
goto cleanup_and_bail_uncompressed;
|
||||
}
|
||||
|
||||
total_compressed = min_t(unsigned long, total_compressed,
|
||||
BTRFS_MAX_UNCOMPRESSED);
|
||||
total_in = 0;
|
||||
@ -1359,7 +1332,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
u64 alloc_hint = 0;
|
||||
u64 orig_start = start;
|
||||
u64 num_bytes;
|
||||
unsigned long ram_size;
|
||||
u64 cur_alloc_size = 0;
|
||||
u64 min_alloc_size;
|
||||
u64 blocksize = fs_info->sectorsize;
|
||||
@ -1367,7 +1339,6 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
struct extent_map *em;
|
||||
unsigned clear_bits;
|
||||
unsigned long page_ops;
|
||||
bool extent_reserved = false;
|
||||
int ret = 0;
|
||||
|
||||
if (btrfs_is_free_space_inode(inode)) {
|
||||
@ -1421,8 +1392,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_file_extent file_extent;
|
||||
|
||||
cur_alloc_size = num_bytes;
|
||||
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
|
||||
ret = btrfs_reserve_extent(root, num_bytes, num_bytes,
|
||||
min_alloc_size, 0, alloc_hint,
|
||||
&ins, 1, 1);
|
||||
if (ret == -EAGAIN) {
|
||||
@ -1453,9 +1423,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
cur_alloc_size = ins.offset;
|
||||
extent_reserved = true;
|
||||
|
||||
ram_size = ins.offset;
|
||||
file_extent.disk_bytenr = ins.objectid;
|
||||
file_extent.disk_num_bytes = ins.offset;
|
||||
file_extent.num_bytes = ins.offset;
|
||||
@ -1463,14 +1431,14 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
file_extent.offset = 0;
|
||||
file_extent.compression = BTRFS_COMPRESS_NONE;
|
||||
|
||||
lock_extent(&inode->io_tree, start, start + ram_size - 1,
|
||||
lock_extent(&inode->io_tree, start, start + cur_alloc_size - 1,
|
||||
&cached);
|
||||
|
||||
em = btrfs_create_io_em(inode, start, &file_extent,
|
||||
BTRFS_ORDERED_REGULAR);
|
||||
if (IS_ERR(em)) {
|
||||
unlock_extent(&inode->io_tree, start,
|
||||
start + ram_size - 1, &cached);
|
||||
start + cur_alloc_size - 1, &cached);
|
||||
ret = PTR_ERR(em);
|
||||
goto out_reserve;
|
||||
}
|
||||
@ -1480,7 +1448,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
1 << BTRFS_ORDERED_REGULAR);
|
||||
if (IS_ERR(ordered)) {
|
||||
unlock_extent(&inode->io_tree, start,
|
||||
start + ram_size - 1, &cached);
|
||||
start + cur_alloc_size - 1, &cached);
|
||||
ret = PTR_ERR(ordered);
|
||||
goto out_drop_extent_cache;
|
||||
}
|
||||
@ -1501,7 +1469,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
*/
|
||||
if (ret)
|
||||
btrfs_drop_extent_map_range(inode, start,
|
||||
start + ram_size - 1,
|
||||
start + cur_alloc_size - 1,
|
||||
false);
|
||||
}
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
@ -1519,7 +1487,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
page_ops = (keep_locked ? 0 : PAGE_UNLOCK);
|
||||
page_ops |= PAGE_SET_ORDERED;
|
||||
|
||||
extent_clear_unlock_delalloc(inode, start, start + ram_size - 1,
|
||||
extent_clear_unlock_delalloc(inode, start, start + cur_alloc_size - 1,
|
||||
locked_folio, &cached,
|
||||
EXTENT_LOCKED | EXTENT_DELALLOC,
|
||||
page_ops);
|
||||
@ -1529,7 +1497,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
num_bytes -= cur_alloc_size;
|
||||
alloc_hint = ins.objectid + ins.offset;
|
||||
start += cur_alloc_size;
|
||||
extent_reserved = false;
|
||||
cur_alloc_size = 0;
|
||||
|
||||
/*
|
||||
* btrfs_reloc_clone_csums() error, since start is increased
|
||||
@ -1545,7 +1513,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
return ret;
|
||||
|
||||
out_drop_extent_cache:
|
||||
btrfs_drop_extent_map_range(inode, start, start + ram_size - 1, false);
|
||||
btrfs_drop_extent_map_range(inode, start, start + cur_alloc_size - 1, false);
|
||||
out_reserve:
|
||||
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
|
||||
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
|
||||
@ -1599,13 +1567,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
* to decrement again the data space_info's bytes_may_use counter,
|
||||
* therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV.
|
||||
*/
|
||||
if (extent_reserved) {
|
||||
if (cur_alloc_size) {
|
||||
extent_clear_unlock_delalloc(inode, start,
|
||||
start + cur_alloc_size - 1,
|
||||
locked_folio, &cached, clear_bits,
|
||||
page_ops);
|
||||
btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL);
|
||||
start += cur_alloc_size;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1614,11 +1581,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
|
||||
* space_info's bytes_may_use counter, reserved in
|
||||
* btrfs_check_data_free_space().
|
||||
*/
|
||||
if (start < end) {
|
||||
if (start + cur_alloc_size < end) {
|
||||
clear_bits |= EXTENT_CLEAR_DATA_RESV;
|
||||
extent_clear_unlock_delalloc(inode, start, end, locked_folio,
|
||||
extent_clear_unlock_delalloc(inode, start + cur_alloc_size,
|
||||
end, locked_folio,
|
||||
&cached, clear_bits, page_ops);
|
||||
btrfs_qgroup_free_data(inode, NULL, start, end - start + 1, NULL);
|
||||
btrfs_qgroup_free_data(inode, NULL, start + cur_alloc_size,
|
||||
end - start - cur_alloc_size + 1, NULL);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
@ -3094,34 +3063,6 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
|
||||
BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
|
||||
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
if (freespace_inode)
|
||||
trans = btrfs_join_transaction_spacecache(root);
|
||||
else
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
trans = NULL;
|
||||
goto out;
|
||||
}
|
||||
trans->block_rsv = &inode->block_rsv;
|
||||
ret = btrfs_update_inode_fallback(trans, inode);
|
||||
if (ret) /* -ENOMEM or corruption */
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
ret = btrfs_insert_raid_extent(trans, ordered_extent);
|
||||
if (ret)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
goto out;
|
||||
}
|
||||
|
||||
clear_bits |= EXTENT_LOCKED;
|
||||
lock_extent(io_tree, start, end, &cached_state);
|
||||
|
||||
if (freespace_inode)
|
||||
trans = btrfs_join_transaction_spacecache(root);
|
||||
else
|
||||
@ -3135,8 +3076,31 @@ int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
|
||||
trans->block_rsv = &inode->block_rsv;
|
||||
|
||||
ret = btrfs_insert_raid_extent(trans, ordered_extent);
|
||||
if (ret)
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
|
||||
/* Logic error */
|
||||
ASSERT(list_empty(&ordered_extent->list));
|
||||
if (!list_empty(&ordered_extent->list)) {
|
||||
ret = -EINVAL;
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_inode_safe_disk_i_size_write(inode, 0);
|
||||
ret = btrfs_update_inode_fallback(trans, inode);
|
||||
if (ret) {
|
||||
/* -ENOMEM or corruption */
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
clear_bits |= EXTENT_LOCKED;
|
||||
lock_extent(io_tree, start, end, &cached_state);
|
||||
|
||||
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
|
||||
compress_type = ordered_extent->compress_type;
|
||||
@ -3791,14 +3755,45 @@ static int btrfs_init_file_extent_tree(struct btrfs_inode *inode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_add_inode_to_root(struct btrfs_inode *inode, bool prealloc)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_inode *existing;
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
int ret;
|
||||
|
||||
if (inode_unhashed(&inode->vfs_inode))
|
||||
return 0;
|
||||
|
||||
if (prealloc) {
|
||||
ret = xa_reserve(&root->inodes, ino, GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
existing = xa_store(&root->inodes, ino, inode, GFP_ATOMIC);
|
||||
|
||||
if (xa_is_err(existing)) {
|
||||
ret = xa_err(existing);
|
||||
ASSERT(ret != -EINVAL);
|
||||
ASSERT(ret != -ENOMEM);
|
||||
return ret;
|
||||
} else if (existing) {
|
||||
WARN_ON(!(existing->vfs_inode.i_state & (I_WILL_FREE | I_FREEING)));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* read an inode from the btree into the in-memory inode
|
||||
* Read a locked inode from the btree into the in-memory inode and add it to
|
||||
* its root list/tree.
|
||||
*
|
||||
* On failure clean up the inode.
|
||||
*/
|
||||
static int btrfs_read_locked_inode(struct inode *inode,
|
||||
struct btrfs_path *in_path)
|
||||
static int btrfs_read_locked_inode(struct inode *inode, struct btrfs_path *path)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
struct btrfs_path *path = in_path;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_inode_item *inode_item;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
@ -3812,25 +3807,25 @@ static int btrfs_read_locked_inode(struct inode *inode,
|
||||
|
||||
ret = btrfs_init_file_extent_tree(BTRFS_I(inode));
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
ret = btrfs_fill_inode(inode, &rdev);
|
||||
if (!ret)
|
||||
filled = true;
|
||||
|
||||
if (!path) {
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
}
|
||||
ASSERT(path);
|
||||
|
||||
btrfs_get_inode_key(BTRFS_I(inode), &location);
|
||||
|
||||
ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
|
||||
if (ret) {
|
||||
if (path != in_path)
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
/*
|
||||
* ret > 0 can come from btrfs_search_slot called by
|
||||
* btrfs_lookup_inode(), this means the inode was not found.
|
||||
*/
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
@ -3965,8 +3960,6 @@ static int btrfs_read_locked_inode(struct inode *inode,
|
||||
btrfs_ino(BTRFS_I(inode)),
|
||||
btrfs_root_id(root), ret);
|
||||
}
|
||||
if (path != in_path)
|
||||
btrfs_free_path(path);
|
||||
|
||||
if (!maybe_acls)
|
||||
cache_no_acl(inode);
|
||||
@ -3993,7 +3986,15 @@ static int btrfs_read_locked_inode(struct inode *inode,
|
||||
}
|
||||
|
||||
btrfs_sync_inode_flags_to_i_flags(inode);
|
||||
|
||||
ret = btrfs_add_inode_to_root(BTRFS_I(inode), true);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
return 0;
|
||||
out:
|
||||
iget_failed(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5502,35 +5503,7 @@ static int fixup_tree_root_location(struct btrfs_fs_info *fs_info,
|
||||
return err;
|
||||
}
|
||||
|
||||
static int btrfs_add_inode_to_root(struct btrfs_inode *inode, bool prealloc)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_inode *existing;
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
int ret;
|
||||
|
||||
if (inode_unhashed(&inode->vfs_inode))
|
||||
return 0;
|
||||
|
||||
if (prealloc) {
|
||||
ret = xa_reserve(&root->inodes, ino, GFP_NOFS);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
existing = xa_store(&root->inodes, ino, inode, GFP_ATOMIC);
|
||||
|
||||
if (xa_is_err(existing)) {
|
||||
ret = xa_err(existing);
|
||||
ASSERT(ret != -EINVAL);
|
||||
ASSERT(ret != -ENOMEM);
|
||||
return ret;
|
||||
} else if (existing) {
|
||||
WARN_ON(!(existing->vfs_inode.i_state & (I_WILL_FREE | I_FREEING)));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_del_inode_from_root(struct btrfs_inode *inode)
|
||||
{
|
||||
@ -5592,10 +5565,8 @@ static struct inode *btrfs_iget_locked(u64 ino, struct btrfs_root *root)
|
||||
}
|
||||
|
||||
/*
|
||||
* Get an inode object given its inode number and corresponding root.
|
||||
* Path can be preallocated to prevent recursing back to iget through
|
||||
* allocator. NULL is also valid but may require an additional allocation
|
||||
* later.
|
||||
* Get an inode object given its inode number and corresponding root. Path is
|
||||
* preallocated to prevent recursing back to iget through allocator.
|
||||
*/
|
||||
struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
|
||||
struct btrfs_path *path)
|
||||
@ -5611,30 +5582,40 @@ struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
|
||||
return inode;
|
||||
|
||||
ret = btrfs_read_locked_inode(inode, path);
|
||||
/*
|
||||
* ret > 0 can come from btrfs_search_slot called by
|
||||
* btrfs_read_locked_inode(), this means the inode item was not found.
|
||||
*/
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
ret = btrfs_add_inode_to_root(BTRFS_I(inode), true);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
unlock_new_inode(inode);
|
||||
|
||||
return inode;
|
||||
error:
|
||||
iget_failed(inode);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get an inode object given its inode number and corresponding root.
|
||||
*/
|
||||
struct inode *btrfs_iget(u64 ino, struct btrfs_root *root)
|
||||
{
|
||||
return btrfs_iget_path(ino, root, NULL);
|
||||
struct inode *inode;
|
||||
struct btrfs_path *path;
|
||||
int ret;
|
||||
|
||||
inode = btrfs_iget_locked(ino, root);
|
||||
if (!inode)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
if (!(inode->i_state & I_NEW))
|
||||
return inode;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ret = btrfs_read_locked_inode(inode, path);
|
||||
btrfs_free_path(path);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
unlock_new_inode(inode);
|
||||
return inode;
|
||||
}
|
||||
|
||||
static struct inode *new_simple_dir(struct inode *dir,
|
||||
@ -6023,7 +6004,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
|
||||
* offset. This means that new entries created during readdir
|
||||
* are *guaranteed* to be seen in the future by that readdir.
|
||||
* This has broken buggy programs which operate on names as
|
||||
* they're returned by readdir. Until we re-use freed offsets
|
||||
* they're returned by readdir. Until we reuse freed offsets
|
||||
* we have this hack to stop new entries from being returned
|
||||
* under the assumption that they'll never reach this huge
|
||||
* offset.
|
||||
@ -6765,8 +6746,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int read_inline_extent(struct btrfs_inode *inode, struct btrfs_path *path,
|
||||
struct folio *folio)
|
||||
static int read_inline_extent(struct btrfs_path *path, struct folio *folio)
|
||||
{
|
||||
struct btrfs_file_extent_item *fi;
|
||||
void *kaddr;
|
||||
@ -6964,7 +6944,7 @@ struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
ASSERT(em->disk_bytenr == EXTENT_MAP_INLINE);
|
||||
ASSERT(em->len == fs_info->sectorsize);
|
||||
|
||||
ret = read_inline_extent(inode, path, folio);
|
||||
ret = read_inline_extent(path, folio);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
goto insert;
|
||||
@ -8972,28 +8952,6 @@ static int btrfs_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
|
||||
return finish_open_simple(file, ret);
|
||||
}
|
||||
|
||||
void btrfs_set_range_writeback(struct btrfs_inode *inode, u64 start, u64 end)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
unsigned long index = start >> PAGE_SHIFT;
|
||||
unsigned long end_index = end >> PAGE_SHIFT;
|
||||
struct folio *folio;
|
||||
u32 len;
|
||||
|
||||
ASSERT(end + 1 - start <= U32_MAX);
|
||||
len = end + 1 - start;
|
||||
while (index <= end_index) {
|
||||
folio = __filemap_get_folio(inode->vfs_inode.i_mapping, index, 0, 0);
|
||||
ASSERT(!IS_ERR(folio)); /* folios should be in the extent_io_tree */
|
||||
|
||||
/* This is for data, which doesn't yet support larger folio. */
|
||||
ASSERT(folio_order(folio) == 0);
|
||||
btrfs_folio_set_writeback(fs_info, folio, start, len);
|
||||
folio_put(folio);
|
||||
index++;
|
||||
}
|
||||
}
|
||||
|
||||
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
|
||||
int compress_type)
|
||||
{
|
||||
@ -9038,12 +8996,16 @@ static ssize_t btrfs_encoded_read_inline(
|
||||
unsigned long ptr;
|
||||
void *tmp;
|
||||
ssize_t ret;
|
||||
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
path->nowait = nowait;
|
||||
|
||||
ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
|
||||
extent_start, 0);
|
||||
if (ret) {
|
||||
@ -9107,6 +9069,7 @@ static ssize_t btrfs_encoded_read_inline(
|
||||
|
||||
struct btrfs_encoded_read_private {
|
||||
wait_queue_head_t wait;
|
||||
void *uring_ctx;
|
||||
atomic_t pending;
|
||||
blk_status_t status;
|
||||
};
|
||||
@ -9126,26 +9089,40 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
|
||||
*/
|
||||
WRITE_ONCE(priv->status, bbio->bio.bi_status);
|
||||
}
|
||||
if (!atomic_dec_return(&priv->pending))
|
||||
wake_up(&priv->wait);
|
||||
if (atomic_dec_return(&priv->pending) == 0) {
|
||||
int err = blk_status_to_errno(READ_ONCE(priv->status));
|
||||
|
||||
if (priv->uring_ctx) {
|
||||
btrfs_uring_read_extent_endio(priv->uring_ctx, err);
|
||||
kfree(priv);
|
||||
} else {
|
||||
wake_up(&priv->wait);
|
||||
}
|
||||
}
|
||||
bio_put(&bbio->bio);
|
||||
}
|
||||
|
||||
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
|
||||
u64 file_offset, u64 disk_bytenr,
|
||||
u64 disk_io_size, struct page **pages)
|
||||
u64 disk_bytenr, u64 disk_io_size,
|
||||
struct page **pages, void *uring_ctx)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_encoded_read_private priv = {
|
||||
.pending = ATOMIC_INIT(1),
|
||||
};
|
||||
struct btrfs_encoded_read_private *priv;
|
||||
unsigned long i = 0;
|
||||
struct btrfs_bio *bbio;
|
||||
int ret;
|
||||
|
||||
init_waitqueue_head(&priv.wait);
|
||||
priv = kmalloc(sizeof(struct btrfs_encoded_read_private), GFP_NOFS);
|
||||
if (!priv)
|
||||
return -ENOMEM;
|
||||
|
||||
init_waitqueue_head(&priv->wait);
|
||||
atomic_set(&priv->pending, 1);
|
||||
priv->status = 0;
|
||||
priv->uring_ctx = uring_ctx;
|
||||
|
||||
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
|
||||
btrfs_encoded_read_endio, &priv);
|
||||
btrfs_encoded_read_endio, priv);
|
||||
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
bbio->inode = inode;
|
||||
|
||||
@ -9153,11 +9130,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
|
||||
size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
|
||||
|
||||
if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
|
||||
atomic_inc(&priv.pending);
|
||||
atomic_inc(&priv->pending);
|
||||
btrfs_submit_bbio(bbio, 0);
|
||||
|
||||
bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
|
||||
btrfs_encoded_read_endio, &priv);
|
||||
btrfs_encoded_read_endio, priv);
|
||||
bbio->bio.bi_iter.bi_sector = disk_bytenr >> SECTOR_SHIFT;
|
||||
bbio->inode = inode;
|
||||
continue;
@@ -9168,22 +9145,33 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
disk_io_size -= bytes;
} while (disk_io_size);

atomic_inc(&priv.pending);
atomic_inc(&priv->pending);
btrfs_submit_bbio(bbio, 0);

if (atomic_dec_return(&priv.pending))
io_wait_event(priv.wait, !atomic_read(&priv.pending));
/* See btrfs_encoded_read_endio() for ordering. */
return blk_status_to_errno(READ_ONCE(priv.status));
if (uring_ctx) {
if (atomic_dec_return(&priv->pending) == 0) {
ret = blk_status_to_errno(READ_ONCE(priv->status));
btrfs_uring_read_extent_endio(uring_ctx, ret);
kfree(priv);
return ret;
}

return -EIOCBQUEUED;
} else {
if (atomic_dec_return(&priv->pending) != 0)
io_wait_event(priv->wait, !atomic_read(&priv->pending));
/* See btrfs_encoded_read_endio() for ordering. */
ret = blk_status_to_errno(READ_ONCE(priv->status));
kfree(priv);
return ret;
}
}
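The reworked btrfs_encoded_read_regular_fill_pages() keeps a single pending counter that starts at 1, is incremented once per submitted bio, and whose final decrement either wakes the synchronous waiter or fires the io_uring completion. The same completion pattern in a self-contained userspace sketch using C11 atomics and pthreads; the names here are illustrative, not the kernel API:

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

struct read_ctx {
	atomic_int pending;                 /* starts at 1, like priv->pending */
	void (*endio)(struct read_ctx *);   /* async completion, io_uring-style */
};

static void finish(struct read_ctx *ctx)
{
	(void)ctx;
	printf("all chunks done\n");
}

static void *chunk_done(void *arg)
{
	struct read_ctx *ctx = arg;

	/* The thread that drops the counter to zero runs the completion. */
	if (atomic_fetch_sub(&ctx->pending, 1) == 1 && ctx->endio)
		ctx->endio(ctx);
	return NULL;
}

int main(void)
{
	struct read_ctx ctx = { .pending = 1, .endio = finish };
	pthread_t t[3];

	/* "Submit" three chunks: take one reference per chunk. */
	for (int i = 0; i < 3; i++) {
		atomic_fetch_add(&ctx.pending, 1);
		pthread_create(&t[i], NULL, chunk_done, &ctx);
	}

	/* Drop the initial reference; completion runs in whichever context is last. */
	chunk_done(&ctx);

	for (int i = 0; i < 3; i++)
		pthread_join(t[i], NULL);
	return 0;
}

Whichever context performs the final decrement runs the completion, which mirrors how the endio path may complete the io_uring command instead of the submitter.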
|
||||
|
||||
static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
|
||||
struct iov_iter *iter,
|
||||
u64 start, u64 lockend,
|
||||
struct extent_state **cached_state,
|
||||
u64 disk_bytenr, u64 disk_io_size,
|
||||
size_t count, bool compressed,
|
||||
bool *unlocked)
|
||||
ssize_t btrfs_encoded_read_regular(struct kiocb *iocb, struct iov_iter *iter,
|
||||
u64 start, u64 lockend,
|
||||
struct extent_state **cached_state,
|
||||
u64 disk_bytenr, u64 disk_io_size,
|
||||
size_t count, bool compressed, bool *unlocked)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
@ -9203,8 +9191,8 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_encoded_read_regular_fill_pages(inode, start, disk_bytenr,
|
||||
disk_io_size, pages);
|
||||
ret = btrfs_encoded_read_regular_fill_pages(inode, disk_bytenr,
|
||||
disk_io_size, pages, NULL);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -9244,21 +9232,26 @@ static ssize_t btrfs_encoded_read_regular(struct kiocb *iocb,
|
||||
}
|
||||
|
||||
ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
struct btrfs_ioctl_encoded_io_args *encoded)
|
||||
struct btrfs_ioctl_encoded_io_args *encoded,
|
||||
struct extent_state **cached_state,
|
||||
u64 *disk_bytenr, u64 *disk_io_size)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
ssize_t ret;
|
||||
size_t count = iov_iter_count(iter);
|
||||
u64 start, lockend, disk_bytenr, disk_io_size;
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 start, lockend;
|
||||
struct extent_map *em;
|
||||
const bool nowait = (iocb->ki_flags & IOCB_NOWAIT);
|
||||
bool unlocked = false;
|
||||
|
||||
file_accessed(iocb->ki_filp);
|
||||
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED);
|
||||
ret = btrfs_inode_lock(inode,
|
||||
BTRFS_ILOCK_SHARED | (nowait ? BTRFS_ILOCK_TRY : 0));
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (iocb->ki_pos >= inode->vfs_inode.i_size) {
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
@ -9271,21 +9264,46 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
*/
|
||||
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
|
||||
|
||||
for (;;) {
|
||||
if (nowait) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode, start,
|
||||
lockend - start + 1);
|
||||
if (ret)
|
||||
if (filemap_range_needs_writeback(inode->vfs_inode.i_mapping,
|
||||
start, lockend)) {
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock_inode;
|
||||
lock_extent(io_tree, start, lockend, &cached_state);
|
||||
}
|
||||
|
||||
if (!try_lock_extent(io_tree, start, lockend, cached_state)) {
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock_inode;
|
||||
}
|
||||
|
||||
ordered = btrfs_lookup_ordered_range(inode, start,
|
||||
lockend - start + 1);
|
||||
if (!ordered)
|
||||
break;
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
cond_resched();
|
||||
if (ordered) {
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent(io_tree, start, lockend, cached_state);
|
||||
ret = -EAGAIN;
|
||||
goto out_unlock_inode;
|
||||
}
|
||||
} else {
|
||||
for (;;) {
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode, start,
|
||||
lockend - start + 1);
|
||||
if (ret)
|
||||
goto out_unlock_inode;
|
||||
|
||||
lock_extent(io_tree, start, lockend, cached_state);
|
||||
ordered = btrfs_lookup_ordered_range(inode, start,
|
||||
lockend - start + 1);
|
||||
if (!ordered)
|
||||
break;
|
||||
btrfs_put_ordered_extent(ordered);
|
||||
unlock_extent(io_tree, start, lockend, cached_state);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
em = btrfs_get_extent(inode, NULL, start, lockend - start + 1);
|
||||
@ -9304,9 +9322,9 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
ret = btrfs_encoded_read_inline(iocb, iter, start, lockend,
|
||||
&cached_state, extent_start,
|
||||
cached_state, extent_start,
|
||||
count, encoded, &unlocked);
|
||||
goto out;
|
||||
goto out_unlock_extent;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -9317,12 +9335,12 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
inode->vfs_inode.i_size) - iocb->ki_pos;
|
||||
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
|
||||
(em->flags & EXTENT_FLAG_PREALLOC)) {
|
||||
disk_bytenr = EXTENT_MAP_HOLE;
|
||||
*disk_bytenr = EXTENT_MAP_HOLE;
|
||||
count = min_t(u64, count, encoded->len);
|
||||
encoded->len = count;
|
||||
encoded->unencoded_len = count;
|
||||
} else if (extent_map_is_compressed(em)) {
|
||||
disk_bytenr = em->disk_bytenr;
|
||||
*disk_bytenr = em->disk_bytenr;
|
||||
/*
|
||||
* Bail if the buffer isn't large enough to return the whole
|
||||
* compressed extent.
|
||||
@ -9331,7 +9349,7 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
ret = -ENOBUFS;
|
||||
goto out_em;
|
||||
}
|
||||
disk_io_size = em->disk_num_bytes;
|
||||
*disk_io_size = em->disk_num_bytes;
|
||||
count = em->disk_num_bytes;
|
||||
encoded->unencoded_len = em->ram_bytes;
|
||||
encoded->unencoded_offset = iocb->ki_pos - (em->start - em->offset);
|
||||
@ -9341,47 +9359,42 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
goto out_em;
|
||||
encoded->compression = ret;
|
||||
} else {
|
||||
disk_bytenr = extent_map_block_start(em) + (start - em->start);
|
||||
*disk_bytenr = extent_map_block_start(em) + (start - em->start);
|
||||
if (encoded->len > count)
|
||||
encoded->len = count;
|
||||
/*
|
||||
* Don't read beyond what we locked. This also limits the page
|
||||
* allocations that we'll do.
|
||||
*/
|
||||
disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
|
||||
count = start + disk_io_size - iocb->ki_pos;
|
||||
*disk_io_size = min(lockend + 1, iocb->ki_pos + encoded->len) - start;
|
||||
count = start + *disk_io_size - iocb->ki_pos;
|
||||
encoded->len = count;
|
||||
encoded->unencoded_len = count;
|
||||
disk_io_size = ALIGN(disk_io_size, fs_info->sectorsize);
|
||||
*disk_io_size = ALIGN(*disk_io_size, fs_info->sectorsize);
|
||||
}
|
||||
free_extent_map(em);
|
||||
em = NULL;
|
||||
|
||||
if (disk_bytenr == EXTENT_MAP_HOLE) {
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
if (*disk_bytenr == EXTENT_MAP_HOLE) {
|
||||
unlock_extent(io_tree, start, lockend, cached_state);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
unlocked = true;
|
||||
ret = iov_iter_zero(count, iter);
|
||||
if (ret != count)
|
||||
ret = -EFAULT;
|
||||
} else {
|
||||
ret = btrfs_encoded_read_regular(iocb, iter, start, lockend,
|
||||
&cached_state, disk_bytenr,
|
||||
disk_io_size, count,
|
||||
encoded->compression,
|
||||
&unlocked);
|
||||
ret = -EIOCBQUEUED;
|
||||
goto out_unlock_extent;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret >= 0)
|
||||
iocb->ki_pos += encoded->len;
|
||||
out_em:
|
||||
free_extent_map(em);
|
||||
out_unlock_extent:
|
||||
if (!unlocked)
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
/* Leave inode and extent locked if we need to do a read. */
|
||||
if (!unlocked && ret != -EIOCBQUEUED)
|
||||
unlock_extent(io_tree, start, lockend, cached_state);
|
||||
out_unlock_inode:
|
||||
if (!unlocked)
|
||||
if (!unlocked && ret != -EIOCBQUEUED)
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
return ret;
|
||||
}
|
||||
@ -9492,7 +9505,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
*/
|
||||
disk_num_bytes = ALIGN(orig_count, fs_info->sectorsize);
|
||||
nr_folios = DIV_ROUND_UP(disk_num_bytes, PAGE_SIZE);
|
||||
folios = kvcalloc(nr_folios, sizeof(struct page *), GFP_KERNEL_ACCOUNT);
|
||||
folios = kvcalloc(nr_folios, sizeof(struct folio *), GFP_KERNEL_ACCOUNT);
|
||||
if (!folios)
|
||||
return -ENOMEM;
|
||||
for (i = 0; i < nr_folios; i++) {
|
||||
@ -9556,7 +9569,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
if (encoded->unencoded_len == encoded->len &&
|
||||
encoded->unencoded_offset == 0 &&
|
||||
can_cow_file_range_inline(inode, start, encoded->len, orig_count)) {
|
||||
ret = __cow_file_range_inline(inode, start, encoded->len,
|
||||
ret = __cow_file_range_inline(inode, encoded->len,
|
||||
orig_count, compression, folios[0],
|
||||
true);
|
||||
if (ret <= 0) {
|
||||
|
fs/btrfs/ioctl.c (478 lines changed)
@ -29,6 +29,7 @@
|
||||
#include <linux/fileattr.h>
|
||||
#include <linux/fsverity.h>
|
||||
#include <linux/sched/xacct.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
#include "ctree.h"
|
||||
#include "disk-io.h"
|
||||
#include "export.h"
|
||||
@ -1048,7 +1049,6 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
|
||||
struct btrfs_qgroup_inherit *inherit)
|
||||
{
|
||||
int ret;
|
||||
bool snapshot_force_cow = false;
|
||||
|
||||
/*
|
||||
* Force new buffered writes to reserve space even when NOCOW is
|
||||
@ -1067,15 +1067,13 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
|
||||
* creation.
|
||||
*/
|
||||
atomic_inc(&root->snapshot_force_cow);
|
||||
snapshot_force_cow = true;
|
||||
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
|
||||
|
||||
ret = btrfs_mksubvol(parent, idmap, name, namelen,
|
||||
root, readonly, inherit);
|
||||
atomic_dec(&root->snapshot_force_cow);
|
||||
out:
|
||||
if (snapshot_force_cow)
|
||||
atomic_dec(&root->snapshot_force_cow);
|
||||
btrfs_drew_read_unlock(&root->snapshot_lock);
|
||||
return ret;
|
||||
}
|
||||
@ -4057,8 +4055,7 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info,
|
||||
void __user *arg)
|
||||
static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
@ -4513,12 +4510,17 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
|
||||
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,
|
||||
flags);
|
||||
size_t copy_end;
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
struct iovec iovstack[UIO_FASTIOV];
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
loff_t pos;
|
||||
struct kiocb kiocb;
|
||||
ssize_t ret;
|
||||
u64 disk_bytenr, disk_io_size;
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
@ -4571,7 +4573,32 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = pos;
|
||||
|
||||
ret = btrfs_encoded_read(&kiocb, &iter, &args);
|
||||
ret = btrfs_encoded_read(&kiocb, &iter, &args, &cached_state,
|
||||
&disk_bytenr, &disk_io_size);
|
||||
|
||||
if (ret == -EIOCBQUEUED) {
|
||||
bool unlocked = false;
|
||||
u64 start, lockend, count;
|
||||
|
||||
start = ALIGN_DOWN(kiocb.ki_pos, fs_info->sectorsize);
|
||||
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
|
||||
|
||||
if (args.compression)
|
||||
count = disk_io_size;
|
||||
else
|
||||
count = args.len;
|
||||
|
||||
ret = btrfs_encoded_read_regular(&kiocb, &iter, start, lockend,
|
||||
&cached_state, disk_bytenr,
|
||||
disk_io_size, count,
|
||||
args.compression, &unlocked);
|
||||
|
||||
if (!unlocked) {
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
}
|
||||
}
|
||||
|
||||
if (ret >= 0) {
|
||||
fsnotify_access(file);
|
||||
if (copy_to_user(argp + copy_end,
|
||||
@ -4689,6 +4716,439 @@ static int btrfs_ioctl_encoded_write(struct file *file, void __user *argp, bool
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Context that's attached to an encoded read io_uring command, in cmd->pdu. It
|
||||
* contains the fields in btrfs_uring_read_extent that are necessary to finish
|
||||
* off and cleanup the I/O in btrfs_uring_read_finished.
|
||||
*/
|
||||
struct btrfs_uring_priv {
|
||||
struct io_uring_cmd *cmd;
|
||||
struct page **pages;
|
||||
unsigned long nr_pages;
|
||||
struct kiocb iocb;
|
||||
struct iovec *iov;
|
||||
struct iov_iter iter;
|
||||
struct extent_state *cached_state;
|
||||
u64 count;
|
||||
u64 start;
|
||||
u64 lockend;
|
||||
int err;
|
||||
bool compressed;
|
||||
};
|
||||
|
||||
struct io_btrfs_cmd {
|
||||
struct btrfs_uring_priv *priv;
|
||||
};
|
||||
|
||||
static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd);
|
||||
struct btrfs_uring_priv *priv = bc->priv;
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp));
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
unsigned long index;
|
||||
u64 cur;
|
||||
size_t page_offset;
|
||||
ssize_t ret;
|
||||
|
||||
if (priv->err) {
|
||||
ret = priv->err;
|
||||
goto out;
|
||||
}

if (priv->compressed) {
index = 0;
page_offset = 0;
} else {
index = (priv->iocb.ki_pos - priv->start) >> PAGE_SHIFT;
page_offset = offset_in_page(priv->iocb.ki_pos - priv->start);
}
cur = 0;
while (cur < priv->count) {
size_t bytes = min_t(size_t, priv->count - cur, PAGE_SIZE - page_offset);

if (copy_page_to_iter(priv->pages[index], page_offset, bytes,
&priv->iter) != bytes) {
ret = -EFAULT;
goto out;
}

index++;
cur += bytes;
page_offset = 0;
}
ret = priv->count;

out:
unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state);
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);

io_uring_cmd_done(cmd, ret, 0, issue_flags);
add_rchar(current, ret);

for (index = 0; index < priv->nr_pages; index++)
__free_page(priv->pages[index]);

kfree(priv->pages);
kfree(priv->iov);
kfree(priv);
}
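btrfs_uring_read_finished() copies priv->count bytes from the filled page array into the caller's iovec, starting partway into the first page for uncompressed reads. The same walk in plain userspace C, with memcpy standing in for copy_page_to_iter() and a 4 KiB page size assumed:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096u  /* assumed page size */

/*
 * Copy `count` bytes out of an array of page-sized buffers into `dst`,
 * starting `page_offset` bytes into pages[index].
 */
static void copy_from_pages(char *dst, char pages[][PAGE_SIZE],
			    size_t index, size_t page_offset, size_t count)
{
	size_t cur = 0;

	while (cur < count) {
		size_t bytes = count - cur;

		if (bytes > PAGE_SIZE - page_offset)
			bytes = PAGE_SIZE - page_offset;

		memcpy(dst + cur, &pages[index][page_offset], bytes);

		index++;
		cur += bytes;
		page_offset = 0;  /* only the first page has a non-zero offset */
	}
}

int main(void)
{
	static char pages[3][PAGE_SIZE];
	char dst[6000];

	memset(pages, 'x', sizeof(pages));
	copy_from_pages(dst, pages, 0, 100, sizeof(dst));
	printf("copied %zu bytes, first byte '%c'\n", sizeof(dst), dst[0]);
	return 0;
}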
|
||||
|
||||
void btrfs_uring_read_extent_endio(void *ctx, int err)
|
||||
{
|
||||
struct btrfs_uring_priv *priv = ctx;
|
||||
struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(priv->cmd, struct io_btrfs_cmd);
|
||||
|
||||
priv->err = err;
|
||||
bc->priv = priv;
|
||||
|
||||
io_uring_cmd_complete_in_task(priv->cmd, btrfs_uring_read_finished);
|
||||
}
|
||||
|
||||
static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter,
|
||||
u64 start, u64 lockend,
|
||||
struct extent_state *cached_state,
|
||||
u64 disk_bytenr, u64 disk_io_size,
|
||||
size_t count, bool compressed,
|
||||
struct iovec *iov, struct io_uring_cmd *cmd)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp));
|
||||
struct extent_io_tree *io_tree = &inode->io_tree;
|
||||
struct page **pages;
|
||||
struct btrfs_uring_priv *priv = NULL;
|
||||
unsigned long nr_pages;
|
||||
int ret;
|
||||
|
||||
nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE);
|
||||
pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
|
||||
if (!pages)
|
||||
return -ENOMEM;
|
||||
ret = btrfs_alloc_page_array(nr_pages, pages, 0);
|
||||
if (ret) {
|
||||
ret = -ENOMEM;
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
priv = kmalloc(sizeof(*priv), GFP_NOFS);
|
||||
if (!priv) {
|
||||
ret = -ENOMEM;
|
||||
goto out_fail;
|
||||
}
|
||||
|
||||
priv->iocb = *iocb;
|
||||
priv->iov = iov;
|
||||
priv->iter = *iter;
|
||||
priv->count = count;
|
||||
priv->cmd = cmd;
|
||||
priv->cached_state = cached_state;
|
||||
priv->compressed = compressed;
|
||||
priv->nr_pages = nr_pages;
|
||||
priv->pages = pages;
|
||||
priv->start = start;
|
||||
priv->lockend = lockend;
|
||||
priv->err = 0;
|
||||
|
||||
ret = btrfs_encoded_read_regular_fill_pages(inode, disk_bytenr,
|
||||
disk_io_size, pages, priv);
|
||||
if (ret && ret != -EIOCBQUEUED)
|
||||
goto out_fail;
|
||||
|
||||
/*
|
||||
* If we return -EIOCBQUEUED, we're deferring the cleanup to
|
||||
* btrfs_uring_read_finished(), which will handle unlocking the extent
|
||||
* and inode and freeing the allocations.
|
||||
*/
|
||||
|
||||
return -EIOCBQUEUED;
|
||||
|
||||
out_fail:
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
kfree(priv);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags);
|
||||
size_t copy_end;
|
||||
struct btrfs_ioctl_encoded_io_args args = { 0 };
|
||||
int ret;
|
||||
u64 disk_bytenr, disk_io_size;
|
||||
struct file *file;
|
||||
struct btrfs_inode *inode;
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct extent_io_tree *io_tree;
|
||||
struct iovec iovstack[UIO_FASTIOV];
|
||||
struct iovec *iov = iovstack;
|
||||
struct iov_iter iter;
|
||||
loff_t pos;
|
||||
struct kiocb kiocb;
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 start, lockend;
|
||||
void __user *sqe_addr;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out_acct;
|
||||
}
|
||||
file = cmd->file;
|
||||
inode = BTRFS_I(file->f_inode);
|
||||
fs_info = inode->root->fs_info;
|
||||
io_tree = &inode->io_tree;
|
||||
sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr));
|
||||
|
||||
if (issue_flags & IO_URING_F_COMPAT) {
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
struct btrfs_ioctl_encoded_io_args_32 args32;
|
||||
|
||||
copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags);
|
||||
if (copy_from_user(&args32, sqe_addr, copy_end)) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
args.iov = compat_ptr(args32.iov);
|
||||
args.iovcnt = args32.iovcnt;
|
||||
args.offset = args32.offset;
|
||||
args.flags = args32.flags;
|
||||
#else
|
||||
return -ENOTTY;
|
||||
#endif
|
||||
} else {
|
||||
copy_end = copy_end_kernel;
|
||||
if (copy_from_user(&args, sqe_addr, copy_end)) {
|
||||
ret = -EFAULT;
|
||||
goto out_acct;
|
||||
}
|
||||
}
|
||||
|
||||
if (args.flags != 0)
|
||||
return -EINVAL;
|
||||
|
||||
ret = import_iovec(ITER_DEST, args.iov, args.iovcnt, ARRAY_SIZE(iovstack),
|
||||
&iov, &iter);
|
||||
if (ret < 0)
|
||||
goto out_acct;
|
||||
|
||||
if (iov_iter_count(&iter) == 0) {
|
||||
ret = 0;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
pos = args.offset;
|
||||
ret = rw_verify_area(READ, file, &pos, args.len);
|
||||
if (ret < 0)
|
||||
goto out_free;
|
||||
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = pos;
|
||||
|
||||
if (issue_flags & IO_URING_F_NONBLOCK)
|
||||
kiocb.ki_flags |= IOCB_NOWAIT;
|
||||
|
||||
start = ALIGN_DOWN(pos, fs_info->sectorsize);
|
||||
lockend = start + BTRFS_MAX_UNCOMPRESSED - 1;
|
||||
|
||||
ret = btrfs_encoded_read(&kiocb, &iter, &args, &cached_state,
|
||||
&disk_bytenr, &disk_io_size);
|
||||
if (ret < 0 && ret != -EIOCBQUEUED)
|
||||
goto out_free;
|
||||
|
||||
file_accessed(file);
|
||||
|
||||
if (copy_to_user(sqe_addr + copy_end, (const char *)&args + copy_end_kernel,
|
||||
sizeof(args) - copy_end_kernel)) {
|
||||
if (ret == -EIOCBQUEUED) {
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
}
|
||||
ret = -EFAULT;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
if (ret == -EIOCBQUEUED) {
|
||||
u64 count;
|
||||
|
||||
/*
|
||||
* If we've optimized things by storing the iovecs on the stack,
|
||||
* undo this.
|
||||
*/
|
||||
if (!iov) {
|
||||
iov = kmalloc(sizeof(struct iovec) * args.iovcnt, GFP_NOFS);
|
||||
if (!iov) {
|
||||
unlock_extent(io_tree, start, lockend, &cached_state);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
ret = -ENOMEM;
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
memcpy(iov, iovstack, sizeof(struct iovec) * args.iovcnt);
|
||||
}
|
||||
|
||||
count = min_t(u64, iov_iter_count(&iter), disk_io_size);
|
||||
|
||||
/* Match ioctl by not returning past EOF if uncompressed. */
|
||||
if (!args.compression)
|
||||
count = min_t(u64, count, args.len);
|
||||
|
||||
ret = btrfs_uring_read_extent(&kiocb, &iter, start, lockend,
|
||||
cached_state, disk_bytenr,
|
||||
disk_io_size, count,
|
||||
args.compression, iov, cmd);
|
||||
|
||||
goto out_acct;
|
||||
}
|
||||
|
||||
out_free:
|
||||
kfree(iov);
|
||||
|
||||
out_acct:
|
||||
if (ret > 0)
|
||||
add_rchar(current, ret);
|
||||
inc_syscr(current);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
|
||||
{
|
||||
switch (cmd->cmd_op) {
|
||||
case BTRFS_IOC_ENCODED_READ:
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_ENCODED_READ_32:
|
||||
#endif
|
||||
return btrfs_uring_encoded_read(cmd, issue_flags);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *argp)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_ioctl_subvol_wait args = { 0 };
|
||||
signed long sched_ret;
|
||||
int refs;
|
||||
u64 root_flags;
|
||||
bool wait_for_deletion = false;
|
||||
bool found = false;
|
||||
|
||||
if (copy_from_user(&args, argp, sizeof(args)))
|
||||
return -EFAULT;
|
||||
|
||||
switch (args.mode) {
|
||||
case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED:
|
||||
/*
|
||||
* Wait for the first one deleted that waits until all previous
|
||||
* are cleaned.
|
||||
*/
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
if (!list_empty(&fs_info->dead_roots)) {
|
||||
root = list_last_entry(&fs_info->dead_roots,
|
||||
struct btrfs_root, root_list);
|
||||
args.subvolid = btrfs_root_id(root);
|
||||
found = true;
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
if (!found)
|
||||
return -ENOENT;
|
||||
|
||||
fallthrough;
|
||||
case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE:
|
||||
if ((0 < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) ||
|
||||
BTRFS_LAST_FREE_OBJECTID < args.subvolid)
|
||||
return -EINVAL;
|
||||
break;
|
||||
case BTRFS_SUBVOL_SYNC_COUNT:
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
args.count = list_count_nodes(&fs_info->dead_roots);
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
if (copy_to_user(argp, &args, sizeof(args)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
case BTRFS_SUBVOL_SYNC_PEEK_FIRST:
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
/* Last in the list was deleted first. */
|
||||
if (!list_empty(&fs_info->dead_roots)) {
|
||||
root = list_last_entry(&fs_info->dead_roots,
|
||||
struct btrfs_root, root_list);
|
||||
args.subvolid = btrfs_root_id(root);
|
||||
} else {
|
||||
args.subvolid = 0;
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
if (copy_to_user(argp, &args, sizeof(args)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
case BTRFS_SUBVOL_SYNC_PEEK_LAST:
|
||||
spin_lock(&fs_info->trans_lock);
|
||||
/* First in the list was deleted last. */
|
||||
if (!list_empty(&fs_info->dead_roots)) {
|
||||
root = list_first_entry(&fs_info->dead_roots,
|
||||
struct btrfs_root, root_list);
|
||||
args.subvolid = btrfs_root_id(root);
|
||||
} else {
|
||||
args.subvolid = 0;
|
||||
}
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
if (copy_to_user(argp, &args, sizeof(args)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* 32bit limitation: fs_roots_radix key is not wide enough. */
|
||||
if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX)
|
||||
return -EOVERFLOW;
|
||||
|
||||
while (1) {
|
||||
/* Wait for the specific one. */
|
||||
if (down_read_interruptible(&fs_info->subvol_sem) == -EINTR)
|
||||
return -EINTR;
|
||||
refs = -1;
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
root = radix_tree_lookup(&fs_info->fs_roots_radix,
|
||||
(unsigned long)args.subvolid);
|
||||
if (root) {
|
||||
spin_lock(&root->root_item_lock);
|
||||
refs = btrfs_root_refs(&root->root_item);
|
||||
root_flags = btrfs_root_flags(&root->root_item);
|
||||
spin_unlock(&root->root_item_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
up_read(&fs_info->subvol_sem);
|
||||
|
||||
/* Subvolume does not exist. */
|
||||
if (!root)
|
||||
return -ENOENT;
|
||||
|
||||
/* Subvolume not deleted at all. */
|
||||
if (refs > 0)
|
||||
return -EEXIST;
|
||||
/* We've waited and now the subvolume is gone. */
|
||||
if (wait_for_deletion && refs == -1) {
|
||||
/* Return the one we waited for as the last one. */
|
||||
if (copy_to_user(argp, &args, sizeof(args)))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Subvolume not found on the first try (deleted or never existed). */
|
||||
if (refs == -1)
|
||||
return -ENOENT;

wait_for_deletion = true;
ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD);
sched_ret = schedule_timeout_interruptible(HZ);
/* Early wake up or error. */
if (sched_ret != 0)
return -EINTR;
}

return 0;
}
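This ioctl is what the updated "btrfs subvolume sync" is expected to use instead of the root-only SEARCH_TREE interface. Below is a hedged userspace sketch of one way to call it; the struct and constant names are taken from the hunk above, but the exact UAPI definitions should be checked against the linux/btrfs.h shipped with 6.13.

/* Build only against a 6.13+ linux/btrfs.h that defines these symbols. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/btrfs.h>

int main(int argc, char **argv)
{
	struct btrfs_ioctl_subvol_wait args = { 0 };
	int fd, ret;

	if (argc < 2) {
		fprintf(stderr, "usage: %s <btrfs mount point>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* Ask how many deleted subvolumes are still queued for cleaning. */
	args.mode = BTRFS_SUBVOL_SYNC_COUNT;
	ret = ioctl(fd, BTRFS_IOC_SUBVOL_SYNC_WAIT, &args);
	if (ret < 0)
		perror("BTRFS_IOC_SUBVOL_SYNC_WAIT");
	else
		printf("dead subvolumes still queued: %llu\n",
		       (unsigned long long)args.count);

	close(fd);
	return 0;
}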

long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
@@ -4811,7 +5271,7 @@ long btrfs_ioctl(struct file *file, unsigned int
        case BTRFS_IOC_QUOTA_RESCAN_STATUS:
                return btrfs_ioctl_quota_rescan_status(fs_info, argp);
        case BTRFS_IOC_QUOTA_RESCAN_WAIT:
-               return btrfs_ioctl_quota_rescan_wait(fs_info, argp);
+               return btrfs_ioctl_quota_rescan_wait(fs_info);
        case BTRFS_IOC_DEV_REPLACE:
                return btrfs_ioctl_dev_replace(fs_info, argp);
        case BTRFS_IOC_GET_SUPPORTED_FEATURES:
@@ -4840,6 +5300,8 @@ long btrfs_ioctl(struct file *file, unsigned int
        case BTRFS_IOC_ENCODED_WRITE_32:
                return btrfs_ioctl_encoded_write(file, argp, true);
#endif
+       case BTRFS_IOC_SUBVOL_SYNC_WAIT:
+               return btrfs_ioctl_subvol_sync(fs_info, argp);
        }

        return -ENOTTY;
@@ -22,5 +22,7 @@ void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
int __pure btrfs_is_empty_uuid(const u8 *uuid);
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
                                     struct btrfs_ioctl_balance_args *bargs);
+int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags);
+void btrfs_uring_read_extent_endio(void *ctx, int err);

#endif
@@ -161,21 +161,6 @@ int btrfs_try_tree_read_lock(struct extent_buffer *eb)
        return 0;
}

-/*
- * Try-lock for write.
- *
- * Return 1 if the rwlock has been taken, 0 otherwise
- */
-int btrfs_try_tree_write_lock(struct extent_buffer *eb)
-{
-       if (down_write_trylock(&eb->lock)) {
-               btrfs_set_eb_lock_owner(eb, current->pid);
-               trace_btrfs_try_tree_write_lock(eb);
-               return 1;
-       }
-       return 0;
-}
-
/*
 * Release read lock.
 */
@@ -180,7 +180,6 @@ static inline void btrfs_tree_read_lock(struct extent_buffer *eb)

void btrfs_tree_read_unlock(struct extent_buffer *eb);
int btrfs_try_tree_read_lock(struct extent_buffer *eb);
-int btrfs_try_tree_write_lock(struct extent_buffer *eb);
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root);
struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root);
@@ -80,7 +80,7 @@ void lzo_free_workspace(struct list_head *ws)
        kfree(workspace);
}

-struct list_head *lzo_alloc_workspace(unsigned int level)
+struct list_head *lzo_alloc_workspace(void)
{
        struct workspace *workspace;

|
@ -226,8 +226,7 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
|
||||
return qgroup;
|
||||
}
|
||||
|
||||
static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_qgroup *qgroup)
|
||||
static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
|
||||
{
|
||||
struct btrfs_qgroup_list *list;
|
||||
|
||||
@ -258,7 +257,7 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
|
||||
return -ENOENT;
|
||||
|
||||
rb_erase(&qgroup->node, &fs_info->qgroup_tree);
|
||||
__del_qgroup_rb(fs_info, qgroup);
|
||||
__del_qgroup_rb(qgroup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -469,7 +468,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info)
|
||||
/*
|
||||
* If a qgroup exists for a subvolume ID, it is possible
|
||||
* that subvolume has been deleted, in which case
|
||||
* re-using that ID would lead to incorrect accounting.
|
||||
* reusing that ID would lead to incorrect accounting.
|
||||
*
|
||||
* Ensure that we skip any such subvol ids.
|
||||
*
|
||||
@ -643,7 +642,7 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
|
||||
while ((n = rb_first(&fs_info->qgroup_tree))) {
|
||||
qgroup = rb_entry(n, struct btrfs_qgroup, node);
|
||||
rb_erase(n, &fs_info->qgroup_tree);
|
||||
__del_qgroup_rb(fs_info, qgroup);
|
||||
__del_qgroup_rb(qgroup);
|
||||
btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
|
||||
kfree(qgroup);
|
||||
}
|
||||
@ -2001,27 +2000,27 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
|
||||
* Return <0 for insertion failure, caller can free @record safely.
|
||||
*/
|
||||
int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_qgroup_extent_record *record)
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_qgroup_extent_record *record,
|
||||
u64 bytenr)
|
||||
{
|
||||
struct btrfs_qgroup_extent_record *existing, *ret;
|
||||
const unsigned long index = (record->bytenr >> fs_info->sectorsize_bits);
|
||||
const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
|
||||
|
||||
if (!btrfs_qgroup_full_accounting(fs_info))
|
||||
return 1;
|
||||
|
||||
#if BITS_PER_LONG == 32
|
||||
if (record->bytenr >= MAX_LFS_FILESIZE) {
|
||||
if (bytenr >= MAX_LFS_FILESIZE) {
|
||||
btrfs_err_rl(fs_info,
|
||||
"qgroup record for extent at %llu is beyond 32bit page cache and xarray index limit",
|
||||
record->bytenr);
|
||||
bytenr);
|
||||
btrfs_err_32bit_limit(fs_info);
|
||||
return -EOVERFLOW;
|
||||
}
|
||||
#endif
|
||||
|
||||
lockdep_assert_held(&delayed_refs->lock);
|
||||
trace_btrfs_qgroup_trace_extent(fs_info, record);
|
||||
trace_btrfs_qgroup_trace_extent(fs_info, record, bytenr);
|
||||
|
||||
xa_lock(&delayed_refs->dirty_extents);
|
||||
existing = xa_load(&delayed_refs->dirty_extents, index);
|
||||
@ -2066,12 +2065,17 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info,
|
||||
* transaction committing, but not now as qgroup accounting will be wrong again.
|
||||
*/
|
||||
int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_qgroup_extent_record *qrecord)
|
||||
struct btrfs_qgroup_extent_record *qrecord,
|
||||
u64 bytenr)
|
||||
{
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_backref_walk_ctx ctx = {
|
||||
.bytenr = bytenr,
|
||||
.fs_info = fs_info,
|
||||
};
|
||||
int ret;
|
||||
|
||||
if (!btrfs_qgroup_full_accounting(trans->fs_info))
|
||||
if (!btrfs_qgroup_full_accounting(fs_info))
|
||||
return 0;
|
||||
/*
|
||||
* We are always called in a context where we are already holding a
|
||||
@ -2094,16 +2098,13 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
ASSERT(trans != NULL);
|
||||
|
||||
if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
|
||||
if (fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)
|
||||
return 0;
|
||||
|
||||
ctx.bytenr = qrecord->bytenr;
|
||||
ctx.fs_info = trans->fs_info;
|
||||
|
||||
ret = btrfs_find_all_roots(&ctx, true);
|
||||
if (ret < 0) {
|
||||
qgroup_mark_inconsistent(trans->fs_info);
|
||||
btrfs_warn(trans->fs_info,
|
||||
qgroup_mark_inconsistent(fs_info);
|
||||
btrfs_warn(fs_info,
|
||||
"error accounting new delayed refs extent (err code: %d), quota inconsistent",
|
||||
ret);
|
||||
return 0;
|
||||
@ -2138,7 +2139,7 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_qgroup_extent_record *record;
|
||||
struct btrfs_delayed_ref_root *delayed_refs;
|
||||
struct btrfs_delayed_ref_root *delayed_refs = &trans->transaction->delayed_refs;
|
||||
const unsigned long index = (bytenr >> fs_info->sectorsize_bits);
|
||||
int ret;
|
||||
|
||||
@ -2148,26 +2149,21 @@ int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
if (!record)
|
||||
return -ENOMEM;
|
||||
|
||||
if (xa_reserve(&trans->transaction->delayed_refs.dirty_extents, index, GFP_NOFS)) {
|
||||
if (xa_reserve(&delayed_refs->dirty_extents, index, GFP_NOFS)) {
|
||||
kfree(record);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
record->bytenr = bytenr;
|
||||
record->num_bytes = num_bytes;
|
||||
record->old_roots = NULL;
|
||||
|
||||
spin_lock(&delayed_refs->lock);
|
||||
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record);
|
||||
spin_unlock(&delayed_refs->lock);
|
||||
ret = btrfs_qgroup_trace_extent_nolock(fs_info, delayed_refs, record, bytenr);
|
||||
if (ret) {
|
||||
/* Clean up if insertion fails or item exists. */
|
||||
xa_release(&delayed_refs->dirty_extents, index);
|
||||
kfree(record);
|
||||
return 0;
|
||||
}
|
||||
return btrfs_qgroup_trace_extent_post(trans, record);
|
||||
return btrfs_qgroup_trace_extent_post(trans, record, bytenr);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2652,7 +2648,6 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (!extent_buffer_uptodate(root_eb)) {
|
||||
struct btrfs_tree_parent_check check = {
|
||||
.has_first_key = false,
|
||||
.transid = root_gen,
|
||||
.level = root_level
|
||||
};
|
||||
@ -3043,14 +3038,16 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
|
||||
delayed_refs = &trans->transaction->delayed_refs;
|
||||
qgroup_to_skip = delayed_refs->qgroup_to_skip;
|
||||
xa_for_each(&delayed_refs->dirty_extents, index, record) {
|
||||
const u64 bytenr = (((u64)index) << fs_info->sectorsize_bits);
|
||||
|
||||
num_dirty_extents++;
|
||||
trace_btrfs_qgroup_account_extents(fs_info, record);
|
||||
trace_btrfs_qgroup_account_extents(fs_info, record, bytenr);
|
||||
|
||||
if (!ret && !(fs_info->qgroup_flags &
|
||||
BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) {
|
||||
struct btrfs_backref_walk_ctx ctx = { 0 };
|
||||
|
||||
ctx.bytenr = record->bytenr;
|
||||
ctx.bytenr = bytenr;
|
||||
ctx.fs_info = fs_info;
|
||||
|
||||
/*
|
||||
@ -3092,7 +3089,7 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans)
|
||||
ulist_del(record->old_roots, qgroup_to_skip,
|
||||
0);
|
||||
}
|
||||
ret = btrfs_qgroup_account_extent(trans, record->bytenr,
|
||||
ret = btrfs_qgroup_account_extent(trans, bytenr,
|
||||
record->num_bytes,
|
||||
record->old_roots,
|
||||
new_roots);
|
||||
@ -4196,13 +4193,20 @@ static int try_flush_qgroup(struct btrfs_root *root)
|
||||
return 0;
|
||||
}
|
||||
|
||||
btrfs_run_delayed_iputs(root->fs_info);
|
||||
btrfs_wait_on_delayed_iputs(root->fs_info);
|
||||
ret = btrfs_start_delalloc_snapshot(root, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
|
||||
|
||||
/*
|
||||
* After waiting for ordered extents run delayed iputs in order to free
|
||||
* space from unlinked files before committing the current transaction,
|
||||
* as ordered extents may have been holding the last reference of an
|
||||
* inode and they add a delayed iput when they complete.
|
||||
*/
|
||||
btrfs_run_delayed_iputs(root->fs_info);
|
||||
btrfs_wait_on_delayed_iputs(root->fs_info);
|
||||
|
||||
ret = btrfs_commit_current_transaction(root);
|
||||
out:
|
||||
clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
|
||||
@ -4687,8 +4691,7 @@ void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root)
|
||||
* BOTH POINTERS ARE BEFORE TREE SWAP
|
||||
* @last_snapshot: last snapshot generation of the subvolume tree
|
||||
*/
|
||||
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *subvol_root,
|
||||
int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
|
||||
struct btrfs_block_group *bg,
|
||||
struct extent_buffer *subvol_parent, int subvol_slot,
|
||||
struct extent_buffer *reloc_parent, int reloc_slot,
|
||||
@ -4894,17 +4897,6 @@ void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans)
|
||||
xa_destroy(&trans->delayed_refs.dirty_extents);
|
||||
}
|
||||
|
||||
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes)
|
||||
{
|
||||
if (btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_SIMPLE)
|
||||
return;
|
||||
|
||||
if (!is_fstree(root))
|
||||
return;
|
||||
|
||||
btrfs_qgroup_free_refroot(fs_info, root, rsv_bytes, BTRFS_QGROUP_RSV_DATA);
|
||||
}
|
||||
|
||||
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_squota_delta *delta)
|
||||
{
|
||||
|
@ -127,7 +127,12 @@ struct btrfs_inode;
|
||||
* Record a dirty extent, and info qgroup to update quota on it
|
||||
*/
|
||||
struct btrfs_qgroup_extent_record {
|
||||
u64 bytenr;
|
||||
/*
|
||||
* The bytenr of the extent is given by its index in the dirty_extents
|
||||
* xarray of struct btrfs_delayed_ref_root left shifted by
|
||||
* fs_info->sectorsize_bits.
|
||||
*/
|
||||
|
||||
u64 num_bytes;
|
||||
|
||||
/*
|
||||
@ -345,9 +350,11 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_qgroup_trace_extent_nolock(
|
||||
struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
struct btrfs_qgroup_extent_record *record);
|
||||
struct btrfs_qgroup_extent_record *record,
|
||||
u64 bytenr);
|
||||
int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_qgroup_extent_record *qrecord);
|
||||
struct btrfs_qgroup_extent_record *qrecord,
|
||||
u64 bytenr);
|
||||
int btrfs_qgroup_trace_extent(struct btrfs_trans_handle *trans, u64 bytenr,
|
||||
u64 num_bytes);
|
||||
int btrfs_qgroup_trace_leaf_items(struct btrfs_trans_handle *trans,
|
||||
@ -432,8 +439,7 @@ void btrfs_qgroup_init_swapped_blocks(
|
||||
struct btrfs_qgroup_swapped_blocks *swapped_blocks);
|
||||
|
||||
void btrfs_qgroup_clean_swapped_blocks(struct btrfs_root *root);
|
||||
int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *subvol_root,
|
||||
int btrfs_qgroup_add_swapped_blocks(struct btrfs_root *subvol_root,
|
||||
struct btrfs_block_group *bg,
|
||||
struct extent_buffer *subvol_parent, int subvol_slot,
|
||||
struct extent_buffer *reloc_parent, int reloc_slot,
|
||||
@ -442,7 +448,6 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct extent_buffer *eb);
|
||||
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
|
||||
bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info);
|
||||
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes);
|
||||
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_squota_delta *delta);
|
||||
|
||||
|
@ -13,6 +13,39 @@
|
||||
#include "volumes.h"
|
||||
#include "print-tree.h"
|
||||
|
||||
static void btrfs_partially_delete_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_key *oldkey,
|
||||
u64 newlen, u64 frontpad)
|
||||
{
|
||||
struct btrfs_stripe_extent *extent;
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
size_t item_size;
|
||||
struct btrfs_key newkey = {
|
||||
.objectid = oldkey->objectid + frontpad,
|
||||
.type = BTRFS_RAID_STRIPE_KEY,
|
||||
.offset = newlen,
|
||||
};
|
||||
|
||||
ASSERT(oldkey->type == BTRFS_RAID_STRIPE_KEY);
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
item_size = btrfs_item_size(leaf, slot);
|
||||
extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
|
||||
|
||||
for (int i = 0; i < btrfs_num_raid_stripes(item_size); i++) {
|
||||
struct btrfs_raid_stride *stride = &extent->strides[i];
|
||||
u64 phys;
|
||||
|
||||
phys = btrfs_raid_stride_physical(leaf, stride);
|
||||
btrfs_set_raid_stride_physical(leaf, stride, phys + frontpad);
|
||||
}
|
||||
|
||||
btrfs_set_item_key_safe(trans, path, &newkey);
|
||||
}
|
||||
|
||||
int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 length)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
@ -36,23 +69,24 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
||||
while (1) {
|
||||
key.objectid = start;
|
||||
key.type = BTRFS_RAID_STRIPE_KEY;
|
||||
key.offset = length;
|
||||
key.offset = 0;
|
||||
|
||||
ret = btrfs_search_slot(trans, stripe_root, &key, path, -1, 1);
|
||||
if (ret < 0)
|
||||
break;
|
||||
if (ret > 0) {
|
||||
ret = 0;
|
||||
if (path->slots[0] == 0)
|
||||
break;
|
||||
|
||||
if (path->slots[0] == btrfs_header_nritems(path->nodes[0]))
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
leaf = path->nodes[0];
|
||||
slot = path->slots[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, slot);
|
||||
found_start = key.objectid;
|
||||
found_end = found_start + key.offset;
|
||||
ret = 0;
|
||||
|
||||
if (key.type != BTRFS_RAID_STRIPE_KEY)
|
||||
break;
|
||||
|
||||
/* That stripe ends before we start, we're done. */
|
||||
if (found_end <= start)
|
||||
@ -61,7 +95,40 @@ int btrfs_delete_raid_extent(struct btrfs_trans_handle *trans, u64 start, u64 le
|
||||
trace_btrfs_raid_extent_delete(fs_info, start, end,
|
||||
found_start, found_end);
|
||||
|
||||
ASSERT(found_start >= start && found_end <= end);
|
||||
/*
|
||||
* The stripe extent starts before the range we want to delete:
|
||||
*
|
||||
* |--- RAID Stripe Extent ---|
|
||||
* |--- keep ---|--- drop ---|
|
||||
*
|
||||
* This means we have to duplicate the tree item, truncate the
|
||||
* length to the new size and then re-insert the item.
|
||||
*/
|
||||
if (found_start < start) {
|
||||
u64 diff = start - found_start;
|
||||
|
||||
btrfs_partially_delete_raid_extent(trans, path, &key,
|
||||
diff, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* The stripe extent ends after the range we want to delete:
|
||||
*
|
||||
* |--- RAID Stripe Extent ---|
|
||||
* |--- drop ---|--- keep ---|
|
||||
*
|
||||
* This means we have to duplicate the tree item, truncate the
|
||||
* length to the new size and then re-insert the item.
|
||||
*/
|
||||
if (found_end > end) {
|
||||
u64 diff = found_end - end;
|
||||
|
||||
btrfs_partially_delete_raid_extent(trans, path, &key,
|
||||
diff, diff);
|
||||
break;
|
||||
}
|
||||
|
||||
ret = btrfs_del_item(trans, stripe_root, path);
|
||||
if (ret)
|
||||
break;
|
||||
@ -108,8 +175,9 @@ static int update_raid_extent_item(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_io_context *bioc)
|
||||
EXPORT_FOR_TESTS
|
||||
int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_io_context *bioc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_key stripe_key;
|
||||
@ -233,7 +301,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
|
||||
found_end = found_logical + found_length;
|
||||
|
||||
if (found_logical > end) {
|
||||
ret = -ENOENT;
|
||||
ret = -ENODATA;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -279,10 +347,10 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
/* If we're here, we haven't found the requested devid in the stripe. */
|
||||
ret = -ENOENT;
|
||||
ret = -ENODATA;
|
||||
out:
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
ret = -ENODATA;
|
||||
if (ret && ret != -EIO && !stripe->rst_search_commit_root) {
|
||||
btrfs_debug(fs_info,
|
||||
"cannot find raid-stripe for logical [%llu, %llu] devid %llu, profile %s",
|
||||
|
@ -28,6 +28,11 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
|
||||
int btrfs_insert_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_ordered_extent *ordered_extent);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_io_context *bioc);
|
||||
#endif
|
||||
|
||||
static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
|
||||
u64 map_type)
|
||||
{
|
||||
|
@ -1272,8 +1272,7 @@ static inline void bio_list_put(struct bio_list *bio_list)
|
||||
|
||||
static void assert_rbio(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
|
||||
!IS_ENABLED(CONFIG_BTRFS_ASSERT))
|
||||
if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
|
||||
return;
|
||||
|
||||
/*
|
||||
|
@ -1244,7 +1244,7 @@ int replace_path(struct btrfs_trans_handle *trans, struct reloc_control *rc,
|
||||
* The real subtree rescan is delayed until we have new
|
||||
* CoW on the subtree root node before transaction commit.
|
||||
*/
|
||||
ret = btrfs_qgroup_add_swapped_blocks(trans, dest,
|
||||
ret = btrfs_qgroup_add_swapped_blocks(dest,
|
||||
rc->block_group, parent, slot,
|
||||
path->nodes[level], path->slots[level],
|
||||
last_snapshot);
|
||||
|
@ -1656,8 +1656,7 @@ static u32 stripe_length(const struct scrub_stripe *stripe)
|
||||
stripe->bg->start + stripe->bg->length - stripe->logical);
|
||||
}
|
||||
|
||||
static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
|
||||
struct scrub_stripe *stripe)
|
||||
static void scrub_submit_extent_sector_read(struct scrub_stripe *stripe)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
|
||||
struct btrfs_bio *bbio = NULL;
|
||||
@ -1704,8 +1703,18 @@ static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx,
|
||||
&stripe_len, &bioc, &io_stripe, &mirror);
|
||||
btrfs_put_bioc(bioc);
|
||||
if (err < 0) {
|
||||
set_bit(i, &stripe->io_error_bitmap);
|
||||
set_bit(i, &stripe->error_bitmap);
|
||||
if (err != -ENODATA) {
|
||||
/*
|
||||
* Earlier btrfs_get_raid_extent_offset()
|
||||
* returned -ENODATA, which means there's
|
||||
* no entry for the corresponding range
|
||||
* in the stripe tree. But if it's in
|
||||
* the extent tree, then it's a preallocated
|
||||
* extent and not an error.
|
||||
*/
|
||||
set_bit(i, &stripe->io_error_bitmap);
|
||||
set_bit(i, &stripe->error_bitmap);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -1743,7 +1752,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx,
|
||||
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
|
||||
|
||||
if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) {
|
||||
scrub_submit_extent_sector_read(sctx, stripe);
|
||||
scrub_submit_extent_sector_read(stripe);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1954,7 +1963,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
|
||||
ASSERT(sctx->raid56_data_stripes);
|
||||
|
||||
/*
|
||||
* For data stripe search, we cannot re-use the same extent/csum paths,
|
||||
* For data stripe search, we cannot reuse the same extent/csum paths,
|
||||
* as the data stripe bytenr may be smaller than previous extent. Thus
|
||||
* we have to use our own extent/csum paths.
|
||||
*/
|
||||
@ -2103,7 +2112,6 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
|
||||
*/
|
||||
static int scrub_simple_mirror(struct scrub_ctx *sctx,
|
||||
struct btrfs_block_group *bg,
|
||||
struct btrfs_chunk_map *map,
|
||||
u64 logical_start, u64 logical_length,
|
||||
struct btrfs_device *device,
|
||||
u64 physical, int mirror_num)
|
||||
@ -2222,7 +2230,7 @@ static int scrub_simple_stripe(struct scrub_ctx *sctx,
|
||||
* just RAID1, so we can reuse scrub_simple_mirror() to scrub
|
||||
* this stripe.
|
||||
*/
|
||||
ret = scrub_simple_mirror(sctx, bg, map, cur_logical,
|
||||
ret = scrub_simple_mirror(sctx, bg, cur_logical,
|
||||
BTRFS_STRIPE_LEN, device, cur_physical,
|
||||
mirror_num);
|
||||
if (ret)
|
||||
@ -2256,7 +2264,6 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
/* Offset inside the chunk */
|
||||
u64 offset;
|
||||
u64 stripe_logical;
|
||||
int stop_loop = 0;
|
||||
|
||||
/* Extent_path should be released by now. */
|
||||
ASSERT(sctx->extent_path.nodes[0] == NULL);
|
||||
@ -2307,7 +2314,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
* Only @physical and @mirror_num needs to calculated using
|
||||
* @stripe_index.
|
||||
*/
|
||||
ret = scrub_simple_mirror(sctx, bg, map, bg->start, bg->length,
|
||||
ret = scrub_simple_mirror(sctx, bg, bg->start, bg->length,
|
||||
scrub_dev, map->stripes[stripe_index].physical,
|
||||
stripe_index + 1);
|
||||
offset = 0;
|
||||
@ -2362,7 +2369,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
* We can reuse scrub_simple_mirror() here, as the repair part
|
||||
* is still based on @mirror_num.
|
||||
*/
|
||||
ret = scrub_simple_mirror(sctx, bg, map, logical, BTRFS_STRIPE_LEN,
|
||||
ret = scrub_simple_mirror(sctx, bg, logical, BTRFS_STRIPE_LEN,
|
||||
scrub_dev, physical, 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@ -2370,14 +2377,8 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
|
||||
logical += increment;
|
||||
physical += BTRFS_STRIPE_LEN;
|
||||
spin_lock(&sctx->stat_lock);
|
||||
if (stop_loop)
|
||||
sctx->stat.last_physical =
|
||||
map->stripes[stripe_index].physical + dev_stripe_len;
|
||||
else
|
||||
sctx->stat.last_physical = physical;
|
||||
sctx->stat.last_physical = physical;
|
||||
spin_unlock(&sctx->stat_lock);
|
||||
if (stop_loop)
|
||||
break;
|
||||
}
|
||||
out:
|
||||
ret2 = flush_scrub_stripes(sctx);
|
||||
|
@ -980,9 +980,7 @@ static int get_inode_gen(struct btrfs_root *root, u64 ino, u64 *gen)
|
||||
return ret;
|
||||
}
|
||||
|
||||
typedef int (*iterate_inode_ref_t)(int num, u64 dir, int index,
|
||||
struct fs_path *p,
|
||||
void *ctx);
|
||||
typedef int (*iterate_inode_ref_t)(u64 dir, struct fs_path *p, void *ctx);
|
||||
|
||||
/*
|
||||
* Helper function to iterate the entries in ONE btrfs_inode_ref or
|
||||
@ -1007,8 +1005,6 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
|
||||
u32 name_len;
|
||||
char *start;
|
||||
int ret = 0;
|
||||
int num = 0;
|
||||
int index;
|
||||
u64 dir;
|
||||
unsigned long name_off;
|
||||
unsigned long elem_size;
|
||||
@ -1043,13 +1039,11 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
|
||||
iref = (struct btrfs_inode_ref *)(ptr + cur);
|
||||
name_len = btrfs_inode_ref_name_len(eb, iref);
|
||||
name_off = (unsigned long)(iref + 1);
|
||||
index = btrfs_inode_ref_index(eb, iref);
|
||||
dir = found_key->offset;
|
||||
} else {
|
||||
extref = (struct btrfs_inode_extref *)(ptr + cur);
|
||||
name_len = btrfs_inode_extref_name_len(eb, extref);
|
||||
name_off = (unsigned long)&extref->name;
|
||||
index = btrfs_inode_extref_index(eb, extref);
|
||||
dir = btrfs_inode_extref_parent(eb, extref);
|
||||
}
|
||||
|
||||
@ -1094,10 +1088,9 @@ static int iterate_inode_ref(struct btrfs_root *root, struct btrfs_path *path,
|
||||
}
|
||||
|
||||
cur += elem_size + name_len;
|
||||
ret = iterate(num, dir, index, p, ctx);
|
||||
ret = iterate(dir, p, ctx);
|
||||
if (ret)
|
||||
goto out;
|
||||
num++;
|
||||
}
|
||||
|
||||
out:
|
||||
@ -1227,8 +1220,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __copy_first_ref(int num, u64 dir, int index,
|
||||
struct fs_path *p, void *ctx)
|
||||
static int __copy_first_ref(u64 dir, struct fs_path *p, void *ctx)
|
||||
{
|
||||
int ret;
|
||||
struct fs_path *pt = ctx;
|
||||
@ -3768,7 +3760,6 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
|
||||
struct recorded_ref *parent_ref,
|
||||
const bool is_orphan)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = sctx->parent_root->fs_info;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key di_key;
|
||||
@ -3797,7 +3788,7 @@ static int wait_for_dest_dir_move(struct send_ctx *sctx,
|
||||
goto out;
|
||||
}
|
||||
|
||||
di = btrfs_match_dir_item_name(fs_info, path, parent_ref->name,
|
||||
di = btrfs_match_dir_item_name(path, parent_ref->name,
|
||||
parent_ref->name_len);
|
||||
if (!di) {
|
||||
ret = 0;
|
||||
@ -4708,8 +4699,7 @@ static int record_ref_in_tree(struct rb_root *root, struct list_head *refs,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int record_new_ref_if_needed(int num, u64 dir, int index,
|
||||
struct fs_path *name, void *ctx)
|
||||
static int record_new_ref_if_needed(u64 dir, struct fs_path *name, void *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
struct send_ctx *sctx = ctx;
|
||||
@ -4738,8 +4728,7 @@ static int record_new_ref_if_needed(int num, u64 dir, int index,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int record_deleted_ref_if_needed(int num, u64 dir, int index,
|
||||
struct fs_path *name, void *ctx)
|
||||
static int record_deleted_ref_if_needed(u64 dir, struct fs_path *name, void *ctx)
|
||||
{
|
||||
int ret = 0;
|
||||
struct send_ctx *sctx = ctx;
|
||||
@ -5677,10 +5666,11 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
|
||||
* Note that send_buf is a mapping of send_buf_pages, so this is really
|
||||
* reading into send_buf.
|
||||
*/
|
||||
ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode), offset,
|
||||
ret = btrfs_encoded_read_regular_fill_pages(BTRFS_I(inode),
|
||||
disk_bytenr, disk_num_bytes,
|
||||
sctx->send_buf_pages +
|
||||
(data_offset >> PAGE_SHIFT));
|
||||
(data_offset >> PAGE_SHIFT),
|
||||
NULL);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@ -8135,7 +8125,20 @@ long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_a
|
||||
* making it RW. This also protects against deletion.
|
||||
*/
|
||||
spin_lock(&send_root->root_item_lock);
|
||||
if (btrfs_root_readonly(send_root) && send_root->dedupe_in_progress) {
|
||||
/*
|
||||
* Unlikely but possible, if the subvolume is marked for deletion but
|
||||
* is slow to remove the directory entry, send can still be started.
|
||||
*/
|
||||
if (btrfs_root_dead(send_root)) {
|
||||
spin_unlock(&send_root->root_item_lock);
|
||||
return -EPERM;
|
||||
}
|
||||
/* Userspace tools do the checks and warn the user if it's not RO. */
|
||||
if (!btrfs_root_readonly(send_root)) {
|
||||
spin_unlock(&send_root->root_item_lock);
|
||||
return -EPERM;
|
||||
}
|
||||
if (send_root->dedupe_in_progress) {
|
||||
dedupe_in_progress_warn(send_root);
|
||||
spin_unlock(&send_root->root_item_lock);
|
||||
return -EAGAIN;
|
||||
@ -8143,15 +8146,6 @@ long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_a
|
||||
send_root->send_in_progress++;
|
||||
spin_unlock(&send_root->root_item_lock);
|
||||
|
||||
/*
|
||||
* Userspace tools do the checks and warn the user if it's
|
||||
* not RO.
|
||||
*/
|
||||
if (!btrfs_root_readonly(send_root)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that we don't overflow at later allocations, we request
|
||||
* clone_sources_count + 1 items, and compare to unsigned long inside
|
||||
@ -8217,15 +8211,6 @@ long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_a
|
||||
}
|
||||
|
||||
sctx->send_root = send_root;
|
||||
/*
|
||||
* Unlikely but possible, if the subvolume is marked for deletion but
|
||||
* is slow to remove the directory entry, send can still be started
|
||||
*/
|
||||
if (btrfs_root_dead(sctx->send_root)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sctx->clone_roots_cnt = arg->clone_sources_count;
|
||||
|
||||
if (sctx->proto >= 2) {
|
||||
|
@ -16,7 +16,7 @@ struct btrfs_ioctl_send_args;
|
||||
|
||||
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
|
||||
/* Conditional support for the upcoming protocol version. */
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
#ifdef CONFIG_BTRFS_EXPERIMENTAL
|
||||
#define BTRFS_SEND_STREAM_VERSION 3
|
||||
#else
|
||||
#define BTRFS_SEND_STREAM_VERSION 2
|
||||
|
@ -1279,7 +1279,7 @@ static void btrfs_preempt_reclaim_metadata_space(struct work_struct *work)
|
||||
* If we are freeing inodes, we want to make sure all delayed iputs have
|
||||
* completed, because they could have been on an inode with i_nlink == 0, and
|
||||
* thus have been truncated and freed up space. But again this space is not
|
||||
* immediately re-usable, it comes in the form of a delayed ref, which must be
|
||||
* immediately reusable, it comes in the form of a delayed ref, which must be
|
||||
* run and then the transaction must be committed.
|
||||
*
|
||||
* COMMIT_TRANS
|
||||
@ -1488,8 +1488,7 @@ static void priority_reclaim_data_space(struct btrfs_fs_info *fs_info,
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
|
||||
static void wait_reserve_ticket(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
static void wait_reserve_ticket(struct btrfs_space_info *space_info,
|
||||
struct reserve_ticket *ticket)
|
||||
|
||||
{
|
||||
@ -1547,7 +1546,7 @@ static int handle_reserve_ticket(struct btrfs_fs_info *fs_info,
|
||||
case BTRFS_RESERVE_FLUSH_DATA:
|
||||
case BTRFS_RESERVE_FLUSH_ALL:
|
||||
case BTRFS_RESERVE_FLUSH_ALL_STEAL:
|
||||
wait_reserve_ticket(fs_info, space_info, ticket);
|
||||
wait_reserve_ticket(space_info, ticket);
|
||||
break;
|
||||
case BTRFS_RESERVE_FLUSH_LIMIT:
|
||||
priority_reclaim_metadata_space(fs_info, space_info, ticket,
|
||||
@ -1984,8 +1983,7 @@ static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
|
||||
return unalloc < data_chunk_size;
|
||||
}
|
||||
|
||||
static void do_reclaim_sweep(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, int raid)
|
||||
static void do_reclaim_sweep(struct btrfs_space_info *space_info, int raid)
|
||||
{
|
||||
struct btrfs_block_group *bg;
|
||||
int thresh_pct;
|
||||
@ -2081,6 +2079,6 @@ void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info)
|
||||
if (!btrfs_should_periodic_reclaim(space_info))
|
||||
continue;
|
||||
for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++)
|
||||
do_reclaim_sweep(fs_info, space_info, raid);
|
||||
do_reclaim_sweep(space_info, raid);
|
||||
}
|
||||
}
|
||||
|
@ -140,12 +140,10 @@ struct btrfs_subpage *btrfs_alloc_subpage(const struct btrfs_fs_info *fs_info,
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
spin_lock_init(&ret->lock);
|
||||
if (type == BTRFS_SUBPAGE_METADATA) {
|
||||
if (type == BTRFS_SUBPAGE_METADATA)
|
||||
atomic_set(&ret->eb_refs, 0);
|
||||
} else {
|
||||
atomic_set(&ret->readers, 0);
|
||||
atomic_set(&ret->writers, 0);
|
||||
}
|
||||
else
|
||||
atomic_set(&ret->nr_locked, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -221,62 +219,6 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
|
||||
__start_bit; \
|
||||
})
|
||||
|
||||
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
|
||||
const int nbits = len >> fs_info->sectorsize_bits;
|
||||
unsigned long flags;
|
||||
|
||||
|
||||
btrfs_subpage_assert(fs_info, folio, start, len);
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
/*
|
||||
* Even though it's just for reading the page, no one should have
|
||||
* locked the subpage range.
|
||||
*/
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
bitmap_set(subpage->bitmaps, start_bit, nbits);
|
||||
atomic_add(nbits, &subpage->readers);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
|
||||
const int nbits = len >> fs_info->sectorsize_bits;
|
||||
unsigned long flags;
|
||||
bool is_data;
|
||||
bool last;
|
||||
|
||||
btrfs_subpage_assert(fs_info, folio, start, len);
|
||||
is_data = is_data_inode(BTRFS_I(folio->mapping->host));
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
|
||||
/* The range should have already been locked. */
|
||||
ASSERT(bitmap_test_range_all_set(subpage->bitmaps, start_bit, nbits));
|
||||
ASSERT(atomic_read(&subpage->readers) >= nbits);
|
||||
|
||||
bitmap_clear(subpage->bitmaps, start_bit, nbits);
|
||||
last = atomic_sub_and_test(nbits, &subpage->readers);
|
||||
|
||||
/*
|
||||
* For data we need to unlock the page if the last read has finished.
|
||||
*
|
||||
* And please don't replace @last with atomic_sub_and_test() call
|
||||
* inside if () condition.
|
||||
* As we want the atomic_sub_and_test() to be always executed.
|
||||
*/
|
||||
if (is_data && last)
|
||||
folio_unlock(folio);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
|
||||
{
|
||||
u64 orig_start = *start;
|
||||
@ -295,28 +237,8 @@ static void btrfs_subpage_clamp_range(struct folio *folio, u64 *start, u32 *len)
|
||||
orig_start + orig_len) - *start;
|
||||
}
|
||||
|
||||
static void btrfs_subpage_start_writer(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
|
||||
const int nbits = (len >> fs_info->sectorsize_bits);
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
btrfs_subpage_assert(fs_info, folio, start, len);
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
ASSERT(atomic_read(&subpage->readers) == 0);
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
bitmap_set(subpage->bitmaps, start_bit, nbits);
|
||||
ret = atomic_add_return(nbits, &subpage->writers);
|
||||
ASSERT(ret == nbits);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
static bool btrfs_subpage_end_and_test_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const int start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
|
||||
@ -334,9 +256,9 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf
|
||||
* extent_clear_unlock_delalloc() for compression path.
|
||||
*
|
||||
* This @locked_page is locked by plain lock_page(), thus its
|
||||
* subpage::writers is 0. Handle them in a special way.
|
||||
* subpage::locked is 0. Handle them in a special way.
|
||||
*/
|
||||
if (atomic_read(&subpage->writers) == 0) {
|
||||
if (atomic_read(&subpage->nr_locked) == 0) {
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
return true;
|
||||
}
|
||||
@ -345,39 +267,12 @@ static bool btrfs_subpage_end_and_test_writer(const struct btrfs_fs_info *fs_inf
|
||||
clear_bit(bit, subpage->bitmaps);
|
||||
cleared++;
|
||||
}
|
||||
ASSERT(atomic_read(&subpage->writers) >= cleared);
|
||||
last = atomic_sub_and_test(cleared, &subpage->writers);
|
||||
ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
|
||||
last = atomic_sub_and_test(cleared, &subpage->nr_locked);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
return last;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lock a folio for delalloc page writeback.
|
||||
*
|
||||
* Return -EAGAIN if the page is not properly initialized.
|
||||
* Return 0 with the page locked, and writer counter updated.
|
||||
*
|
||||
* Even with 0 returned, the page still need extra check to make sure
|
||||
* it's really the correct page, as the caller is using
|
||||
* filemap_get_folios_contig(), which can race with page invalidating.
|
||||
*/
|
||||
int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
|
||||
folio_lock(folio);
|
||||
return 0;
|
||||
}
|
||||
folio_lock(folio);
|
||||
if (!folio_test_private(folio) || !folio_get_private(folio)) {
|
||||
folio_unlock(folio);
|
||||
return -EAGAIN;
|
||||
}
|
||||
btrfs_subpage_clamp_range(folio, &start, &len);
|
||||
btrfs_subpage_start_writer(fs_info, folio, start, len);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle different locked folios:
|
||||
*
|
||||
@ -394,8 +289,8 @@ int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
* bitmap, reduce the writer lock number, and unlock the page if that's
|
||||
* the last locked range.
|
||||
*/
|
||||
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
|
||||
@ -408,24 +303,24 @@ void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
|
||||
/*
|
||||
* For subpage case, there are two types of locked page. With or
|
||||
* without writers number.
|
||||
* without locked number.
|
||||
*
|
||||
* Since we own the page lock, no one else could touch subpage::writers
|
||||
* Since we own the page lock, no one else could touch subpage::locked
|
||||
* and we are safe to do several atomic operations without spinlock.
|
||||
*/
|
||||
if (atomic_read(&subpage->writers) == 0) {
|
||||
/* No writers, locked by plain lock_page(). */
|
||||
if (atomic_read(&subpage->nr_locked) == 0) {
|
||||
/* No subpage lock, locked by plain lock_page(). */
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
|
||||
btrfs_subpage_clamp_range(folio, &start, &len);
|
||||
if (btrfs_subpage_end_and_test_writer(fs_info, folio, start, len))
|
||||
if (btrfs_subpage_end_and_test_lock(fs_info, folio, start, len))
|
||||
folio_unlock(folio);
|
||||
}
|
||||
|
||||
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, unsigned long bitmap)
|
||||
void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, unsigned long bitmap)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const int start_bit = fs_info->sectors_per_page * btrfs_bitmap_nr_locked;
|
||||
@ -434,13 +329,13 @@ void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
int cleared = 0;
|
||||
int bit;
|
||||
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping)) {
|
||||
if (!btrfs_is_subpage(fs_info, folio->mapping)) {
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
|
||||
if (atomic_read(&subpage->writers) == 0) {
|
||||
/* No writers, locked by plain lock_page(). */
|
||||
if (atomic_read(&subpage->nr_locked) == 0) {
|
||||
/* No subpage lock, locked by plain lock_page(). */
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
@ -450,8 +345,8 @@ void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
if (test_and_clear_bit(bit + start_bit, subpage->bitmaps))
|
||||
cleared++;
|
||||
}
|
||||
ASSERT(atomic_read(&subpage->writers) >= cleared);
|
||||
last = atomic_sub_and_test(cleared, &subpage->writers);
|
||||
ASSERT(atomic_read(&subpage->nr_locked) >= cleared);
|
||||
last = atomic_sub_and_test(cleared, &subpage->nr_locked);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
if (last)
|
||||
folio_unlock(folio);
|
||||
@ -776,8 +671,8 @@ void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
||||
* This populates the involved subpage ranges so that subpage helpers can
|
||||
* properly unlock them.
|
||||
*/
|
||||
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage;
|
||||
unsigned long flags;
|
||||
@ -796,58 +691,11 @@ void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
/* Target range should not yet be locked. */
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
bitmap_set(subpage->bitmaps, start_bit, nbits);
|
||||
ret = atomic_add_return(nbits, &subpage->writers);
|
||||
ret = atomic_add_return(nbits, &subpage->nr_locked);
|
||||
ASSERT(ret <= fs_info->sectors_per_page);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find any subpage writer locked range inside @folio, starting at file offset
|
||||
* @search_start. The caller should ensure the folio is locked.
|
||||
*
|
||||
* Return true and update @found_start_ret and @found_len_ret to the first
|
||||
* writer locked range.
|
||||
* Return false if there is no writer locked range.
|
||||
*/
|
||||
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 search_start,
|
||||
u64 *found_start_ret, u32 *found_len_ret)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const u32 sectors_per_page = fs_info->sectors_per_page;
|
||||
const unsigned int len = PAGE_SIZE - offset_in_page(search_start);
|
||||
const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
|
||||
locked, search_start, len);
|
||||
const unsigned int locked_bitmap_start = sectors_per_page * btrfs_bitmap_nr_locked;
|
||||
const unsigned int locked_bitmap_end = locked_bitmap_start + sectors_per_page;
|
||||
unsigned long flags;
|
||||
int first_zero;
|
||||
int first_set;
|
||||
bool found = false;
|
||||
|
||||
ASSERT(folio_test_locked(folio));
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit);
|
||||
if (first_set >= locked_bitmap_end)
|
||||
goto out;
|
||||
|
||||
found = true;
|
||||
|
||||
*found_start_ret = folio_pos(folio) +
|
||||
((first_set - locked_bitmap_start) << fs_info->sectorsize_bits);
|
||||
/*
|
||||
* Since @first_set is ensured to be smaller than locked_bitmap_end
|
||||
* here, @found_start_ret should be inside the folio.
|
||||
*/
|
||||
ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE);
|
||||
|
||||
first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set);
|
||||
*found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits;
|
||||
out:
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
return found;
|
||||
}
|
||||
|
||||
#define GET_SUBPAGE_BITMAP(subpage, fs_info, name, dst) \
|
||||
{ \
|
||||
const int sectors_per_page = fs_info->sectors_per_page; \
|
||||
|
@ -45,14 +45,6 @@ enum {
|
||||
struct btrfs_subpage {
|
||||
/* Common members for both data and metadata pages */
|
||||
spinlock_t lock;
|
||||
/*
|
||||
* Both data and metadata needs to track how many readers are for the
|
||||
* page.
|
||||
* Data relies on @readers to unlock the page when last reader finished.
|
||||
* While metadata doesn't need page unlock, it needs to prevent
|
||||
* page::private get cleared before the last end_page_read().
|
||||
*/
|
||||
atomic_t readers;
|
||||
union {
|
||||
/*
|
||||
* Structures only used by metadata
|
||||
@ -62,8 +54,12 @@ struct btrfs_subpage {
|
||||
*/
|
||||
atomic_t eb_refs;
|
||||
|
||||
/* Structures only used by data */
|
||||
atomic_t writers;
|
||||
/*
|
||||
* Structures only used by data,
|
||||
*
|
||||
* How many sectors inside the page is locked.
|
||||
*/
|
||||
atomic_t nr_locked;
|
||||
};
|
||||
unsigned long bitmaps[];
|
||||
};
|
||||
@ -95,23 +91,12 @@ void btrfs_free_subpage(struct btrfs_subpage *subpage);
|
||||
void btrfs_folio_inc_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
|
||||
void btrfs_folio_dec_eb_refs(const struct btrfs_fs_info *fs_info, struct folio *folio);
|
||||
|
||||
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
|
||||
int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_end_writer_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, unsigned long bitmap);
|
||||
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 search_start,
|
||||
u64 *found_start_ret, u32 *found_len_ret);
|
||||
|
||||
void btrfs_folio_end_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_set_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_end_lock_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, unsigned long bitmap);
|
||||
/*
|
||||
* Template for subpage related operations.
|
||||
*
|
||||
|
@ -28,7 +28,6 @@
|
||||
#include <linux/btrfs.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/fs_parser.h>
|
||||
#include <linux/swap.h>
|
||||
#include "messages.h"
|
||||
#include "delayed-inode.h"
|
||||
#include "ctree.h"
|
||||
@ -946,8 +945,7 @@ static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objec
|
||||
}
|
||||
|
||||
static int btrfs_fill_super(struct super_block *sb,
|
||||
struct btrfs_fs_devices *fs_devices,
|
||||
void *data)
|
||||
struct btrfs_fs_devices *fs_devices)
|
||||
{
|
||||
struct inode *inode;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
@ -971,7 +969,7 @@ static int btrfs_fill_super(struct super_block *sb,
|
||||
return err;
|
||||
}
|
||||
|
||||
err = open_ctree(sb, fs_devices, (char *)data);
|
||||
err = open_ctree(sb, fs_devices);
|
||||
if (err) {
|
||||
btrfs_err(fs_info, "open_ctree failed");
|
||||
return err;
|
||||
@ -1893,7 +1891,7 @@ static int btrfs_get_tree_super(struct fs_context *fc)
|
||||
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
|
||||
shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id);
|
||||
btrfs_sb(sb)->bdev_holder = &btrfs_fs_type;
|
||||
ret = btrfs_fill_super(sb, fs_devices, NULL);
|
||||
ret = btrfs_fill_super(sb, fs_devices);
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
@ -2257,7 +2255,10 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
|
||||
device = btrfs_scan_one_device(vol->name, BLK_OPEN_READ, false);
|
||||
if (IS_ERR_OR_NULL(device)) {
|
||||
mutex_unlock(&uuid_mutex);
|
||||
ret = PTR_ERR(device);
|
||||
if (IS_ERR(device))
|
||||
ret = PTR_ERR(device);
|
||||
else
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
ret = !(device->fs_devices->num_devices ==
|
||||
@ -2396,13 +2397,7 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro
|
||||
|
||||
trace_btrfs_extent_map_shrinker_count(fs_info, nr);
|
||||
|
||||
/*
|
||||
* Only report the real number for DEBUG builds, as there are reports of
|
||||
* serious performance degradation caused by too frequent shrinks.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_BTRFS_DEBUG))
|
||||
return nr;
|
||||
return 0;
|
||||
return nr;
|
||||
}
|
||||
|
||||
static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc)
|
||||
@ -2410,16 +2405,10 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont
|
||||
const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan);
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
|
||||
/*
|
||||
* We may be called from any task trying to allocate memory and we don't
|
||||
* want to slow it down with scanning and dropping extent maps. It would
|
||||
* also cause heavy lock contention if many tasks concurrently enter
|
||||
* here. Therefore only allow kswapd tasks to scan and drop extent maps.
|
||||
*/
|
||||
if (!current_is_kswapd())
|
||||
return 0;
|
||||
btrfs_free_extent_maps(fs_info, nr_to_scan);
|
||||
|
||||
return btrfs_free_extent_maps(fs_info, nr_to_scan);
|
||||
/* The extent map shrinker runs asynchronously, so always return 0. */
|
||||
return 0;
|
||||
}
|
||||
|
||||
static const struct super_operations btrfs_super_ops = {
@@ -1390,7 +1390,7 @@ static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
              btrfs_bg_reclaim_threshold_store);

-#ifdef CONFIG_BTRFS_DEBUG
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
static ssize_t btrfs_offload_csum_show(struct kobject *kobj,
                                       struct kobj_attribute *a, char *buf)
{
@@ -1450,7 +1450,7 @@ static const struct attribute *btrfs_attrs[] = {
        BTRFS_ATTR_PTR(, bg_reclaim_threshold),
        BTRFS_ATTR_PTR(, commit_stats),
        BTRFS_ATTR_PTR(, temp_fsid),
-#ifdef CONFIG_BTRFS_DEBUG
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
        BTRFS_ATTR_PTR(, offload_csum),
#endif
        NULL,
@@ -29,6 +29,7 @@ const char *test_error[] = {
        [TEST_ALLOC_BLOCK_GROUP] = "cannot allocate block group",
        [TEST_ALLOC_EXTENT_MAP] = "cannot allocate extent map",
        [TEST_ALLOC_CHUNK_MAP] = "cannot allocate chunk map",
+       [TEST_ALLOC_IO_CONTEXT] = "cannot allocate io context",
};

static const struct super_operations btrfs_test_super_ops = {
@@ -291,6 +292,9 @@ int btrfs_run_sanity_tests(void)
                        ret = btrfs_test_free_space_tree(sectorsize, nodesize);
                        if (ret)
                                goto out;
+                       ret = btrfs_test_raid_stripe_tree(sectorsize, nodesize);
+                       if (ret)
+                               goto out;
                }
        }
        ret = btrfs_test_extent_map();
@@ -24,6 +24,7 @@ enum {
        TEST_ALLOC_BLOCK_GROUP,
        TEST_ALLOC_EXTENT_MAP,
        TEST_ALLOC_CHUNK_MAP,
+       TEST_ALLOC_IO_CONTEXT,
};

extern const char *test_error[];
@@ -37,6 +38,7 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize);
int btrfs_test_inodes(u32 sectorsize, u32 nodesize);
int btrfs_test_qgroups(u32 sectorsize, u32 nodesize);
int btrfs_test_free_space_tree(u32 sectorsize, u32 nodesize);
+int btrfs_test_raid_stripe_tree(u32 sectorsize, u32 nodesize);
int btrfs_test_extent_map(void);
struct inode *btrfs_new_test_inode(void);
struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 nodesize, u32 sectorsize);
new file: fs/btrfs/tests/raid-stripe-tree-tests.c (538 lines)
@@ -0,0 +1,538 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2024 Western Digital Corporation or its affiliates.
 */

#include <linux/sizes.h>
#include "../fs.h"
#include "../disk-io.h"
#include "../transaction.h"
#include "../volumes.h"
#include "../raid-stripe-tree.h"
#include "btrfs-tests.h"

#define RST_TEST_NUM_DEVICES	(2)
#define RST_TEST_RAID1_TYPE	(BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_RAID1)

typedef int (*test_func_t)(struct btrfs_trans_handle *trans);

static struct btrfs_device *btrfs_device_by_devid(struct btrfs_fs_devices *fs_devices,
						  u64 devid)
{
	struct btrfs_device *dev;

	list_for_each_entry(dev, &fs_devices->devices, dev_list) {
		if (dev->devid == devid)
			return dev;
	}

	return NULL;
}

/*
 * Test a 64K RST write on a 2 disk RAID1 at a logical address of 1M and then
 * delete the 1st 32K, making the new start address 1M+32K.
 */
static int test_front_delete(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_io_context *bioc;
	struct btrfs_io_stripe io_stripe = { 0 };
	u64 map_type = RST_TEST_RAID1_TYPE;
	u64 logical = SZ_1M;
	u64 len = SZ_64K;
	int ret;

	bioc = alloc_btrfs_io_context(fs_info, logical, RST_TEST_NUM_DEVICES);
	if (!bioc) {
		test_std_err(TEST_ALLOC_IO_CONTEXT);
		ret = -ENOMEM;
		goto out;
	}

	io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
	bioc->map_type = map_type;
	bioc->size = len;

	for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
		struct btrfs_io_stripe *stripe = &bioc->stripes[i];

		stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
		if (!stripe->dev) {
			test_err("cannot find device with devid %d", i);
			ret = -EINVAL;
			goto out;
		}

		stripe->physical = logical + i * SZ_1G;
	}

	ret = btrfs_insert_one_raid_extent(trans, bioc);
	if (ret) {
		test_err("inserting RAID extent failed: %d", ret);
		goto out;
	}

	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed", logical,
			 logical + len);
		goto out;
	}

	if (io_stripe.physical != logical) {
		test_err("invalid physical address, expected %llu got %llu",
			 logical, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_64K) {
		test_err("invalid stripe length, expected %llu got %llu",
			 (u64)SZ_64K, len);
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_delete_raid_extent(trans, logical, SZ_32K);
	if (ret) {
		test_err("deleting RAID extent [%llu, %llu] failed", logical,
			 logical + SZ_32K);
		goto out;
	}

	len = SZ_32K;
	ret = btrfs_get_raid_extent_offset(fs_info, logical + SZ_32K, &len,
					   map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed",
			 logical + SZ_32K, logical + SZ_32K + len);
		goto out;
	}

	if (io_stripe.physical != logical + SZ_32K) {
		test_err("invalid physical address, expected %llu, got %llu",
			 logical + SZ_32K, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_32K) {
		test_err("invalid stripe length, expected %llu, got %llu",
			 (u64)SZ_32K, len);
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (!ret) {
		ret = -EINVAL;
		test_err("lookup of RAID extent [%llu, %llu] succeeded, should fail",
			 logical, logical + SZ_32K);
		goto out;
	}

	ret = btrfs_delete_raid_extent(trans, logical + SZ_32K, SZ_32K);
out:
	btrfs_put_bioc(bioc);
	return ret;
}

/*
 * Test a 64K RST write on a 2 disk RAID1 at a logical address of 1M and then
 * truncate the stripe extent down to 32K.
 */
static int test_tail_delete(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_io_context *bioc;
	struct btrfs_io_stripe io_stripe = { 0 };
	u64 map_type = RST_TEST_RAID1_TYPE;
	u64 logical = SZ_1M;
	u64 len = SZ_64K;
	int ret;

	bioc = alloc_btrfs_io_context(fs_info, logical, RST_TEST_NUM_DEVICES);
	if (!bioc) {
		test_std_err(TEST_ALLOC_IO_CONTEXT);
		ret = -ENOMEM;
		goto out;
	}

	io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
	bioc->map_type = map_type;
	bioc->size = len;

	for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
		struct btrfs_io_stripe *stripe = &bioc->stripes[i];

		stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
		if (!stripe->dev) {
			test_err("cannot find device with devid %d", i);
			ret = -EINVAL;
			goto out;
		}

		stripe->physical = logical + i * SZ_1G;
	}

	ret = btrfs_insert_one_raid_extent(trans, bioc);
	if (ret) {
		test_err("inserting RAID extent failed: %d", ret);
		goto out;
	}

	io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
	if (!io_stripe.dev) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed", logical,
			 logical + len);
		goto out;
	}

	if (io_stripe.physical != logical) {
		test_err("invalid physical address, expected %llu got %llu",
			 logical, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_64K) {
		test_err("invalid stripe length, expected %llu got %llu",
			 (u64)SZ_64K, len);
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_delete_raid_extent(trans, logical + SZ_32K, SZ_32K);
	if (ret) {
		test_err("deleting RAID extent [%llu, %llu] failed",
			 logical + SZ_32K, logical + SZ_64K);
		goto out;
	}

	len = SZ_32K;
	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed", logical,
			 logical + len);
		goto out;
	}

	if (io_stripe.physical != logical) {
		test_err("invalid physical address, expected %llu, got %llu",
			 logical, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_32K) {
		test_err("invalid stripe length, expected %llu, got %llu",
			 (u64)SZ_32K, len);
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_delete_raid_extent(trans, logical, len);
	if (ret)
		test_err("deleting RAID extent [%llu, %llu] failed", logical,
			 logical + len);

out:
	btrfs_put_bioc(bioc);
	return ret;
}

/*
 * Test a 64K RST write on a 2 disk RAID1 at a logical address of 1M and then
 * overwrite the whole range giving it new physical address at an offset of 1G.
 * The intent of this test is to exercise the 'update_raid_extent_item()'
 * function called by btrfs_insert_one_raid_extent().
 */
static int test_create_update_delete(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_io_context *bioc;
	struct btrfs_io_stripe io_stripe = { 0 };
	u64 map_type = RST_TEST_RAID1_TYPE;
	u64 logical = SZ_1M;
	u64 len = SZ_64K;
	int ret;

	bioc = alloc_btrfs_io_context(fs_info, logical, RST_TEST_NUM_DEVICES);
	if (!bioc) {
		test_std_err(TEST_ALLOC_IO_CONTEXT);
		ret = -ENOMEM;
		goto out;
	}

	io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
	bioc->map_type = map_type;
	bioc->size = len;

	for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
		struct btrfs_io_stripe *stripe = &bioc->stripes[i];

		stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
		if (!stripe->dev) {
			test_err("cannot find device with devid %d", i);
			ret = -EINVAL;
			goto out;
		}

		stripe->physical = logical + i * SZ_1G;
	}

	ret = btrfs_insert_one_raid_extent(trans, bioc);
	if (ret) {
		test_err("inserting RAID extent failed: %d", ret);
		goto out;
	}

	io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
	if (!io_stripe.dev) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed", logical,
			 logical + len);
		goto out;
	}

	if (io_stripe.physical != logical) {
		test_err("invalid physical address, expected %llu got %llu",
			 logical, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_64K) {
		test_err("invalid stripe length, expected %llu got %llu",
			 (u64)SZ_64K, len);
		ret = -EINVAL;
		goto out;
	}

	for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
		struct btrfs_io_stripe *stripe = &bioc->stripes[i];

		stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
		if (!stripe->dev) {
			test_err("cannot find device with devid %d", i);
			ret = -EINVAL;
			goto out;
		}

		stripe->physical = SZ_1G + logical + i * SZ_1G;
	}

	ret = btrfs_insert_one_raid_extent(trans, bioc);
	if (ret) {
		test_err("updating RAID extent failed: %d", ret);
		goto out;
	}

	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed", logical,
			 logical + len);
		goto out;
	}

	if (io_stripe.physical != logical + SZ_1G) {
		test_err("invalid physical address, expected %llu, got %llu",
			 logical + SZ_1G, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_64K) {
		test_err("invalid stripe length, expected %llu, got %llu",
			 (u64)SZ_64K, len);
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_delete_raid_extent(trans, logical, len);
	if (ret)
		test_err("deleting RAID extent [%llu, %llu] failed", logical,
			 logical + len);

out:
	btrfs_put_bioc(bioc);
	return ret;
}

/*
 * Test a simple 64K RST write on a 2 disk RAID1 at a logical address of 1M.
 * The "physical" copy on device 0 is at 1M, on device 1 it is at 1G+1M.
 */
static int test_simple_create_delete(struct btrfs_trans_handle *trans)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_io_context *bioc;
	struct btrfs_io_stripe io_stripe = { 0 };
	u64 map_type = RST_TEST_RAID1_TYPE;
	u64 logical = SZ_1M;
	u64 len = SZ_64K;
	int ret;

	bioc = alloc_btrfs_io_context(fs_info, logical, RST_TEST_NUM_DEVICES);
	if (!bioc) {
		test_std_err(TEST_ALLOC_IO_CONTEXT);
		ret = -ENOMEM;
		goto out;
	}

	bioc->map_type = map_type;
	bioc->size = SZ_64K;

	for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
		struct btrfs_io_stripe *stripe = &bioc->stripes[i];

		stripe->dev = btrfs_device_by_devid(fs_info->fs_devices, i);
		if (!stripe->dev) {
			test_err("cannot find device with devid %d", i);
			ret = -EINVAL;
			goto out;
		}

		stripe->physical = logical + i * SZ_1G;
	}

	ret = btrfs_insert_one_raid_extent(trans, bioc);
	if (ret) {
		test_err("inserting RAID extent failed: %d", ret);
		goto out;
	}

	io_stripe.dev = btrfs_device_by_devid(fs_info->fs_devices, 0);
	if (!io_stripe.dev) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_get_raid_extent_offset(fs_info, logical, &len, map_type, 0, &io_stripe);
	if (ret) {
		test_err("lookup of RAID extent [%llu, %llu] failed", logical,
			 logical + len);
		goto out;
	}

	if (io_stripe.physical != logical) {
		test_err("invalid physical address, expected %llu got %llu",
			 logical, io_stripe.physical);
		ret = -EINVAL;
		goto out;
	}

	if (len != SZ_64K) {
		test_err("invalid stripe length, expected %llu got %llu",
			 (u64)SZ_64K, len);
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_delete_raid_extent(trans, logical, len);
	if (ret)
		test_err("deleting RAID extent [%llu, %llu] failed", logical,
			 logical + len);

out:
	btrfs_put_bioc(bioc);
	return ret;
}

static const test_func_t tests[] = {
	test_simple_create_delete,
	test_create_update_delete,
	test_tail_delete,
	test_front_delete,
};

static int run_test(test_func_t test, u32 sectorsize, u32 nodesize)
{
	struct btrfs_trans_handle trans;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root = NULL;
	int ret;

	fs_info = btrfs_alloc_dummy_fs_info(sectorsize, nodesize);
	if (!fs_info) {
		test_std_err(TEST_ALLOC_FS_INFO);
		ret = -ENOMEM;
		goto out;
	}

	root = btrfs_alloc_dummy_root(fs_info);
	if (IS_ERR(root)) {
		test_std_err(TEST_ALLOC_ROOT);
		ret = PTR_ERR(root);
		goto out;
	}
	btrfs_set_super_compat_ro_flags(root->fs_info->super_copy,
					BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE);
	root->root_key.objectid = BTRFS_RAID_STRIPE_TREE_OBJECTID;
	root->root_key.type = BTRFS_ROOT_ITEM_KEY;
	root->root_key.offset = 0;
	fs_info->stripe_root = root;
	root->fs_info->tree_root = root;

	root->node = alloc_test_extent_buffer(root->fs_info, nodesize);
	if (IS_ERR(root->node)) {
		test_std_err(TEST_ALLOC_EXTENT_BUFFER);
		ret = PTR_ERR(root->node);
		goto out;
	}
	btrfs_set_header_level(root->node, 0);
	btrfs_set_header_nritems(root->node, 0);
	root->alloc_bytenr += 2 * nodesize;

	for (int i = 0; i < RST_TEST_NUM_DEVICES; i++) {
		struct btrfs_device *dev;

		dev = btrfs_alloc_dummy_device(fs_info);
		if (IS_ERR(dev)) {
			test_err("cannot allocate device");
			ret = PTR_ERR(dev);
			goto out;
		}
		dev->devid = i;
	}

	btrfs_init_dummy_trans(&trans, root->fs_info);
	ret = test(&trans);
	if (ret)
		goto out;

out:
	btrfs_free_dummy_root(root);
	btrfs_free_dummy_fs_info(fs_info);

	return ret;
}

int btrfs_test_raid_stripe_tree(u32 sectorsize, u32 nodesize)
{
	int ret = 0;

	test_msg("running raid-stripe-tree tests");
	for (int i = 0; i < ARRAY_SIZE(tests); i++) {
		ret = run_test(tests[i], sectorsize, nodesize);
		if (ret) {
			test_err("test-case %ps failed with %d\n", tests[i], ret);
			goto out;
		}
	}

out:
	return ret;
}
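For orientation, a rough sketch of how this new entry point gets exercised: the btrfs sanity-test harness in fs/btrfs/tests/btrfs-tests.c calls each btrfs_test_*() function declared in btrfs-tests.h for the supported sector and node sizes. Only the btrfs_test_raid_stripe_tree() call comes from this series; the wrapper below is an illustrative assumption, not code from the diff.

/* Illustrative only: how the harness would invoke the new selftest. */
static int run_raid_stripe_tree_selftest(u32 sectorsize, u32 nodesize)
{
	int ret;

	ret = btrfs_test_raid_stripe_tree(sectorsize, nodesize);
	if (ret)
		pr_err("btrfs: raid-stripe-tree selftest failed: %d\n", ret);
	return ret;
}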
@@ -141,8 +141,7 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
	WARN_ON(refcount_read(&transaction->use_count) == 0);
	if (refcount_dec_and_test(&transaction->use_count)) {
		BUG_ON(!list_empty(&transaction->list));
		WARN_ON(!RB_EMPTY_ROOT(
				&transaction->delayed_refs.href_root.rb_root));
		WARN_ON(!xa_empty(&transaction->delayed_refs.head_refs));
		WARN_ON(!xa_empty(&transaction->delayed_refs.dirty_extents));
		if (transaction->delayed_refs.pending_csums)
			btrfs_err(transaction->fs_info,
@@ -349,9 +348,8 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,

	memset(&cur_trans->delayed_refs, 0, sizeof(cur_trans->delayed_refs));

	cur_trans->delayed_refs.href_root = RB_ROOT_CACHED;
	xa_init(&cur_trans->delayed_refs.head_refs);
	xa_init(&cur_trans->delayed_refs.dirty_extents);
	atomic_set(&cur_trans->delayed_refs.num_entries, 0);

	/*
	 * although the tree mod log is per file system and not per transaction,
@@ -2052,7 +2050,7 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)

	spin_unlock(&fs_info->trans_lock);

	btrfs_cleanup_one_transaction(trans->transaction, fs_info);
	btrfs_cleanup_one_transaction(trans->transaction);

	spin_lock(&fs_info->trans_lock);
	if (cur_trans == fs_info->running_transaction)
@@ -33,7 +33,7 @@ struct btrfs_path;
 */
#define BTRFS_TRANS_DIO_WRITE_STUB	((void *) 1)

/* Radix-tree tag for roots that are part of the trasaction. */
/* Radix-tree tag for roots that are part of the transaction. */
#define BTRFS_ROOT_TRANS_TAG	0

enum btrfs_trans_state {
@@ -2183,8 +2183,8 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
	return 0;
}

int btrfs_verify_level_key(struct extent_buffer *eb, int level,
			   struct btrfs_key *first_key, u64 parent_transid)
int btrfs_verify_level_key(struct extent_buffer *eb,
			   const struct btrfs_tree_parent_check *check)
{
	struct btrfs_fs_info *fs_info = eb->fs_info;
	int found_level;
@@ -2192,16 +2192,16 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level,
	int ret;

	found_level = btrfs_header_level(eb);
	if (found_level != level) {
	if (found_level != check->level) {
		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
		     KERN_ERR "BTRFS: tree level check failed\n");
		btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
			  eb->start, level, found_level);
			  eb->start, check->level, found_level);
		return -EIO;
	}

	if (!first_key)
	if (!check->has_first_key)
		return 0;

	/*
@@ -2226,15 +2226,15 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level,
		btrfs_node_key_to_cpu(eb, &found_key, 0);
	else
		btrfs_item_key_to_cpu(eb, &found_key, 0);
	ret = btrfs_comp_cpu_keys(first_key, &found_key);
	ret = btrfs_comp_cpu_keys(&check->first_key, &found_key);

	if (ret) {
		WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
		     KERN_ERR "BTRFS: tree first key check failed\n");
		btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
			  eb->start, parent_transid, first_key->objectid,
			  first_key->type, first_key->offset,
			  eb->start, check->transid, check->first_key.objectid,
			  check->first_key.type, check->first_key.offset,
			  found_key.objectid, found_key.type,
			  found_key.offset);
	}

@@ -69,7 +69,7 @@ int btrfs_check_node(struct extent_buffer *node);
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
			    struct btrfs_chunk *chunk, u64 logical);
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
			   struct btrfs_key *first_key, u64 parent_transid);
int btrfs_verify_level_key(struct extent_buffer *eb,
			   const struct btrfs_tree_parent_check *check);

#endif
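The hunks above switch btrfs_verify_level_key() from separate level/first_key/parent_transid arguments to the consolidated btrfs_tree_parent_check. A minimal caller-side sketch of the new convention, using only the fields the diff itself references (level, transid, has_first_key, first_key); the local variable names are illustrative, not taken from the tree:

/* Sketch of the new calling convention; names are illustrative. */
struct btrfs_tree_parent_check check = { 0 };

check.level = expected_level;		/* level the child eb is expected to have */
check.transid = parent_transid;		/* generation recorded in the parent slot */
check.has_first_key = true;
btrfs_node_key_to_cpu(parent, &check.first_key, slot);

ret = btrfs_verify_level_key(eb, &check);
/* previously: btrfs_verify_level_key(eb, expected_level, &first_key, parent_transid) */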
@@ -6204,7 +6204,6 @@ static int log_delayed_deletions_full(struct btrfs_trans_handle *trans,
static int batch_delete_dir_index_items(struct btrfs_trans_handle *trans,
					struct btrfs_inode *inode,
					struct btrfs_path *path,
					struct btrfs_log_ctx *ctx,
					const struct list_head *delayed_del_list,
					const struct btrfs_delayed_item *first,
					const struct btrfs_delayed_item **last_ret)
@@ -6265,7 +6264,7 @@ static int log_delayed_deletions_incremental(struct btrfs_trans_handle *trans,
		if (ret < 0) {
			return ret;
		} else if (ret == 0) {
			ret = batch_delete_dir_index_items(trans, inode, path, ctx,
			ret = batch_delete_dir_index_items(trans, inode, path,
							   delayed_del_list, curr,
							   &last);
			if (ret)
@@ -909,7 +909,6 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
 * is freed (its refcount is decremented).
 */
struct extent_buffer *btrfs_tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
						struct btrfs_path *path,
						struct extent_buffer *eb,
						u64 time_seq)
{
@@ -41,7 +41,6 @@ int btrfs_tree_mod_log_insert_key(const struct extent_buffer *eb, int slot,
				  enum btrfs_mod_log_op op);
int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb);
struct extent_buffer *btrfs_tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
						struct btrfs_path *path,
						struct extent_buffer *eb,
						u64 time_seq);
struct extent_buffer *btrfs_get_old_root(struct btrfs_root *root, u64 time_seq);
@@ -732,6 +732,114 @@ const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
	return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
}

/*
 * We can have very weird soft links passed in.
 * One example is "/proc/self/fd/<fd>", which can be a soft link to
 * a block device.
 *
 * But it's never a good idea to use those weird names.
 * Here we check if the path (not following symlinks) is a good one inside
 * "/dev/".
 */
static bool is_good_dev_path(const char *dev_path)
{
	struct path path = { .mnt = NULL, .dentry = NULL };
	char *path_buf = NULL;
	char *resolved_path;
	bool is_good = false;
	int ret;

	if (!dev_path)
		goto out;

	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!path_buf)
		goto out;

	/*
	 * Do not follow soft link, just check if the original path is inside
	 * "/dev/".
	 */
	ret = kern_path(dev_path, 0, &path);
	if (ret)
		goto out;
	resolved_path = d_path(&path, path_buf, PATH_MAX);
	if (IS_ERR(resolved_path))
		goto out;
	if (strncmp(resolved_path, "/dev/", strlen("/dev/")))
		goto out;
	is_good = true;
out:
	kfree(path_buf);
	path_put(&path);
	return is_good;
}

static int get_canonical_dev_path(const char *dev_path, char *canonical)
{
	struct path path = { .mnt = NULL, .dentry = NULL };
	char *path_buf = NULL;
	char *resolved_path;
	int ret;

	if (!dev_path) {
		ret = -EINVAL;
		goto out;
	}

	path_buf = kmalloc(PATH_MAX, GFP_KERNEL);
	if (!path_buf) {
		ret = -ENOMEM;
		goto out;
	}

	ret = kern_path(dev_path, LOOKUP_FOLLOW, &path);
	if (ret)
		goto out;
	resolved_path = d_path(&path, path_buf, PATH_MAX);
	ret = strscpy(canonical, resolved_path, PATH_MAX);
out:
	kfree(path_buf);
	path_put(&path);
	return ret;
}

static bool is_same_device(struct btrfs_device *device, const char *new_path)
{
	struct path old = { .mnt = NULL, .dentry = NULL };
	struct path new = { .mnt = NULL, .dentry = NULL };
	char *old_path = NULL;
	bool is_same = false;
	int ret;

	if (!device->name)
		goto out;

	old_path = kzalloc(PATH_MAX, GFP_NOFS);
	if (!old_path)
		goto out;

	rcu_read_lock();
	ret = strscpy(old_path, rcu_str_deref(device->name), PATH_MAX);
	rcu_read_unlock();
	if (ret < 0)
		goto out;

	ret = kern_path(old_path, LOOKUP_FOLLOW, &old);
	if (ret)
		goto out;
	ret = kern_path(new_path, LOOKUP_FOLLOW, &new);
	if (ret)
		goto out;
	if (path_equal(&old, &new))
		is_same = true;
out:
	kfree(old_path);
	path_put(&old);
	path_put(&new);
	return is_same;
}

/*
 * Add new device to list of registered devices
 *
@@ -852,7 +960,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
			  MAJOR(path_devt), MINOR(path_devt),
			  current->comm, task_pid_nr(current));

	} else if (!device->name || strcmp(device->name->str, path)) {
	} else if (!device->name || !is_same_device(device, path)) {
		/*
		 * When FS is already mounted.
		 * 1. If you are here and if the device->name is NULL that
@@ -1383,12 +1491,23 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
	bool new_device_added = false;
	struct btrfs_device *device = NULL;
	struct file *bdev_file;
	char *canonical_path = NULL;
	u64 bytenr;
	dev_t devt;
	int ret;

	lockdep_assert_held(&uuid_mutex);

	if (!is_good_dev_path(path)) {
		canonical_path = kmalloc(PATH_MAX, GFP_KERNEL);
		if (canonical_path) {
			ret = get_canonical_dev_path(path, canonical_path);
			if (ret < 0) {
				kfree(canonical_path);
				canonical_path = NULL;
			}
		}
	}
	/*
	 * Avoid an exclusive open here, as the systemd-udev may initiate the
	 * device scan which may race with the user's mount or mkfs command,
@@ -1433,7 +1552,8 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
		goto free_disk_super;
	}

	device = device_list_add(path, disk_super, &new_device_added);
	device = device_list_add(canonical_path ? : path, disk_super,
				 &new_device_added);
	if (!IS_ERR(device) && new_device_added)
		btrfs_free_stale_devices(device->devt, device);

@@ -1442,6 +1562,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,

error_bdev_put:
	fput(bdev_file);
	kfree(canonical_path);

	return device;
}
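The practical effect of the canonicalization above is that scanning the same block device under two different names (for example a /proc/self/fd/<fd> symlink or an LVM/DM alias) no longer registers a second path for an already known device. A hedged userspace illustration follows; the scan ioctl and the /dev/btrfs-control node are the long-standing registration interface and are not part of this diff:

#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Register a device path with the btrfs module; calling this twice with two
 * names that resolve to the same device now keeps a single registration. */
static int scan_device(const char *path)
{
	struct btrfs_ioctl_vol_args args = { 0 };
	int fd = open("/dev/btrfs-control", O_RDONLY);
	int ret;

	if (fd < 0)
		return -1;
	strncpy(args.name, path, sizeof(args.name) - 1);
	ret = ioctl(fd, BTRFS_IOC_SCAN_DEV, &args);
	close(fd);
	return ret;
}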
@@ -2721,8 +2842,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
	set_blocksize(device->bdev_file, BTRFS_BDEV_BLOCKSIZE);

	if (seeding_dev) {
		btrfs_clear_sb_rdonly(sb);

		/* GFP_KERNEL allocation must not be under device_list_mutex */
		seed_devices = btrfs_init_sprout(fs_info);
		if (IS_ERR(seed_devices)) {
@@ -2865,8 +2984,6 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
	mutex_unlock(&fs_info->chunk_mutex);
	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
error_trans:
	if (seeding_dev)
		btrfs_set_sb_rdonly(sb);
	if (trans)
		btrfs_end_transaction(trans);
error_free_zone:
@@ -5310,7 +5427,7 @@ static int decide_stripe_size_zoned(struct alloc_chunk_ctl *ctl,
	ctl->num_stripes = ctl->ndevs * ctl->dev_stripes;
	data_stripes = (ctl->num_stripes - ctl->nparity) / ctl->ncopies;

	/* stripe_size is fixed in zoned filesysmte. Reduce ndevs instead. */
	/* stripe_size is fixed in zoned filesystem. Reduce ndevs instead. */
	if (ctl->stripe_size * data_stripes > ctl->max_chunk_size) {
		ctl->ndevs = div_u64(div_u64(ctl->max_chunk_size * ctl->ncopies,
					     ctl->stripe_size) + ctl->nparity,
@@ -5842,24 +5959,6 @@ unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
	return len;
}

int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
{
	struct btrfs_chunk_map *map;
	int ret = 0;

	if (!btrfs_fs_incompat(fs_info, RAID56))
		return 0;

	map = btrfs_get_chunk_map(fs_info, logical, len);

	if (!WARN_ON(IS_ERR(map))) {
		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
			ret = 1;
		btrfs_free_chunk_map(map);
	}
	return ret;
}

static int find_live_mirror(struct btrfs_fs_info *fs_info,
			    struct btrfs_chunk_map *map, int first,
			    int dev_replace_is_ongoing)
@@ -5920,9 +6019,9 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info,
	return preferred_mirror;
}

static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
						       u64 logical,
						       u16 total_stripes)
EXPORT_FOR_TESTS
struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
						u64 logical, u16 total_stripes)
{
	struct btrfs_io_context *bioc;

@@ -6481,13 +6580,15 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
	max_len = btrfs_max_io_len(map, map_offset, &io_geom);
	*length = min_t(u64, map->chunk_len - map_offset, max_len);

	down_read(&dev_replace->rwsem);
	if (dev_replace->replace_task != current)
		down_read(&dev_replace->rwsem);

	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
	/*
	 * Hold the semaphore for read during the whole operation, write is
	 * requested at commit time but must wait.
	 */
	if (!dev_replace_is_ongoing)
	if (!dev_replace_is_ongoing && dev_replace->replace_task != current)
		up_read(&dev_replace->rwsem);

	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
@@ -6627,7 +6728,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
	bioc->mirror_num = io_geom.mirror_num;

out:
	if (dev_replace_is_ongoing) {
	if (dev_replace_is_ongoing && dev_replace->replace_task != current) {
		lockdep_assert_held(&dev_replace->rwsem);
		/* Unlock and let waiting writers proceed */
		up_read(&dev_replace->rwsem);
@@ -306,7 +306,7 @@ enum btrfs_read_policy {
	BTRFS_NR_READ_POLICY,
};

#ifdef CONFIG_BTRFS_DEBUG
#ifdef CONFIG_BTRFS_EXPERIMENTAL
/*
 * Checksum mode - offload it to workqueues or do it synchronously in
 * btrfs_submit_chunk().
@@ -430,7 +430,7 @@ struct btrfs_fs_devices {
	/* Policy used to read the mirrored stripes. */
	enum btrfs_read_policy read_policy;

#ifdef CONFIG_BTRFS_DEBUG
#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/* Checksum mode - offload it or do it synchronously. */
	enum btrfs_offload_csum_mode offload_csum_mode;
#endif
@@ -741,8 +741,6 @@ int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info,
			   u64 logical, u64 len);
unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
				    u64 logical);
u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map);
@@ -840,4 +838,9 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
						u64 logical, u16 total_stripes);
#endif

#endif
@@ -85,7 +85,6 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
{
	struct btrfs_dir_item *di = NULL;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_path *path;
	size_t name_len = strlen(name);
	int ret = 0;
@@ -143,14 +142,14 @@ int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
		 */
		ret = 0;
		btrfs_assert_tree_write_locked(path->nodes[0]);
		di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
		di = btrfs_match_dir_item_name(path, name, name_len);
		if (!di && !(flags & XATTR_REPLACE)) {
			ret = -ENOSPC;
			goto out;
		}
	} else if (ret == -EEXIST) {
		ret = 0;
		di = btrfs_match_dir_item_name(fs_info, path, name, name_len);
		di = btrfs_match_dir_item_name(path, name, name_len);
		ASSERT(di); /* logic error */
	} else if (ret) {
		goto out;
@@ -194,7 +194,7 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
			pg_off = offset_in_page(start);
			cur_len = btrfs_calc_input_length(orig_end, start);
			data_in = kmap_local_folio(in_folio, pg_off);
			start += PAGE_SIZE;
			start += cur_len;
			workspace->strm.next_in = data_in;
			workspace->strm.avail_in = cur_len;
		}
@@ -1739,7 +1739,7 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
		return false;

	/*
	 * Using REQ_OP_ZONE_APPNED for relocation can break assumptions on the
	 * Using REQ_OP_ZONE_APPEND for relocation can break assumptions on the
	 * extent layout the relocation code has.
	 * Furthermore we have set aside own block-group from which only the
	 * relocation "process" can allocate and make sure only one process at a
@@ -1973,7 +1973,7 @@ int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
	if (block_group->meta_write_pointer > eb->start)
		return -EBUSY;

	/* If for_sync, this hole will be filled with trasnsaction commit. */
	/* If for_sync, this hole will be filled with transaction commit. */
	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
		return -EAGAIN;
	return -EBUSY;
@@ -111,6 +111,8 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
	struct list_head *pos, *next;

	ASSERT(timer == &wsm.timer);

	spin_lock(&wsm.lock);

	if (list_empty(&wsm.lru_list)) {
@@ -495,7 +497,7 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,

		/* Check if we need more input */
		if (workspace->in_buf.pos == workspace->in_buf.size) {
			tot_in += PAGE_SIZE;
			tot_in += workspace->in_buf.size;
			kunmap_local(workspace->in_buf.src);
			workspace->in_buf.src = NULL;
			folio_put(in_folio);
@@ -37,6 +37,7 @@ enum io_uring_cmd_flags {
	/* set when uring wants to cancel a previously issued command */
	IO_URING_F_CANCEL		= (1 << 11),
	IO_URING_F_COMPAT		= (1 << 12),
	IO_URING_F_TASK_DEAD		= (1 << 13),
};

struct io_wq_work_node {
@@ -1706,9 +1706,10 @@ DEFINE_EVENT(btrfs__qgroup_rsv_data, btrfs_qgroup_release_data,

DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
	TP_PROTO(const struct btrfs_fs_info *fs_info,
		 const struct btrfs_qgroup_extent_record *rec),
		 const struct btrfs_qgroup_extent_record *rec,
		 u64 bytenr),

	TP_ARGS(fs_info, rec),
	TP_ARGS(fs_info, rec, bytenr),

	TP_STRUCT__entry_btrfs(
		__field(	u64,  bytenr		)
@@ -1716,7 +1717,7 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
	),

	TP_fast_assign_btrfs(fs_info,
		__entry->bytenr		= rec->bytenr;
		__entry->bytenr		= bytenr;
		__entry->num_bytes	= rec->num_bytes;
	),

@@ -1727,17 +1728,19 @@ DECLARE_EVENT_CLASS(btrfs_qgroup_extent,
DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents,

	TP_PROTO(const struct btrfs_fs_info *fs_info,
		 const struct btrfs_qgroup_extent_record *rec),
		 const struct btrfs_qgroup_extent_record *rec,
		 u64 bytenr),

	TP_ARGS(fs_info, rec)
	TP_ARGS(fs_info, rec, bytenr)
);

DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_trace_extent,

	TP_PROTO(const struct btrfs_fs_info *fs_info,
		 const struct btrfs_qgroup_extent_record *rec),
		 const struct btrfs_qgroup_extent_record *rec,
		 u64 bytenr),

	TP_ARGS(fs_info, rec)
	TP_ARGS(fs_info, rec, bytenr)
);

TRACE_EVENT(qgroup_num_dirty_extents,
@@ -2341,7 +2344,6 @@ DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_unlock_blocking);
DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_read);
DEFINE_BTRFS_LOCK_EVENT(btrfs_set_lock_blocking_write);
DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_read_lock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_try_tree_write_lock);
DEFINE_BTRFS_LOCK_EVENT(btrfs_tree_read_lock_atomic);

DECLARE_EVENT_CLASS(btrfs__space_info_update,
@@ -2553,10 +2555,9 @@ TRACE_EVENT(btrfs_extent_map_shrinker_count,

TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,

	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_to_scan, long nr,
		 u64 last_root_id, u64 last_ino),
	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr),

	TP_ARGS(fs_info, nr_to_scan, nr, last_root_id, last_ino),
	TP_ARGS(fs_info, nr),

	TP_STRUCT__entry_btrfs(
		__field(	long,	nr_to_scan	)
@@ -2566,10 +2567,11 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,
	),

	TP_fast_assign_btrfs(fs_info,
		__entry->nr_to_scan	= nr_to_scan;
		__entry->nr_to_scan	= \
				atomic64_read(&fs_info->em_shrinker_nr_to_scan);
		__entry->nr		= nr;
		__entry->last_root_id	= last_root_id;
		__entry->last_ino	= last_ino;
		__entry->last_root_id	= fs_info->em_shrinker_last_root;
		__entry->last_ino	= fs_info->em_shrinker_last_ino;
	),

	TP_printk_btrfs("nr_to_scan=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
@@ -2579,10 +2581,9 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_enter,

TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,

	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr,
		 u64 last_root_id, u64 last_ino),
	TP_PROTO(const struct btrfs_fs_info *fs_info, long nr_dropped, long nr),

	TP_ARGS(fs_info, nr_dropped, nr, last_root_id, last_ino),
	TP_ARGS(fs_info, nr_dropped, nr),

	TP_STRUCT__entry_btrfs(
		__field(	long,	nr_dropped	)
@@ -2594,8 +2595,8 @@ TRACE_EVENT(btrfs_extent_map_shrinker_scan_exit,
	TP_fast_assign_btrfs(fs_info,
		__entry->nr_dropped	= nr_dropped;
		__entry->nr		= nr;
		__entry->last_root_id	= last_root_id;
		__entry->last_ino	= last_ino;
		__entry->last_root_id	= fs_info->em_shrinker_last_root;
		__entry->last_ino	= fs_info->em_shrinker_last_ino;
	),

	TP_printk_btrfs("nr_dropped=%ld nr=%ld last_root=%llu(%s) last_ino=%llu",
@@ -1049,6 +1049,29 @@ struct btrfs_ioctl_encoded_io_args {
#define BTRFS_ENCODED_IO_ENCRYPTION_NONE 0
#define BTRFS_ENCODED_IO_ENCRYPTION_TYPES 1

/*
 * Wait for subvolume cleaning process. This queries the kernel queue and it
 * can change between the calls.
 *
 * - FOR_ONE	- specify the subvolid
 * - FOR_QUEUED	- wait for all currently queued
 * - COUNT	- count number of queued
 * - PEEK_FIRST	- read which is the first in the queue (to be cleaned or being
 *		  cleaned already), or 0 if the queue is empty
 * - PEEK_LAST	- read the last subvolid in the queue, or 0 if the queue is empty
 */
struct btrfs_ioctl_subvol_wait {
	__u64 subvolid;
	__u32 mode;
	__u32 count;
};

#define BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE		(0)
#define BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED	(1)
#define BTRFS_SUBVOL_SYNC_COUNT			(2)
#define BTRFS_SUBVOL_SYNC_PEEK_FIRST		(3)
#define BTRFS_SUBVOL_SYNC_PEEK_LAST		(4)

/* Error codes as returned by the kernel */
enum btrfs_err_code {
	BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET = 1,
@@ -1181,6 +1204,8 @@ enum btrfs_err_code {
					struct btrfs_ioctl_encoded_io_args)
#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
					struct btrfs_ioctl_encoded_io_args)
#define BTRFS_IOC_SUBVOL_SYNC_WAIT _IOW(BTRFS_IOCTL_MAGIC, 65, \
					struct btrfs_ioctl_subvol_wait)

#ifdef __cplusplus
}
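A minimal userspace sketch of the new wait ioctl declared above, under the assumption that fd is a descriptor opened on any path inside the mounted filesystem (which is how "btrfs subvol sync" is expected to use it); the helper name is illustrative:

#include <sys/ioctl.h>
#include <linux/btrfs.h>

/* Block until every subvolume currently queued for cleaning has been removed. */
static int wait_for_queued_cleanups(int fd)
{
	struct btrfs_ioctl_subvol_wait wait = {
		.subvolid = 0,
		.mode = BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED,
	};

	return ioctl(fd, BTRFS_IOC_SUBVOL_SYNC_WAIT, &wait);
}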
@@ -119,9 +119,13 @@ EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
{
	struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
	unsigned int flags = IO_URING_F_COMPLETE_DEFER;

	if (current->flags & (PF_EXITING | PF_KTHREAD))
		flags |= IO_URING_F_TASK_DEAD;

	/* task_work executor checks the deffered list completion */
	ioucmd->task_work_cb(ioucmd, IO_URING_F_COMPLETE_DEFER);
	ioucmd->task_work_cb(ioucmd, flags);
}

void __io_uring_cmd_do_in_task(struct io_uring_cmd *ioucmd,
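With the hunk above, a command's task_work callback can tell that the issuing task is dying and avoid doing per-task work on its behalf. A hedged sketch of a driver-side consumer, assuming the usual io_uring_cmd task-work callback shape; the function and completion values are illustrative, not taken from this series:

#include <linux/io_uring/cmd.h>

static void my_drv_cmd_tw_cb(struct io_uring_cmd *ioucmd, unsigned int issue_flags)
{
	if (issue_flags & IO_URING_F_TASK_DEAD) {
		/* Submitting task is exiting: complete and skip task-context work. */
		io_uring_cmd_done(ioucmd, -ECANCELED, 0, issue_flags);
		return;
	}

	/* Normal completion path. */
	io_uring_cmd_done(ioucmd, 0, 0, issue_flags);
}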