mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git
synced 2025-01-17 02:36:21 +00:00
for-6.11-tag
-----BEGIN PGP SIGNATURE----- iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmaVN3MACgkQxWXV+ddt WDtpIRAAl+1NjsEj8e5V/UYn8Jr06ujTOnrkR3PCTICxDHbUaMLkQEw21H0K/ogQ 3fOiEVpSlZOfKdYXtXaMQbC0jd/Af2eA10Uht96nAEjAtxu1uJ4cFZGu2meNdXZP xUioivJ/CElMPH2aluG6FaQvUTqmhrEr8tSoYbxzQmUd434q9kqqyjtw1tfzYDG1 VDn2f7ykhpB/8P0aoqgWSshWTmaCzG0GkuI28o1o0iZUIF/P9TKdzxlLRW6BVHE7 T2oGLEQjN1GQbCH75L4IeNJDkCBVfcDcbZkUDJ/ae4Pt/jJQTFY53YIP9wXFZQnd mdfHmK7Atpsk75ATftYSq+ENkbQ5fsuut5CD63u54gAqA4M1FncDXTAWS1Y30F76 P8juSCmsSy0o3gTflDIo/IMdntoh/JmncwwStF6oKzmyUZZzzarsqM8mc1P03ZNt 3ttlnbY7lC1TDAlD5J2wXE0INCT2pN+4C9IToWdRypeuLu6qrI7cQ0oylyp9OVQM t9umTXm0B6s1cyqEDjJf0xJZS/JTHYwu7S4EmAJwicgiLpOjABVTmO8021rVmDJy TAUu6yEhSsrTT6Dxm7/2Et1EEOKFF5hhsG1SiGD9oUIZK6B5+0waT+rbkEWl7osR 4/TAv2zX6tuCc7HIW0fQloM/6/Gyd5wcDVaQNDUzFA075uKstwY= =k5d3 -----END PGP SIGNATURE----- Merge tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux Pull btrfs updates from David Sterba: "The highlights are new logic behind background block group reclaim, automatic removal of qgroup after removing a subvolume and new 'rescue=' mount options. The rest is optimizations, cleanups and refactoring. 
User visible features: - dynamic block group reclaim: - tunable framework to avoid situations where eager data allocations prevent creating new metadata chunks due to lack of unallocated space - reuse sysfs knob bg_reclaim_threshold (otherwise used only in zoned mode) for a fixed value threshold - new on/off sysfs knob "dynamic_reclaim" calculating the value based on heuristics, aiming to keep spare working space for relocating chunks but not to needlessly relocate partially utilized block groups or reclaim newly allocated ones - stats are exported in sysfs per block group type, files "reclaim_*" - this may increase IO load at unexpected times but the corner case of no allocatable block groups is known to be worse - automatically remove qgroup of deleted subvolumes: - adjust qgroup removal conditions, make sure all related subvolume data are already removed, or return EBUSY, also take into account setting of sysfs drop_subtree_threshold - also works in squota mode - mount option updates: new modes of 'rescue=' that allow mounting images (read-only) that could have been partially converted by user space tools - ignoremetacsums - invalid metadata checksums are ignored - ignoresuperflags - super block flags that track conversion in progress (like UUID or checksums) are ignored Core: - size of struct btrfs_inode is now below 1024 (on a release config), improved memory packing and other secondary effects - switch tracking of open inodes from rb-tree to xarray, minor performance improvement - reduce number of empty transaction commits when there are no dirty data/metadata - memory allocation optimizations (reduced numbers, reordering out of critical sections) - extent map structure optimizations and refactoring, more sanity checks - more subpage in zoned mode preparations or fixes - general snapshot code cleanups, improvements and documentation - tree-checker updates: more file extent ram_bytes fixes, continued - raid-stripe-tree update (not backward compatible): - remove extent 
encoding field from the structure, can be inferred from other information - requires btrfs-progs 6.9.1 or newer - cleanups and refactoring - error message updates - error handling improvements - return type and parameter cleanups and improvements" * tag 'for-6.11-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (152 commits) btrfs: fix extent map use-after-free when adding pages to compressed bio btrfs: fix bitmap leak when loading free space cache on duplicate entry btrfs: remove the BUG_ON() inside extent_range_clear_dirty_for_io() btrfs: move extent_range_clear_dirty_for_io() into inode.c btrfs: enhance compression error messages btrfs: fix data race when accessing the last_trans field of a root btrfs: rename the extra_gfp parameter of btrfs_alloc_page_array() btrfs: remove the extra_gfp parameter from btrfs_alloc_folio_array() btrfs: introduce new "rescue=ignoresuperflags" mount option btrfs: introduce new "rescue=ignoremetacsums" mount option btrfs: output the unrecognized super block flags as hex btrfs: remove unused Opt enums btrfs: tree-checker: add extra ram_bytes and disk_num_bytes check btrfs: fix the ram_bytes assignment for truncated ordered extents btrfs: make validate_extent_map() catch ram_bytes mismatch btrfs: ignore incorrect btrfs_file_extent_item::ram_bytes btrfs: cleanup the bytenr usage inside btrfs_extent_item_to_extent_map() btrfs: fix typo in error message in btrfs_validate_super() btrfs: move the direct IO code into its own file btrfs: pass a btrfs_inode to btrfs_set_prop() ...
This commit is contained in:
commit
a1b547f0f2
@ -33,7 +33,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
|
||||
uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \
|
||||
block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \
|
||||
subpage.o tree-mod-log.o extent-io-tree.o fs.o messages.o bio.o \
|
||||
lru_cache.o raid-stripe-tree.o
|
||||
lru_cache.o raid-stripe-tree.o fiemap.o direct-io.o
|
||||
|
||||
btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
|
||||
btrfs-$(CONFIG_BTRFS_FS_REF_VERIFY) += ref-verify.o
|
||||
|
@ -34,7 +34,7 @@ void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *e
|
||||
|
||||
static inline u8 get_unaligned_le8(const void *p)
|
||||
{
|
||||
return *(u8 *)p;
|
||||
return *(const u8 *)p;
|
||||
}
|
||||
|
||||
static inline void put_unaligned_le8(u8 val, void *p)
|
||||
@ -48,8 +48,8 @@ static inline void put_unaligned_le8(u8 val, void *p)
|
||||
offsetof(type, member), \
|
||||
sizeof_field(type, member)))
|
||||
|
||||
#define write_eb_member(eb, ptr, type, member, result) (\
|
||||
write_extent_buffer(eb, (char *)(result), \
|
||||
#define write_eb_member(eb, ptr, type, member, source) ( \
|
||||
write_extent_buffer(eb, (const char *)(source), \
|
||||
((unsigned long)(ptr)) + \
|
||||
offsetof(type, member), \
|
||||
sizeof_field(type, member)))
|
||||
@ -315,11 +315,8 @@ BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
|
||||
BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
|
||||
|
||||
BTRFS_SETGET_FUNCS(stripe_extent_encoding, struct btrfs_stripe_extent, encoding, 8);
|
||||
BTRFS_SETGET_FUNCS(raid_stride_devid, struct btrfs_raid_stride, devid, 64);
|
||||
BTRFS_SETGET_FUNCS(raid_stride_physical, struct btrfs_raid_stride, physical, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(stack_stripe_extent_encoding,
|
||||
struct btrfs_stripe_extent, encoding, 8);
|
||||
BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_devid, struct btrfs_raid_stride, devid, 64);
|
||||
BTRFS_SETGET_STACK_FUNCS(stack_raid_stride_physical, struct btrfs_raid_stride, physical, 64);
|
||||
|
||||
@ -353,7 +350,7 @@ static inline void btrfs_tree_block_key(const struct extent_buffer *eb,
|
||||
|
||||
static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb,
|
||||
struct btrfs_tree_block_info *item,
|
||||
struct btrfs_disk_key *key)
|
||||
const struct btrfs_disk_key *key)
|
||||
{
|
||||
write_eb_member(eb, item, struct btrfs_tree_block_info, key, key);
|
||||
}
|
||||
@ -446,7 +443,7 @@ void btrfs_node_key(const struct extent_buffer *eb,
|
||||
struct btrfs_disk_key *disk_key, int nr);
|
||||
|
||||
static inline void btrfs_set_node_key(const struct extent_buffer *eb,
|
||||
struct btrfs_disk_key *disk_key, int nr)
|
||||
const struct btrfs_disk_key *disk_key, int nr)
|
||||
{
|
||||
unsigned long ptr;
|
||||
|
||||
@ -512,7 +509,7 @@ static inline void btrfs_item_key(const struct extent_buffer *eb,
|
||||
}
|
||||
|
||||
static inline void btrfs_set_item_key(struct extent_buffer *eb,
|
||||
struct btrfs_disk_key *disk_key, int nr)
|
||||
const struct btrfs_disk_key *disk_key, int nr)
|
||||
{
|
||||
struct btrfs_item *item = btrfs_item_nr(eb, nr);
|
||||
|
||||
|
@ -29,7 +29,7 @@ struct btrfs_failed_bio {
|
||||
/* Is this a data path I/O that needs storage layer checksum and repair? */
|
||||
static inline bool is_data_bbio(struct btrfs_bio *bbio)
|
||||
{
|
||||
return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
|
||||
return bbio->inode && is_data_inode(bbio->inode);
|
||||
}
|
||||
|
||||
static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
|
||||
@ -732,7 +732,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
|
||||
* point, so they are handled as part of the no-checksum case.
|
||||
*/
|
||||
if (inode && !(inode->flags & BTRFS_INODE_NODATASUM) &&
|
||||
!test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state) &&
|
||||
!test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state) &&
|
||||
!btrfs_is_data_reloc_root(inode->root)) {
|
||||
if (should_async_write(bbio) &&
|
||||
btrfs_wq_submit_bio(bbio, bioc, &smap, mirror_num))
|
||||
|
@ -1022,6 +1022,13 @@ static void clear_incompat_bg_bits(struct btrfs_fs_info *fs_info, u64 flags)
|
||||
}
|
||||
}
|
||||
|
||||
static struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
|
||||
return fs_info->block_group_root;
|
||||
return btrfs_extent_root(fs_info, 0);
|
||||
}
|
||||
|
||||
static int remove_block_group_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_block_group *block_group)
|
||||
@ -1757,24 +1764,21 @@ static inline bool btrfs_should_reclaim(struct btrfs_fs_info *fs_info)
|
||||
|
||||
static bool should_reclaim_block_group(struct btrfs_block_group *bg, u64 bytes_freed)
|
||||
{
|
||||
const struct btrfs_space_info *space_info = bg->space_info;
|
||||
const int reclaim_thresh = READ_ONCE(space_info->bg_reclaim_threshold);
|
||||
const int thresh_pct = btrfs_calc_reclaim_threshold(bg->space_info);
|
||||
u64 thresh_bytes = mult_perc(bg->length, thresh_pct);
|
||||
const u64 new_val = bg->used;
|
||||
const u64 old_val = new_val + bytes_freed;
|
||||
u64 thresh;
|
||||
|
||||
if (reclaim_thresh == 0)
|
||||
if (thresh_bytes == 0)
|
||||
return false;
|
||||
|
||||
thresh = mult_perc(bg->length, reclaim_thresh);
|
||||
|
||||
/*
|
||||
* If we were below the threshold before don't reclaim, we are likely a
|
||||
* brand new block group and we don't want to relocate new block groups.
|
||||
*/
|
||||
if (old_val < thresh)
|
||||
if (old_val < thresh_bytes)
|
||||
return false;
|
||||
if (new_val >= thresh)
|
||||
if (new_val >= thresh_bytes)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
@ -1822,6 +1826,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
list_sort(NULL, &fs_info->reclaim_bgs, reclaim_bgs_cmp);
|
||||
while (!list_empty(&fs_info->reclaim_bgs)) {
|
||||
u64 zone_unusable;
|
||||
u64 reclaimed;
|
||||
int ret = 0;
|
||||
|
||||
bg = list_first_entry(&fs_info->reclaim_bgs,
|
||||
@ -1835,6 +1840,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
/* Don't race with allocators so take the groups_sem */
|
||||
down_write(&space_info->groups_sem);
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
spin_lock(&bg->lock);
|
||||
if (bg->reserved || bg->pinned || bg->ro) {
|
||||
/*
|
||||
@ -1844,6 +1850,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
* this block group.
|
||||
*/
|
||||
spin_unlock(&bg->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
}
|
||||
@ -1862,6 +1869,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
if (!btrfs_test_opt(fs_info, DISCARD_ASYNC))
|
||||
btrfs_mark_bg_unused(bg);
|
||||
spin_unlock(&bg->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
|
||||
@ -1878,10 +1886,12 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
*/
|
||||
if (!should_reclaim_block_group(bg, bg->length)) {
|
||||
spin_unlock(&bg->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
}
|
||||
spin_unlock(&bg->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
/*
|
||||
* Get out fast, in case we're read-only or unmounting the
|
||||
@ -1914,15 +1924,26 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
div64_u64(bg->used * 100, bg->length),
|
||||
div64_u64(zone_unusable * 100, bg->length));
|
||||
trace_btrfs_reclaim_block_group(bg);
|
||||
reclaimed = bg->used;
|
||||
ret = btrfs_relocate_chunk(fs_info, bg->start);
|
||||
if (ret) {
|
||||
btrfs_dec_block_group_ro(bg);
|
||||
btrfs_err(fs_info, "error relocating chunk %llu",
|
||||
bg->start);
|
||||
reclaimed = 0;
|
||||
spin_lock(&space_info->lock);
|
||||
space_info->reclaim_errors++;
|
||||
if (READ_ONCE(space_info->periodic_reclaim))
|
||||
space_info->periodic_reclaim_ready = false;
|
||||
spin_unlock(&space_info->lock);
|
||||
}
|
||||
spin_lock(&space_info->lock);
|
||||
space_info->reclaim_count++;
|
||||
space_info->reclaim_bytes += reclaimed;
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
next:
|
||||
if (ret) {
|
||||
if (ret && !READ_ONCE(space_info->periodic_reclaim)) {
|
||||
/* Refcount held by the reclaim_bgs list after splice. */
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
/*
|
||||
@ -1964,6 +1985,7 @@ end:
|
||||
|
||||
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
btrfs_reclaim_sweep(fs_info);
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
if (!list_empty(&fs_info->reclaim_bgs))
|
||||
queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
|
||||
@ -3662,9 +3684,12 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
old_val += num_bytes;
|
||||
cache->used = old_val;
|
||||
cache->reserved -= num_bytes;
|
||||
cache->reclaim_mark = 0;
|
||||
space_info->bytes_reserved -= num_bytes;
|
||||
space_info->bytes_used += num_bytes;
|
||||
space_info->disk_used += num_bytes * factor;
|
||||
if (READ_ONCE(space_info->periodic_reclaim))
|
||||
btrfs_space_info_update_reclaimable(space_info, -num_bytes);
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
} else {
|
||||
@ -3674,8 +3699,10 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
|
||||
btrfs_space_info_update_bytes_pinned(info, space_info, num_bytes);
|
||||
space_info->bytes_used -= num_bytes;
|
||||
space_info->disk_used -= num_bytes * factor;
|
||||
|
||||
reclaim = should_reclaim_block_group(cache, num_bytes);
|
||||
if (READ_ONCE(space_info->periodic_reclaim))
|
||||
btrfs_space_info_update_reclaimable(space_info, num_bytes);
|
||||
else
|
||||
reclaim = should_reclaim_block_group(cache, num_bytes);
|
||||
|
||||
spin_unlock(&cache->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
@ -4329,13 +4356,13 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
|
||||
spin_lock(&block_group->lock);
|
||||
if (test_and_clear_bit(BLOCK_GROUP_FLAG_IREF,
|
||||
&block_group->runtime_flags)) {
|
||||
struct inode *inode = block_group->inode;
|
||||
struct btrfs_inode *inode = block_group->inode;
|
||||
|
||||
block_group->inode = NULL;
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
ASSERT(block_group->io_ctl.inode == NULL);
|
||||
iput(inode);
|
||||
iput(&inode->vfs_inode);
|
||||
} else {
|
||||
spin_unlock(&block_group->lock);
|
||||
}
|
||||
|
@ -115,7 +115,7 @@ struct btrfs_caching_control {
|
||||
|
||||
struct btrfs_block_group {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct inode *inode;
|
||||
struct btrfs_inode *inode;
|
||||
spinlock_t lock;
|
||||
u64 start;
|
||||
u64 length;
|
||||
@ -263,6 +263,7 @@ struct btrfs_block_group {
|
||||
struct work_struct zone_finish_work;
|
||||
struct extent_buffer *last_eb;
|
||||
enum btrfs_block_group_size_class size_class;
|
||||
u64 reclaim_mark;
|
||||
};
|
||||
|
||||
static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include <uapi/linux/btrfs_tree.h>
|
||||
#include <trace/events/btrfs.h>
|
||||
#include "block-rsv.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "extent_map.h"
|
||||
#include "extent_io.h"
|
||||
#include "extent-io-tree.h"
|
||||
@ -99,6 +98,29 @@ enum {
|
||||
* range).
|
||||
*/
|
||||
BTRFS_INODE_COW_WRITE_ERROR,
|
||||
/*
|
||||
* Indicate this is a directory that points to a subvolume for which
|
||||
* there is no root reference item. That's a case like the following:
|
||||
*
|
||||
* $ btrfs subvolume create /mnt/parent
|
||||
* $ btrfs subvolume create /mnt/parent/child
|
||||
* $ btrfs subvolume snapshot /mnt/parent /mnt/snap
|
||||
*
|
||||
* If subvolume "parent" is root 256, subvolume "child" is root 257 and
|
||||
* snapshot "snap" is root 258, then there's no root reference item (key
|
||||
* BTRFS_ROOT_REF_KEY in the root tree) for the subvolume "child"
|
||||
* associated to root 258 (the snapshot) - there's only for the root
|
||||
* of the "parent" subvolume (root 256). In the root tree we have a
|
||||
* (256 BTRFS_ROOT_REF_KEY 257) key but we don't have a
|
||||
* (258 BTRFS_ROOT_REF_KEY 257) key - the same goes for backrefs, we
|
||||
* have a (257 BTRFS_ROOT_BACKREF_KEY 256) but we don't have a
|
||||
* (257 BTRFS_ROOT_BACKREF_KEY 258) key.
|
||||
*
|
||||
* So when opening the "child" dentry from the snapshot's directory,
|
||||
* we don't find a root ref item and we create a stub inode. This is
|
||||
* done at new_simple_dir(), called from btrfs_lookup_dentry().
|
||||
*/
|
||||
BTRFS_INODE_ROOT_STUB,
|
||||
};
|
||||
|
||||
/* in memory btrfs inode */
|
||||
@ -106,10 +128,14 @@ struct btrfs_inode {
|
||||
/* which subvolume this inode belongs to */
|
||||
struct btrfs_root *root;
|
||||
|
||||
/* key used to find this inode on disk. This is used by the code
|
||||
* to read in roots of subvolumes
|
||||
#if BITS_PER_LONG == 32
|
||||
/*
|
||||
* The objectid of the corresponding BTRFS_INODE_ITEM_KEY.
|
||||
* On 64 bits platforms we can get it from vfs_inode.i_ino, which is an
|
||||
* unsigned long and therefore 64 bits on such platforms.
|
||||
*/
|
||||
struct btrfs_key location;
|
||||
u64 objectid;
|
||||
#endif
|
||||
|
||||
/* Cached value of inode property 'compression'. */
|
||||
u8 prop_compress;
|
||||
@ -165,9 +191,6 @@ struct btrfs_inode {
|
||||
*/
|
||||
struct list_head delalloc_inodes;
|
||||
|
||||
/* node for the red-black tree that links inodes in subvolume root */
|
||||
struct rb_node rb_node;
|
||||
|
||||
unsigned long runtime_flags;
|
||||
|
||||
/* full 64 bit generation number, struct vfs_inode doesn't have a big
|
||||
@ -228,11 +251,20 @@ struct btrfs_inode {
|
||||
u64 last_dir_index_offset;
|
||||
};
|
||||
|
||||
/*
|
||||
* Total number of bytes pending defrag, used by stat to check whether
|
||||
* it needs COW. Protected by 'lock'.
|
||||
*/
|
||||
u64 defrag_bytes;
|
||||
union {
|
||||
/*
|
||||
* Total number of bytes pending defrag, used by stat to check whether
|
||||
* it needs COW. Protected by 'lock'.
|
||||
* Used by inodes other than the data relocation inode.
|
||||
*/
|
||||
u64 defrag_bytes;
|
||||
|
||||
/*
|
||||
* Logical address of the block group being relocated.
|
||||
* Used only by the data relocation inode.
|
||||
*/
|
||||
u64 reloc_block_group_start;
|
||||
};
|
||||
|
||||
/*
|
||||
* The size of the file stored in the metadata on disk. data=ordered
|
||||
@ -241,12 +273,21 @@ struct btrfs_inode {
|
||||
*/
|
||||
u64 disk_i_size;
|
||||
|
||||
/*
|
||||
* If this is a directory then index_cnt is the counter for the index
|
||||
* number for new files that are created. For an empty directory, this
|
||||
* must be initialized to BTRFS_DIR_START_INDEX.
|
||||
*/
|
||||
u64 index_cnt;
|
||||
union {
|
||||
/*
|
||||
* If this is a directory then index_cnt is the counter for the
|
||||
* index number for new files that are created. For an empty
|
||||
* directory, this must be initialized to BTRFS_DIR_START_INDEX.
|
||||
*/
|
||||
u64 index_cnt;
|
||||
|
||||
/*
|
||||
* If this is not a directory, this is the number of bytes
|
||||
* outstanding that are going to need csums. This is used in
|
||||
* ENOSPC accounting. Protected by 'lock'.
|
||||
*/
|
||||
u64 csum_bytes;
|
||||
};
|
||||
|
||||
/* Cache the directory index number to speed the dir/file remove */
|
||||
u64 dir_index;
|
||||
@ -258,22 +299,25 @@ struct btrfs_inode {
|
||||
*/
|
||||
u64 last_unlink_trans;
|
||||
|
||||
/*
|
||||
* The id/generation of the last transaction where this inode was
|
||||
* either the source or the destination of a clone/dedupe operation.
|
||||
* Used when logging an inode to know if there are shared extents that
|
||||
* need special care when logging checksum items, to avoid duplicate
|
||||
* checksum items in a log (which can lead to a corruption where we end
|
||||
* up with missing checksum ranges after log replay).
|
||||
* Protected by the vfs inode lock.
|
||||
*/
|
||||
u64 last_reflink_trans;
|
||||
union {
|
||||
/*
|
||||
* The id/generation of the last transaction where this inode
|
||||
* was either the source or the destination of a clone/dedupe
|
||||
* operation. Used when logging an inode to know if there are
|
||||
* shared extents that need special care when logging checksum
|
||||
* items, to avoid duplicate checksum items in a log (which can
|
||||
* lead to a corruption where we end up with missing checksum
|
||||
* ranges after log replay). Protected by the VFS inode lock.
|
||||
* Used for regular files only.
|
||||
*/
|
||||
u64 last_reflink_trans;
|
||||
|
||||
/*
|
||||
* Number of bytes outstanding that are going to need csums. This is
|
||||
* used in ENOSPC accounting. Protected by 'lock'.
|
||||
*/
|
||||
u64 csum_bytes;
|
||||
/*
|
||||
* In case this is a root stub inode (BTRFS_INODE_ROOT_STUB flag set),
|
||||
* the ID of that root.
|
||||
*/
|
||||
u64 ref_root_id;
|
||||
};
|
||||
|
||||
/* Backwards incompatible flags, lower half of inode_item::flags */
|
||||
u32 flags;
|
||||
@ -331,10 +375,9 @@ static inline unsigned long btrfs_inode_hash(u64 objectid,
|
||||
*/
|
||||
static inline u64 btrfs_ino(const struct btrfs_inode *inode)
|
||||
{
|
||||
u64 ino = inode->location.objectid;
|
||||
u64 ino = inode->objectid;
|
||||
|
||||
/* type == BTRFS_ROOT_ITEM_KEY: subvol dir */
|
||||
if (inode->location.type == BTRFS_ROOT_ITEM_KEY)
|
||||
if (test_bit(BTRFS_INODE_ROOT_STUB, &inode->runtime_flags))
|
||||
ino = inode->vfs_inode.i_ino;
|
||||
return ino;
|
||||
}
|
||||
@ -348,20 +391,36 @@ static inline u64 btrfs_ino(const struct btrfs_inode *inode)
|
||||
|
||||
#endif
|
||||
|
||||
static inline void btrfs_get_inode_key(const struct btrfs_inode *inode,
|
||||
struct btrfs_key *key)
|
||||
{
|
||||
key->objectid = btrfs_ino(inode);
|
||||
key->type = BTRFS_INODE_ITEM_KEY;
|
||||
key->offset = 0;
|
||||
}
|
||||
|
||||
static inline void btrfs_set_inode_number(struct btrfs_inode *inode, u64 ino)
|
||||
{
|
||||
#if BITS_PER_LONG == 32
|
||||
inode->objectid = ino;
|
||||
#endif
|
||||
inode->vfs_inode.i_ino = ino;
|
||||
}
|
||||
|
||||
static inline void btrfs_i_size_write(struct btrfs_inode *inode, u64 size)
|
||||
{
|
||||
i_size_write(&inode->vfs_inode, size);
|
||||
inode->disk_i_size = size;
|
||||
}
|
||||
|
||||
static inline bool btrfs_is_free_space_inode(struct btrfs_inode *inode)
|
||||
static inline bool btrfs_is_free_space_inode(const struct btrfs_inode *inode)
|
||||
{
|
||||
return test_bit(BTRFS_INODE_FREE_SPACE_INODE, &inode->runtime_flags);
|
||||
}
|
||||
|
||||
static inline bool is_data_inode(struct inode *inode)
|
||||
static inline bool is_data_inode(const struct btrfs_inode *inode)
|
||||
{
|
||||
return btrfs_ino(BTRFS_I(inode)) != BTRFS_BTREE_INODE_OBJECTID;
|
||||
return btrfs_ino(inode) != BTRFS_BTREE_INODE_OBJECTID;
|
||||
}
|
||||
|
||||
static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
|
||||
@ -455,8 +514,8 @@ int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
|
||||
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
|
||||
u32 bio_offset, struct bio_vec *bv);
|
||||
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
|
||||
u64 *orig_start, u64 *orig_block_len,
|
||||
u64 *ram_bytes, bool nowait, bool strict);
|
||||
struct btrfs_file_extent *file_extent,
|
||||
bool nowait, bool strict);
|
||||
|
||||
void btrfs_del_delalloc_inode(struct btrfs_inode *inode);
|
||||
struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry);
|
||||
@ -515,9 +574,9 @@ void btrfs_free_inode(struct inode *inode);
|
||||
int btrfs_drop_inode(struct inode *inode);
|
||||
int __init btrfs_init_cachep(void);
|
||||
void __cold btrfs_destroy_cachep(void);
|
||||
struct inode *btrfs_iget_path(struct super_block *s, u64 ino,
|
||||
struct btrfs_root *root, struct btrfs_path *path);
|
||||
struct inode *btrfs_iget(struct super_block *s, u64 ino, struct btrfs_root *root);
|
||||
struct inode *btrfs_iget_path(u64 ino, struct btrfs_root *root,
|
||||
struct btrfs_path *path);
|
||||
struct inode *btrfs_iget(u64 ino, struct btrfs_root *root);
|
||||
struct extent_map *btrfs_get_extent(struct btrfs_inode *inode,
|
||||
struct page *page, u64 start, u64 len);
|
||||
int btrfs_update_inode(struct btrfs_trans_handle *trans,
|
||||
@ -551,10 +610,6 @@ ssize_t btrfs_encoded_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded);
|
||||
|
||||
ssize_t btrfs_dio_read(struct kiocb *iocb, struct iov_iter *iter,
|
||||
size_t done_before);
|
||||
struct iomap_dio *btrfs_dio_write(struct kiocb *iocb, struct iov_iter *iter,
|
||||
size_t done_before);
|
||||
struct btrfs_inode *btrfs_find_first_inode(struct btrfs_root *root, u64 min_ino);
|
||||
|
||||
extern const struct dentry_operations btrfs_dentry_operations;
|
||||
@ -571,5 +626,10 @@ void btrfs_inode_unlock(struct btrfs_inode *inode, unsigned int ilock_flags);
|
||||
void btrfs_update_inode_bytes(struct btrfs_inode *inode, const u64 add_bytes,
|
||||
const u64 del_bytes);
|
||||
void btrfs_assert_inode_range_clean(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
u64 btrfs_get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
|
||||
u64 num_bytes);
|
||||
struct extent_map *btrfs_create_io_em(struct btrfs_inode *inode, u64 start,
|
||||
const struct btrfs_file_extent *file_extent,
|
||||
int type);
|
||||
|
||||
#endif
|
||||
|
@ -261,7 +261,7 @@ void btrfs_free_compr_folio(struct folio *folio)
|
||||
folio_put(folio);
|
||||
}
|
||||
|
||||
static void end_bbio_comprssed_read(struct btrfs_bio *bbio)
|
||||
static void end_bbio_compressed_read(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = to_compressed_bio(bbio);
|
||||
blk_status_t status = bbio->bio.bi_status;
|
||||
@ -334,7 +334,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
|
||||
* This also calls the writeback end hooks for the file pages so that metadata
|
||||
* and checksums can be updated in the file.
|
||||
*/
|
||||
static void end_bbio_comprssed_write(struct btrfs_bio *bbio)
|
||||
static void end_bbio_compressed_write(struct btrfs_bio *bbio)
|
||||
{
|
||||
struct compressed_bio *cb = to_compressed_bio(bbio);
|
||||
struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
|
||||
@ -374,7 +374,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
|
||||
blk_opf_t write_flags,
|
||||
bool writeback)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct compressed_bio *cb;
|
||||
|
||||
@ -383,7 +383,7 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
|
||||
|
||||
cb = alloc_compressed_bio(inode, ordered->file_offset,
|
||||
REQ_OP_WRITE | write_flags,
|
||||
end_bbio_comprssed_write);
|
||||
end_bbio_compressed_write);
|
||||
cb->start = ordered->file_offset;
|
||||
cb->len = ordered->num_bytes;
|
||||
cb->compressed_folios = compressed_folios;
|
||||
@ -507,13 +507,15 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
*/
|
||||
if (!em || cur < em->start ||
|
||||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
|
||||
(em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) {
|
||||
(extent_map_block_start(em) >> SECTOR_SHIFT) !=
|
||||
orig_bio->bi_iter.bi_sector) {
|
||||
free_extent_map(em);
|
||||
unlock_extent(tree, cur, page_end, NULL);
|
||||
unlock_page(page);
|
||||
put_page(page);
|
||||
break;
|
||||
}
|
||||
add_size = min(em->start + em->len, page_end + 1) - cur;
|
||||
free_extent_map(em);
|
||||
|
||||
if (page->index == end_index) {
|
||||
@ -526,7 +528,6 @@ static noinline int add_ra_bio_pages(struct inode *inode,
|
||||
}
|
||||
}
|
||||
|
||||
add_size = min(em->start + em->len, page_end + 1) - cur;
|
||||
ret = bio_add_page(orig_bio, page, add_size, offset_in_page(cur));
|
||||
if (ret != add_size) {
|
||||
unlock_extent(tree, cur, page_end, NULL);
|
||||
@ -585,12 +586,12 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
|
||||
}
|
||||
|
||||
ASSERT(extent_map_is_compressed(em));
|
||||
compressed_len = em->block_len;
|
||||
compressed_len = em->disk_num_bytes;
|
||||
|
||||
cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
|
||||
end_bbio_comprssed_read);
|
||||
end_bbio_compressed_read);
|
||||
|
||||
cb->start = em->orig_start;
|
||||
cb->start = em->start - em->offset;
|
||||
em_len = em->len;
|
||||
em_start = em->start;
|
||||
|
||||
@ -608,7 +609,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
|
||||
goto out_free_bio;
|
||||
}
|
||||
|
||||
ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios, 0);
|
||||
ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios);
|
||||
if (ret2) {
|
||||
ret = BLK_STS_RESOURCE;
|
||||
goto out_free_compressed_pages;
|
||||
@ -1506,7 +1507,7 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
|
||||
*
|
||||
* Return non-zero if the compression should be done, 0 otherwise.
|
||||
*/
|
||||
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
|
||||
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end)
|
||||
{
|
||||
struct list_head *ws_list = get_workspace(0, 0);
|
||||
struct heuristic_ws *ws;
|
||||
@ -1516,7 +1517,7 @@ int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end)
|
||||
|
||||
ws = list_entry(ws_list, struct heuristic_ws, list);
|
||||
|
||||
heuristic_collect_sample(inode, start, end, ws);
|
||||
heuristic_collect_sample(&inode->vfs_inode, start, end, ws);
|
||||
|
||||
if (sample_repeated_patterns(ws)) {
|
||||
ret = 1;
|
||||
|
@ -144,7 +144,7 @@ extern const struct btrfs_compress_op btrfs_zstd_compress;
|
||||
const char* btrfs_compress_type2str(enum btrfs_compression_type type);
|
||||
bool btrfs_compress_is_valid_type(const char *str, size_t len);
|
||||
|
||||
int btrfs_compress_heuristic(struct inode *inode, u64 start, u64 end);
|
||||
int btrfs_compress_heuristic(struct btrfs_inode *inode, u64 start, u64 end);
|
||||
|
||||
int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
|
||||
struct folio **in_folio_ret);
|
||||
|
108
fs/btrfs/ctree.c
108
fs/btrfs/ctree.c
@ -321,7 +321,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
|
||||
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
trans->transid != fs_info->running_transaction->transid);
|
||||
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
trans->transid != root->last_trans);
|
||||
trans->transid != btrfs_get_root_last_trans(root));
|
||||
|
||||
level = btrfs_header_level(buf);
|
||||
if (level == 0)
|
||||
@ -417,7 +417,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
u64 refs;
|
||||
u64 owner;
|
||||
u64 flags;
|
||||
u64 new_flags = 0;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -462,8 +461,16 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
owner = btrfs_header_owner(buf);
|
||||
BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID &&
|
||||
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF));
|
||||
if (unlikely(owner == BTRFS_TREE_RELOC_OBJECTID &&
|
||||
!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))) {
|
||||
btrfs_crit(fs_info,
|
||||
"found tree block at bytenr %llu level %d root %llu refs %llu flags %llx without full backref flag set",
|
||||
buf->start, btrfs_header_level(buf),
|
||||
btrfs_root_id(root), refs, flags);
|
||||
ret = -EUCLEAN;
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (refs > 1) {
|
||||
if ((owner == btrfs_root_id(root) ||
|
||||
@ -481,7 +488,10 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
|
||||
ret = btrfs_set_disk_extent_flags(trans, buf,
|
||||
BTRFS_BLOCK_FLAG_FULL_BACKREF);
|
||||
if (ret)
|
||||
return ret;
|
||||
} else {
|
||||
|
||||
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
|
||||
@ -491,11 +501,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
if (new_flags != 0) {
|
||||
ret = btrfs_set_disk_extent_flags(trans, buf, new_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
} else {
|
||||
if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
|
||||
if (btrfs_root_id(root) == BTRFS_TREE_RELOC_OBJECTID)
|
||||
@ -551,7 +556,7 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
|
||||
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
trans->transid != fs_info->running_transaction->transid);
|
||||
WARN_ON(test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
trans->transid != root->last_trans);
|
||||
trans->transid != btrfs_get_root_last_trans(root));
|
||||
|
||||
level = btrfs_header_level(buf);
|
||||
|
||||
@ -588,19 +593,15 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
|
||||
if (ret) {
|
||||
btrfs_tree_unlock(cow);
|
||||
free_extent_buffer(cow);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
|
||||
if (test_bit(BTRFS_ROOT_SHAREABLE, &root->state)) {
|
||||
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
|
||||
if (ret) {
|
||||
btrfs_tree_unlock(cow);
|
||||
free_extent_buffer(cow);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
}
|
||||
|
||||
@ -612,27 +613,27 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
|
||||
|
||||
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
|
||||
if (ret < 0) {
|
||||
btrfs_tree_unlock(cow);
|
||||
free_extent_buffer(cow);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
atomic_inc(&cow->refs);
|
||||
rcu_assign_pointer(root->node, cow);
|
||||
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
|
||||
parent_start, last_ref);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
|
||||
parent_start, last_ref);
|
||||
free_extent_buffer(buf);
|
||||
add_root_to_dirty_list(root);
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
} else {
|
||||
WARN_ON(trans->transid != btrfs_header_generation(parent));
|
||||
ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
|
||||
BTRFS_MOD_LOG_KEY_REPLACE);
|
||||
if (ret) {
|
||||
btrfs_tree_unlock(cow);
|
||||
free_extent_buffer(cow);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
btrfs_set_node_blockptr(parent, parent_slot,
|
||||
cow->start);
|
||||
@ -642,14 +643,16 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
|
||||
if (last_ref) {
|
||||
ret = btrfs_tree_mod_log_free_eb(buf);
|
||||
if (ret) {
|
||||
btrfs_tree_unlock(cow);
|
||||
free_extent_buffer(cow);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
return ret;
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
}
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
|
||||
parent_start, last_ref);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
|
||||
parent_start, last_ref);
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto error_unlock_cow;
|
||||
}
|
||||
}
|
||||
if (unlock_orig)
|
||||
btrfs_tree_unlock(buf);
|
||||
@ -657,6 +660,11 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
|
||||
btrfs_mark_buffer_dirty(trans, cow);
|
||||
*cow_ret = cow;
|
||||
return 0;
|
||||
|
||||
error_unlock_cow:
|
||||
btrfs_tree_unlock(cow);
|
||||
free_extent_buffer(cow);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int should_cow_block(struct btrfs_trans_handle *trans,
|
||||
@ -983,9 +991,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
free_extent_buffer(mid);
|
||||
|
||||
root_sub_used_bytes(root);
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
|
||||
/* once for the root ptr */
|
||||
free_extent_buffer_stale(mid);
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
if (btrfs_header_nritems(mid) >
|
||||
@ -1053,10 +1065,14 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
}
|
||||
root_sub_used_bytes(root);
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), right,
|
||||
0, 1);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(root),
|
||||
right, 0, 1);
|
||||
free_extent_buffer_stale(right);
|
||||
right = NULL;
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
struct btrfs_disk_key right_key;
|
||||
btrfs_node_key(right, &right_key, 0);
|
||||
@ -1111,9 +1127,13 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
}
|
||||
root_sub_used_bytes(root);
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
|
||||
free_extent_buffer_stale(mid);
|
||||
mid = NULL;
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
/* update the parent key to reflect our changes */
|
||||
struct btrfs_disk_key mid_key;
|
||||
@ -1551,12 +1571,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p,
|
||||
if (ret) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return -EIO;
|
||||
}
|
||||
if (btrfs_check_eb_owner(tmp, btrfs_root_id(root))) {
|
||||
free_extent_buffer(tmp);
|
||||
btrfs_release_path(p);
|
||||
return -EUCLEAN;
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (unlock_up)
|
||||
@ -2883,7 +2898,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
|
||||
old = root->node;
|
||||
ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
|
||||
if (ret < 0) {
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
|
||||
int ret2;
|
||||
|
||||
ret2 = btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
|
||||
if (ret2 < 0)
|
||||
btrfs_abort_transaction(trans, ret2);
|
||||
btrfs_tree_unlock(c);
|
||||
free_extent_buffer(c);
|
||||
return ret;
|
||||
@ -4452,9 +4471,12 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
|
||||
root_sub_used_bytes(root);
|
||||
|
||||
atomic_inc(&leaf->refs);
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
|
||||
free_extent_buffer_stale(leaf);
|
||||
return 0;
|
||||
if (ret < 0)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
/*
|
||||
* delete the item at the leaf level in path. If that empties
|
||||
|
@ -221,9 +221,11 @@ struct btrfs_root {
|
||||
|
||||
struct list_head root_list;
|
||||
|
||||
spinlock_t inode_lock;
|
||||
/* red-black tree that keeps track of in-memory inodes */
|
||||
struct rb_root inode_tree;
|
||||
/*
|
||||
* Xarray that keeps track of in-memory inodes, protected by the lock
|
||||
* @inode_lock.
|
||||
*/
|
||||
struct xarray inodes;
|
||||
|
||||
/*
|
||||
* Xarray that keeps track of delayed nodes of every inode, protected
|
||||
@ -354,6 +356,16 @@ static inline void btrfs_set_root_last_log_commit(struct btrfs_root *root, int c
|
||||
WRITE_ONCE(root->last_log_commit, commit_id);
|
||||
}
|
||||
|
||||
static inline u64 btrfs_get_root_last_trans(const struct btrfs_root *root)
|
||||
{
|
||||
return READ_ONCE(root->last_trans);
|
||||
}
|
||||
|
||||
static inline void btrfs_set_root_last_trans(struct btrfs_root *root, u64 transid)
|
||||
{
|
||||
WRITE_ONCE(root->last_trans, transid);
|
||||
}
|
||||
|
||||
/*
|
||||
* Structure that conveys information about an extent that is going to replace
|
||||
* all the extents in a file range.
|
||||
|
@ -139,7 +139,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
|
||||
if (trans)
|
||||
transid = trans->transid;
|
||||
else
|
||||
transid = inode->root->last_trans;
|
||||
transid = btrfs_get_root_last_trans(root);
|
||||
|
||||
defrag = kmem_cache_zalloc(btrfs_inode_defrag_cachep, GFP_NOFS);
|
||||
if (!defrag)
|
||||
@ -255,7 +255,7 @@ again:
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, defrag->ino, inode_root);
|
||||
inode = btrfs_iget(defrag->ino, inode_root);
|
||||
btrfs_put_root(inode_root);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
@ -707,8 +707,10 @@ iterate:
|
||||
*/
|
||||
if (key.offset > start) {
|
||||
em->start = start;
|
||||
em->orig_start = start;
|
||||
em->block_start = EXTENT_MAP_HOLE;
|
||||
em->disk_bytenr = EXTENT_MAP_HOLE;
|
||||
em->disk_num_bytes = 0;
|
||||
em->ram_bytes = 0;
|
||||
em->offset = 0;
|
||||
em->len = key.offset - start;
|
||||
break;
|
||||
}
|
||||
@ -825,7 +827,7 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em,
|
||||
*/
|
||||
next = defrag_lookup_extent(inode, em->start + em->len, newer_than, locked);
|
||||
/* No more em or hole */
|
||||
if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
|
||||
if (!next || next->disk_bytenr >= EXTENT_MAP_LAST_BYTE)
|
||||
goto out;
|
||||
if (next->flags & EXTENT_FLAG_PREALLOC)
|
||||
goto out;
|
||||
@ -992,12 +994,12 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
|
||||
* This is for users who want to convert inline extents to
|
||||
* regular ones through max_inline= mount option.
|
||||
*/
|
||||
if (em->block_start == EXTENT_MAP_INLINE &&
|
||||
if (em->disk_bytenr == EXTENT_MAP_INLINE &&
|
||||
em->len <= inode->root->fs_info->max_inline)
|
||||
goto next;
|
||||
|
||||
/* Skip holes and preallocated extents. */
|
||||
if (em->block_start == EXTENT_MAP_HOLE ||
|
||||
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
|
||||
(em->flags & EXTENT_FLAG_PREALLOC))
|
||||
goto next;
|
||||
|
||||
@ -1062,7 +1064,7 @@ static int defrag_collect_targets(struct btrfs_inode *inode,
|
||||
* So if an inline extent passed all above checks, just add it
|
||||
* for defrag, and be converted to regular extents.
|
||||
*/
|
||||
if (em->block_start == EXTENT_MAP_INLINE)
|
||||
if (em->disk_bytenr == EXTENT_MAP_INLINE)
|
||||
goto add;
|
||||
|
||||
next_mergeable = defrag_check_next_extent(&inode->vfs_inode, em,
|
||||
|
@ -111,7 +111,7 @@
|
||||
* making error handling and cleanup easier.
|
||||
*/
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes)
|
||||
int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes)
|
||||
{
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
@ -9,7 +9,7 @@ struct extent_changeset;
|
||||
struct btrfs_inode;
|
||||
struct btrfs_fs_info;
|
||||
|
||||
int btrfs_alloc_data_chunk_ondemand(struct btrfs_inode *inode, u64 bytes);
|
||||
int btrfs_alloc_data_chunk_ondemand(const struct btrfs_inode *inode, u64 bytes);
|
||||
int btrfs_check_data_free_space(struct btrfs_inode *inode,
|
||||
struct extent_changeset **reserved, u64 start, u64 len,
|
||||
bool noflush);
|
||||
|
@ -77,14 +77,14 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
return node;
|
||||
}
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
xa_lock(&root->delayed_nodes);
|
||||
node = xa_load(&root->delayed_nodes, ino);
|
||||
|
||||
if (node) {
|
||||
if (btrfs_inode->delayed_node) {
|
||||
refcount_inc(&node->refs); /* can be accessed */
|
||||
BUG_ON(btrfs_inode->delayed_node != node);
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -111,10 +111,10 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
|
||||
node = NULL;
|
||||
}
|
||||
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
return node;
|
||||
}
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@ -148,21 +148,21 @@ again:
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
spin_lock(&root->inode_lock);
|
||||
xa_lock(&root->delayed_nodes);
|
||||
ptr = xa_load(&root->delayed_nodes, ino);
|
||||
if (ptr) {
|
||||
/* Somebody inserted it, go back and read it. */
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
kmem_cache_free(delayed_node_cache, node);
|
||||
node = NULL;
|
||||
goto again;
|
||||
}
|
||||
ptr = xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
|
||||
ptr = __xa_store(&root->delayed_nodes, ino, node, GFP_ATOMIC);
|
||||
ASSERT(xa_err(ptr) != -EINVAL);
|
||||
ASSERT(xa_err(ptr) != -ENOMEM);
|
||||
ASSERT(ptr == NULL);
|
||||
btrfs_inode->delayed_node = node;
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
|
||||
return node;
|
||||
}
|
||||
@ -275,14 +275,12 @@ static void __btrfs_release_delayed_node(
|
||||
if (refcount_dec_and_test(&delayed_node->refs)) {
|
||||
struct btrfs_root *root = delayed_node->root;
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
xa_erase(&root->delayed_nodes, delayed_node->inode_id);
|
||||
/*
|
||||
* Once our refcount goes to zero, nobody is allowed to bump it
|
||||
* back up. We can delete it now.
|
||||
*/
|
||||
ASSERT(refcount_read(&delayed_node->refs) == 0);
|
||||
xa_erase(&root->delayed_nodes, delayed_node->inode_id);
|
||||
spin_unlock(&root->inode_lock);
|
||||
kmem_cache_free(delayed_node_cache, delayed_node);
|
||||
}
|
||||
}
|
||||
@ -1471,7 +1469,7 @@ static void btrfs_release_dir_index_item_space(struct btrfs_trans_handle *trans)
|
||||
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *dir,
|
||||
struct btrfs_disk_key *disk_key, u8 flags,
|
||||
const struct btrfs_disk_key *disk_key, u8 flags,
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
@ -1684,7 +1682,7 @@ int btrfs_inode_delayed_dir_index_count(struct btrfs_inode *inode)
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
|
||||
u64 last_index,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list)
|
||||
@ -1692,7 +1690,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
struct btrfs_delayed_node *delayed_node;
|
||||
struct btrfs_delayed_item *item;
|
||||
|
||||
delayed_node = btrfs_get_delayed_node(BTRFS_I(inode));
|
||||
delayed_node = btrfs_get_delayed_node(inode);
|
||||
if (!delayed_node)
|
||||
return false;
|
||||
|
||||
@ -1700,8 +1698,8 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
* We can only do one readdir with delayed items at a time because of
|
||||
* item->readdir_list.
|
||||
*/
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_lock(BTRFS_I(inode), 0);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED);
|
||||
btrfs_inode_lock(inode, 0);
|
||||
|
||||
mutex_lock(&delayed_node->mutex);
|
||||
item = __btrfs_first_delayed_insertion_item(delayed_node);
|
||||
@ -1732,7 +1730,7 @@ bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
return true;
|
||||
}
|
||||
|
||||
void btrfs_readdir_put_delayed_items(struct inode *inode,
|
||||
void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list)
|
||||
{
|
||||
@ -1754,10 +1752,10 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
|
||||
* The VFS is going to do up_read(), so we need to downgrade back to a
|
||||
* read lock.
|
||||
*/
|
||||
downgrade_write(&inode->i_rwsem);
|
||||
downgrade_write(&inode->vfs_inode.i_rwsem);
|
||||
}
|
||||
|
||||
int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
int btrfs_should_delete_dir_index(const struct list_head *del_list,
|
||||
u64 index)
|
||||
{
|
||||
struct btrfs_delayed_item *curr;
|
||||
@ -1778,7 +1776,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
* Read dir info stored in the delayed tree.
|
||||
*/
|
||||
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
struct list_head *ins_list)
|
||||
const struct list_head *ins_list)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
struct btrfs_delayed_item *curr, *next;
|
||||
@ -1916,7 +1914,8 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
|
||||
BTRFS_I(inode)->i_otime_nsec = btrfs_stack_timespec_nsec(&inode_item->otime);
|
||||
|
||||
inode->i_generation = BTRFS_I(inode)->generation;
|
||||
BTRFS_I(inode)->index_cnt = (u64)-1;
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
BTRFS_I(inode)->index_cnt = (u64)-1;
|
||||
|
||||
mutex_unlock(&delayed_node->mutex);
|
||||
btrfs_release_delayed_node(delayed_node);
|
||||
@ -2057,9 +2056,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
|
||||
struct btrfs_delayed_node *node;
|
||||
int count;
|
||||
|
||||
spin_lock(&root->inode_lock);
|
||||
xa_lock(&root->delayed_nodes);
|
||||
if (xa_empty(&root->delayed_nodes)) {
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2076,7 +2075,7 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
|
||||
if (count >= ARRAY_SIZE(delayed_nodes))
|
||||
break;
|
||||
}
|
||||
spin_unlock(&root->inode_lock);
|
||||
xa_unlock(&root->delayed_nodes);
|
||||
index++;
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
|
@ -110,7 +110,7 @@ void btrfs_init_delayed_root(struct btrfs_delayed_root *delayed_root);
|
||||
int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
const char *name, int name_len,
|
||||
struct btrfs_inode *dir,
|
||||
struct btrfs_disk_key *disk_key, u8 flags,
|
||||
const struct btrfs_disk_key *disk_key, u8 flags,
|
||||
u64 index);
|
||||
|
||||
int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
|
||||
@ -143,17 +143,17 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
|
||||
void btrfs_destroy_delayed_inodes(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/* Used for readdir() */
|
||||
bool btrfs_readdir_get_delayed_items(struct inode *inode,
|
||||
bool btrfs_readdir_get_delayed_items(struct btrfs_inode *inode,
|
||||
u64 last_index,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list);
|
||||
void btrfs_readdir_put_delayed_items(struct inode *inode,
|
||||
void btrfs_readdir_put_delayed_items(struct btrfs_inode *inode,
|
||||
struct list_head *ins_list,
|
||||
struct list_head *del_list);
|
||||
int btrfs_should_delete_dir_index(struct list_head *del_list,
|
||||
int btrfs_should_delete_dir_index(const struct list_head *del_list,
|
||||
u64 index);
|
||||
int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
|
||||
struct list_head *ins_list);
|
||||
const struct list_head *ins_list);
|
||||
|
||||
/* Used during directory logging. */
|
||||
void btrfs_log_get_delayed_items(struct btrfs_inode *inode,
|
||||
|
@ -194,48 +194,6 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info)
|
||||
0, released, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Transfer bytes to our delayed refs rsv.
|
||||
*
|
||||
* @fs_info: the filesystem
|
||||
* @num_bytes: number of bytes to transfer
|
||||
*
|
||||
* This transfers up to the num_bytes amount, previously reserved, to the
|
||||
* delayed_refs_rsv. Any extra bytes are returned to the space info.
|
||||
*/
|
||||
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
||||
u64 num_bytes)
|
||||
{
|
||||
struct btrfs_block_rsv *delayed_refs_rsv = &fs_info->delayed_refs_rsv;
|
||||
u64 to_free = 0;
|
||||
|
||||
spin_lock(&delayed_refs_rsv->lock);
|
||||
if (delayed_refs_rsv->size > delayed_refs_rsv->reserved) {
|
||||
u64 delta = delayed_refs_rsv->size -
|
||||
delayed_refs_rsv->reserved;
|
||||
if (num_bytes > delta) {
|
||||
to_free = num_bytes - delta;
|
||||
num_bytes = delta;
|
||||
}
|
||||
} else {
|
||||
to_free = num_bytes;
|
||||
num_bytes = 0;
|
||||
}
|
||||
|
||||
if (num_bytes)
|
||||
delayed_refs_rsv->reserved += num_bytes;
|
||||
if (delayed_refs_rsv->reserved >= delayed_refs_rsv->size)
|
||||
delayed_refs_rsv->full = true;
|
||||
spin_unlock(&delayed_refs_rsv->lock);
|
||||
|
||||
if (num_bytes)
|
||||
trace_btrfs_space_reservation(fs_info, "delayed_refs_rsv",
|
||||
0, num_bytes, 1);
|
||||
if (to_free)
|
||||
btrfs_space_info_free_bytes_may_use(fs_info,
|
||||
delayed_refs_rsv->space_info, to_free);
|
||||
}
|
||||
|
||||
/*
|
||||
* Refill based on our delayed refs usage.
|
||||
*
|
||||
@ -861,6 +819,12 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
|
||||
spin_lock_init(&head_ref->lock);
|
||||
mutex_init(&head_ref->mutex);
|
||||
|
||||
/* If not metadata set an impossible level to help debugging. */
|
||||
if (generic_ref->type == BTRFS_REF_METADATA)
|
||||
head_ref->level = generic_ref->tree_ref.level;
|
||||
else
|
||||
head_ref->level = U8_MAX;
|
||||
|
||||
if (qrecord) {
|
||||
if (generic_ref->ref_root && reserved) {
|
||||
qrecord->data_rsv = reserved;
|
||||
@ -1114,7 +1078,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
|
||||
}
|
||||
|
||||
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
u64 bytenr, u64 num_bytes, u8 level,
|
||||
struct btrfs_delayed_extent_op *extent_op)
|
||||
{
|
||||
struct btrfs_delayed_ref_head *head_ref;
|
||||
@ -1124,6 +1088,7 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
.action = BTRFS_UPDATE_DELAYED_HEAD,
|
||||
.bytenr = bytenr,
|
||||
.num_bytes = num_bytes,
|
||||
.tree_ref.level = level,
|
||||
};
|
||||
|
||||
head_ref = kmem_cache_alloc(btrfs_delayed_ref_head_cachep, GFP_NOFS);
|
||||
|
@ -108,7 +108,6 @@ struct btrfs_delayed_ref_node {
|
||||
|
||||
struct btrfs_delayed_extent_op {
|
||||
struct btrfs_disk_key key;
|
||||
u8 level;
|
||||
bool update_key;
|
||||
bool update_flags;
|
||||
u64 flags_to_set;
|
||||
@ -172,6 +171,9 @@ struct btrfs_delayed_ref_head {
|
||||
*/
|
||||
u64 reserved_bytes;
|
||||
|
||||
/* Tree block level, for metadata only. */
|
||||
u8 level;
|
||||
|
||||
/*
|
||||
* when a new extent is allocated, it is just reserved in memory
|
||||
* The actual extent isn't inserted into the extent allocation tree
|
||||
@ -355,7 +357,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_ref *generic_ref,
|
||||
u64 reserved);
|
||||
int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
u64 bytenr, u64 num_bytes,
|
||||
u64 bytenr, u64 num_bytes, u8 level,
|
||||
struct btrfs_delayed_extent_op *extent_op);
|
||||
void btrfs_merge_delayed_refs(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
@ -386,8 +388,6 @@ void btrfs_inc_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_reserve_flush_enum flush);
|
||||
void btrfs_migrate_to_delayed_refs_rsv(struct btrfs_fs_info *fs_info,
|
||||
u64 num_bytes);
|
||||
bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info);
|
||||
|
||||
static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node)
|
||||
|
@ -684,7 +684,7 @@ static int btrfs_dev_replace_start(struct btrfs_fs_info *fs_info,
|
||||
if (ret)
|
||||
btrfs_err(fs_info, "kobj add dev failed %d", ret);
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
|
||||
/*
|
||||
* Commit dev_replace state and reserve 1 item for it.
|
||||
@ -880,7 +880,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
|
||||
return ret;
|
||||
}
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
|
||||
/*
|
||||
* We have to use this loop approach because at this point src_device
|
||||
|
@ -22,7 +22,7 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
|
||||
*trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *cpu_key,
|
||||
const struct btrfs_key *cpu_key,
|
||||
u32 data_size,
|
||||
const char *name,
|
||||
int name_len)
|
||||
@ -108,7 +108,7 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
|
||||
const struct fscrypt_str *name, struct btrfs_inode *dir,
|
||||
struct btrfs_key *location, u8 type, u64 index)
|
||||
const struct btrfs_key *location, u8 type, u64 index)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret2 = 0;
|
||||
@ -379,7 +379,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||
* for a specific name.
|
||||
*/
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_path *path,
|
||||
const char *name, int name_len)
|
||||
{
|
||||
struct btrfs_dir_item *dir_item;
|
||||
@ -417,7 +417,7 @@ struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
|
||||
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_dir_item *di)
|
||||
const struct btrfs_dir_item *di)
|
||||
{
|
||||
|
||||
struct extent_buffer *leaf;
|
||||
|
@ -17,7 +17,7 @@ int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
|
||||
const struct fscrypt_str *name);
|
||||
int btrfs_insert_dir_item(struct btrfs_trans_handle *trans,
|
||||
const struct fscrypt_str *name, struct btrfs_inode *dir,
|
||||
struct btrfs_key *location, u8 type, u64 index);
|
||||
const struct btrfs_key *location, u8 type, u64 index);
|
||||
struct btrfs_dir_item *btrfs_lookup_dir_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 dir,
|
||||
@ -33,7 +33,7 @@ struct btrfs_dir_item *btrfs_search_dir_index_item(struct btrfs_root *root,
|
||||
int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_dir_item *di);
|
||||
const struct btrfs_dir_item *di);
|
||||
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root,
|
||||
struct btrfs_path *path, u64 objectid,
|
||||
@ -45,7 +45,7 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
|
||||
const char *name, u16 name_len,
|
||||
int mod);
|
||||
struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_path *path,
|
||||
const struct btrfs_path *path,
|
||||
const char *name,
|
||||
int name_len);
|
||||
|
||||
|
1052
fs/btrfs/direct-io.c
Normal file
1052
fs/btrfs/direct-io.c
Normal file
File diff suppressed because it is too large
Load Diff
14
fs/btrfs/direct-io.h
Normal file
14
fs/btrfs/direct-io.h
Normal file
@ -0,0 +1,14 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_DIRECT_IO_H
|
||||
#define BTRFS_DIRECT_IO_H
|
||||
|
||||
#include <linux/types.h>
|
||||
|
||||
int __init btrfs_init_dio(void);
|
||||
void __cold btrfs_destroy_dio(void);
|
||||
|
||||
ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from);
|
||||
ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to);
|
||||
|
||||
#endif /* BTRFS_DIRECT_IO_H */
|
@ -213,7 +213,7 @@ static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
|
||||
* structure for details.
|
||||
*/
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *eb,
|
||||
struct btrfs_tree_parent_check *check)
|
||||
const struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
int failed = 0;
|
||||
@ -358,7 +358,7 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
|
||||
|
||||
/* Do basic extent buffer checks at read time */
|
||||
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
|
||||
struct btrfs_tree_parent_check *check)
|
||||
const struct btrfs_tree_parent_check *check)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
u64 found_start;
|
||||
@ -367,6 +367,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
|
||||
u8 result[BTRFS_CSUM_SIZE];
|
||||
const u8 *header_csum;
|
||||
int ret = 0;
|
||||
const bool ignore_csum = btrfs_test_opt(fs_info, IGNOREMETACSUMS);
|
||||
|
||||
ASSERT(check);
|
||||
|
||||
@ -399,13 +400,16 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
|
||||
|
||||
if (memcmp(result, header_csum, csum_size) != 0) {
|
||||
btrfs_warn_rl(fs_info,
|
||||
"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d",
|
||||
"checksum verify failed on logical %llu mirror %u wanted " CSUM_FMT " found " CSUM_FMT " level %d%s",
|
||||
eb->start, eb->read_mirror,
|
||||
CSUM_FMT_VALUE(csum_size, header_csum),
|
||||
CSUM_FMT_VALUE(csum_size, result),
|
||||
btrfs_header_level(eb));
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
btrfs_header_level(eb),
|
||||
ignore_csum ? ", ignored" : "");
|
||||
if (!ignore_csum) {
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
if (found_level != check->level) {
|
||||
@ -425,7 +429,7 @@ int btrfs_validate_extent_buffer(struct extent_buffer *eb,
|
||||
goto out;
|
||||
}
|
||||
if (check->has_first_key) {
|
||||
struct btrfs_key *expect_key = &check->first_key;
|
||||
const struct btrfs_key *expect_key = &check->first_key;
|
||||
struct btrfs_key found_key;
|
||||
|
||||
if (found_level)
|
||||
@ -635,10 +639,6 @@ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
free_extent_buffer_stale(buf);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
if (btrfs_check_eb_owner(buf, check->owner_root)) {
|
||||
free_extent_buffer_stale(buf);
|
||||
return ERR_PTR(-EUCLEAN);
|
||||
}
|
||||
return buf;
|
||||
|
||||
}
|
||||
@ -658,11 +658,11 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
root->state = 0;
|
||||
RB_CLEAR_NODE(&root->rb_node);
|
||||
|
||||
root->last_trans = 0;
|
||||
btrfs_set_root_last_trans(root, 0);
|
||||
root->free_objectid = 0;
|
||||
root->nr_delalloc_inodes = 0;
|
||||
root->nr_ordered_extents = 0;
|
||||
root->inode_tree = RB_ROOT;
|
||||
xa_init(&root->inodes);
|
||||
xa_init(&root->delayed_nodes);
|
||||
|
||||
btrfs_init_root_block_rsv(root);
|
||||
@ -674,7 +674,6 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
|
||||
INIT_LIST_HEAD(&root->ordered_extents);
|
||||
INIT_LIST_HEAD(&root->ordered_root);
|
||||
INIT_LIST_HEAD(&root->reloc_dirty_list);
|
||||
spin_lock_init(&root->inode_lock);
|
||||
spin_lock_init(&root->delalloc_lock);
|
||||
spin_lock_init(&root->ordered_extent_lock);
|
||||
spin_lock_init(&root->accounting_lock);
|
||||
@ -847,13 +846,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr)
|
||||
return btrfs_global_root(fs_info, &key);
|
||||
}
|
||||
|
||||
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE))
|
||||
return fs_info->block_group_root;
|
||||
return btrfs_extent_root(fs_info, 0);
|
||||
}
|
||||
|
||||
struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
|
||||
u64 objectid)
|
||||
{
|
||||
@ -1010,7 +1002,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
|
||||
return ret;
|
||||
}
|
||||
|
||||
log_root->last_trans = trans->transid;
|
||||
btrfs_set_root_last_trans(log_root, trans->transid);
|
||||
log_root->root_key.offset = btrfs_root_id(root);
|
||||
|
||||
inode_item = &log_root->root_item.inode;
|
||||
@ -1033,7 +1025,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
|
||||
|
||||
static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root,
|
||||
struct btrfs_path *path,
|
||||
struct btrfs_key *key)
|
||||
const struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_tree_parent_check check = { 0 };
|
||||
@ -1095,7 +1087,7 @@ fail:
|
||||
}
|
||||
|
||||
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
|
||||
struct btrfs_key *key)
|
||||
const struct btrfs_key *key)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_path *path;
|
||||
@ -1230,7 +1222,7 @@ int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info)
|
||||
void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
struct btrfs_root *root;
|
||||
@ -1854,7 +1846,8 @@ void btrfs_put_root(struct btrfs_root *root)
|
||||
return;
|
||||
|
||||
if (refcount_dec_and_test(&root->refs)) {
|
||||
WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
|
||||
if (WARN_ON(!xa_empty(&root->inodes)))
|
||||
xa_destroy(&root->inodes);
|
||||
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
|
||||
if (root->anon_dev)
|
||||
free_anon_bdev(root->anon_dev);
|
||||
@ -1928,7 +1921,7 @@ static int btrfs_init_btree_inode(struct super_block *sb)
|
||||
if (!inode)
|
||||
return -ENOMEM;
|
||||
|
||||
inode->i_ino = BTRFS_BTREE_INODE_OBJECTID;
|
||||
btrfs_set_inode_number(BTRFS_I(inode), BTRFS_BTREE_INODE_OBJECTID);
|
||||
set_nlink(inode, 1);
|
||||
/*
|
||||
* we set the i_size on the btree inode to the max possible int.
|
||||
@ -1939,15 +1932,11 @@ static int btrfs_init_btree_inode(struct super_block *sb)
|
||||
inode->i_mapping->a_ops = &btree_aops;
|
||||
mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
|
||||
|
||||
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
|
||||
extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree,
|
||||
IO_TREE_BTREE_INODE_IO);
|
||||
extent_map_tree_init(&BTRFS_I(inode)->extent_tree);
|
||||
|
||||
BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root);
|
||||
BTRFS_I(inode)->location.objectid = BTRFS_BTREE_INODE_OBJECTID;
|
||||
BTRFS_I(inode)->location.type = 0;
|
||||
BTRFS_I(inode)->location.offset = 0;
|
||||
set_bit(BTRFS_INODE_DUMMY, &BTRFS_I(inode)->runtime_flags);
|
||||
__insert_inode_hash(inode, hash);
|
||||
fs_info->btree_inode = inode;
|
||||
@ -2146,7 +2135,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
|
||||
/* If we have IGNOREDATACSUMS skip loading these roots. */
|
||||
if (objectid == BTRFS_CSUM_TREE_OBJECTID &&
|
||||
btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
|
||||
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
|
||||
set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -2199,7 +2188,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
|
||||
|
||||
if (!found || ret) {
|
||||
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
|
||||
set_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state);
|
||||
set_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state);
|
||||
|
||||
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
|
||||
ret = ret ? ret : -ENOENT;
|
||||
@ -2350,21 +2339,29 @@ out:
|
||||
* 1, 2 2nd and 3rd backup copy
|
||||
* -1 skip bytenr check
|
||||
*/
|
||||
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_super_block *sb, int mirror_num)
|
||||
int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_super_block *sb, int mirror_num)
|
||||
{
|
||||
u64 nodesize = btrfs_super_nodesize(sb);
|
||||
u64 sectorsize = btrfs_super_sectorsize(sb);
|
||||
int ret = 0;
|
||||
const bool ignore_flags = btrfs_test_opt(fs_info, IGNORESUPERFLAGS);
|
||||
|
||||
if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
|
||||
btrfs_err(fs_info, "no valid FS found");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP) {
|
||||
btrfs_err(fs_info, "unrecognized or unsupported super flag: %llu",
|
||||
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
|
||||
ret = -EINVAL;
|
||||
if ((btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)) {
|
||||
if (!ignore_flags) {
|
||||
btrfs_err(fs_info,
|
||||
"unrecognized or unsupported super flag 0x%llx",
|
||||
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
btrfs_info(fs_info,
|
||||
"unrecognized or unsupported super flags: 0x%llx, ignored",
|
||||
btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
|
||||
}
|
||||
}
|
||||
if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
|
||||
btrfs_err(fs_info, "tree_root level too big: %d >= %d",
|
||||
@ -2467,7 +2464,7 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
|
||||
(!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID) ||
|
||||
!btrfs_fs_incompat(fs_info, NO_HOLES))) {
|
||||
btrfs_err(fs_info,
|
||||
"block-group-tree feature requires fres-space-tree and no-holes");
|
||||
"block-group-tree feature requires free-space-tree and no-holes");
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
@ -2882,6 +2879,8 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
|
||||
|
||||
if (sb_rdonly(sb))
|
||||
set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
|
||||
if (btrfs_test_opt(fs_info, IGNOREMETACSUMS))
|
||||
set_bit(BTRFS_FS_STATE_SKIP_META_CSUMS, &fs_info->fs_state);
|
||||
|
||||
return btrfs_alloc_stripe_hash_table(fs_info);
|
||||
}
|
||||
@ -2927,22 +2926,22 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
u64 root_objectid = 0;
|
||||
struct btrfs_root *gang[8];
|
||||
int i = 0;
|
||||
int err = 0;
|
||||
unsigned int ret = 0;
|
||||
int ret = 0;
|
||||
|
||||
while (1) {
|
||||
unsigned int found;
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
found = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
(void **)gang, root_objectid,
|
||||
ARRAY_SIZE(gang));
|
||||
if (!ret) {
|
||||
if (!found) {
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
break;
|
||||
}
|
||||
root_objectid = btrfs_root_id(gang[ret - 1]) + 1;
|
||||
root_objectid = btrfs_root_id(gang[found - 1]) + 1;
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
for (int i = 0; i < found; i++) {
|
||||
/* Avoid to grab roots in dead_roots. */
|
||||
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
|
||||
gang[i] = NULL;
|
||||
@ -2953,24 +2952,25 @@ static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
for (int i = 0; i < found; i++) {
|
||||
if (!gang[i])
|
||||
continue;
|
||||
root_objectid = btrfs_root_id(gang[i]);
|
||||
err = btrfs_orphan_cleanup(gang[i]);
|
||||
if (err)
|
||||
goto out;
|
||||
/*
|
||||
* Continue to release the remaining roots after the first
|
||||
* error without cleanup and preserve the first error
|
||||
* for the return.
|
||||
*/
|
||||
if (!ret)
|
||||
ret = btrfs_orphan_cleanup(gang[i]);
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
root_objectid++;
|
||||
}
|
||||
out:
|
||||
/* Release the uncleaned roots due to error. */
|
||||
for (; i < ret; i++) {
|
||||
if (gang[i])
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3204,7 +3204,7 @@ int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount)
|
||||
}
|
||||
|
||||
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
|
||||
char *options)
|
||||
const char *options)
|
||||
{
|
||||
u32 sectorsize;
|
||||
u32 nodesize;
|
||||
@ -4157,9 +4157,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
|
||||
int btrfs_commit_super(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
mutex_lock(&fs_info->cleaner_mutex);
|
||||
btrfs_run_delayed_iputs(fs_info);
|
||||
mutex_unlock(&fs_info->cleaner_mutex);
|
||||
@ -4169,10 +4166,7 @@ int btrfs_commit_super(struct btrfs_fs_info *fs_info)
|
||||
down_write(&fs_info->cleanup_work_sem);
|
||||
up_write(&fs_info->cleanup_work_sem);
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
return btrfs_commit_transaction(trans);
|
||||
return btrfs_commit_current_transaction(fs_info->tree_root);
|
||||
}
|
||||
|
||||
static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
|
||||
@ -4533,7 +4527,7 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
|
||||
* extents that haven't had their dirty pages IO start writeout yet
|
||||
* actually get run and error out properly.
|
||||
*/
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
}
|
||||
|
||||
static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
|
@ -41,7 +41,7 @@ static inline u64 btrfs_sb_offset(int mirror)
|
||||
return BTRFS_SUPER_INFO_OFFSET;
|
||||
}
|
||||
|
||||
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_check_leaked_roots(const struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
|
||||
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
|
||||
struct btrfs_tree_parent_check *check);
|
||||
@ -52,12 +52,11 @@ struct extent_buffer *btrfs_find_create_tree_block(
|
||||
int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_super_block *disk_sb);
|
||||
int __cold open_ctree(struct super_block *sb,
|
||||
struct btrfs_fs_devices *fs_devices,
|
||||
char *options);
|
||||
int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_devices,
|
||||
const char *options);
|
||||
void __cold close_ctree(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_validate_super(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_super_block *sb, int mirror_num);
|
||||
int btrfs_validate_super(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_super_block *sb, int mirror_num);
|
||||
int btrfs_check_features(struct btrfs_fs_info *fs_info, bool is_rw_mount);
|
||||
int write_all_supers(struct btrfs_fs_info *fs_info, int max_mirrors);
|
||||
struct btrfs_super_block *btrfs_read_dev_super(struct block_device *bdev);
|
||||
@ -65,7 +64,7 @@ struct btrfs_super_block *btrfs_read_dev_one_super(struct block_device *bdev,
|
||||
int copy_num, bool drop_cache);
|
||||
int btrfs_commit_super(struct btrfs_fs_info *fs_info);
|
||||
struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
|
||||
struct btrfs_key *key);
|
||||
const struct btrfs_key *key);
|
||||
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root);
|
||||
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
@ -83,7 +82,6 @@ struct btrfs_root *btrfs_global_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_key *key);
|
||||
struct btrfs_root *btrfs_csum_root(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
|
||||
|
||||
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
|
||||
@ -91,7 +89,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_root *root);
|
||||
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
|
||||
struct btrfs_tree_parent_check *check);
|
||||
const struct btrfs_tree_parent_check *check);
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
|
||||
#endif
|
||||
@ -118,7 +116,7 @@ void btrfs_mark_buffer_dirty(struct btrfs_trans_handle *trans,
|
||||
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
|
||||
int atomic);
|
||||
int btrfs_read_extent_buffer(struct extent_buffer *buf,
|
||||
struct btrfs_tree_parent_check *check);
|
||||
const struct btrfs_tree_parent_check *check);
|
||||
|
||||
blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio);
|
||||
int btrfs_alloc_log_tree_node(struct btrfs_trans_handle *trans,
|
||||
|
@ -40,7 +40,7 @@ static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
|
||||
if (parent) {
|
||||
u64 parent_root_id;
|
||||
|
||||
fid->parent_objectid = BTRFS_I(parent)->location.objectid;
|
||||
fid->parent_objectid = btrfs_ino(BTRFS_I(parent));
|
||||
fid->parent_gen = parent->i_generation;
|
||||
parent_root_id = btrfs_root_id(BTRFS_I(parent)->root);
|
||||
|
||||
@ -84,7 +84,7 @@ struct dentry *btrfs_get_dentry(struct super_block *sb, u64 objectid,
|
||||
if (IS_ERR(root))
|
||||
return ERR_CAST(root);
|
||||
|
||||
inode = btrfs_iget(sb, objectid, root);
|
||||
inode = btrfs_iget(objectid, root);
|
||||
btrfs_put_root(root);
|
||||
if (IS_ERR(inode))
|
||||
return ERR_CAST(inode);
|
||||
@ -210,7 +210,7 @@ struct dentry *btrfs_get_parent(struct dentry *child)
|
||||
found_key.offset, 0);
|
||||
}
|
||||
|
||||
return d_obtain_alias(btrfs_iget(fs_info->sb, key.objectid, root));
|
||||
return d_obtain_alias(btrfs_iget(key.objectid, root));
|
||||
fail:
|
||||
btrfs_free_path(path);
|
||||
return ERR_PTR(ret);
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <trace/events/btrfs.h>
|
||||
#include "messages.h"
|
||||
#include "ctree.h"
|
||||
#include "extent_io.h"
|
||||
#include "extent-io-tree.h"
|
||||
#include "btrfs_inode.h"
|
||||
|
||||
@ -1084,6 +1085,9 @@ again:
|
||||
*/
|
||||
prealloc = alloc_extent_state(mask);
|
||||
}
|
||||
/* Optimistically preallocate the extent changeset ulist node. */
|
||||
if (changeset)
|
||||
extent_changeset_prealloc(changeset, mask);
|
||||
|
||||
spin_lock(&tree->lock);
|
||||
if (cached_state && *cached_state) {
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -127,10 +127,10 @@ struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans,
|
||||
u64 empty_size,
|
||||
u64 reloc_src_root,
|
||||
enum btrfs_lock_nesting nest);
|
||||
void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
u64 root_id,
|
||||
struct extent_buffer *buf,
|
||||
u64 parent, int last_ref);
|
||||
int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
|
||||
u64 root_id,
|
||||
struct extent_buffer *buf,
|
||||
u64 parent, int last_ref);
|
||||
int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, u64 owner,
|
||||
u64 offset, u64 ram_bytes,
|
||||
|
1092
fs/btrfs/extent_io.c
1092
fs/btrfs/extent_io.c
File diff suppressed because it is too large
Load Diff
@ -215,6 +215,11 @@ static inline struct extent_changeset *extent_changeset_alloc(void)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void extent_changeset_prealloc(struct extent_changeset *changeset, gfp_t gfp_mask)
|
||||
{
|
||||
ulist_prealloc(&changeset->range_changed, gfp_mask);
|
||||
}
|
||||
|
||||
static inline void extent_changeset_release(struct extent_changeset *changeset)
|
||||
{
|
||||
if (!changeset)
|
||||
@ -235,15 +240,13 @@ bool try_release_extent_mapping(struct page *page, gfp_t mask);
|
||||
int try_release_extent_buffer(struct page *page);
|
||||
|
||||
int btrfs_read_folio(struct file *file, struct folio *folio);
|
||||
void extent_write_locked_range(struct inode *inode, struct page *locked_page,
|
||||
void extent_write_locked_range(struct inode *inode, const struct page *locked_page,
|
||||
u64 start, u64 end, struct writeback_control *wbc,
|
||||
bool pages_dirty);
|
||||
int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc);
|
||||
int btree_write_cache_pages(struct address_space *mapping,
|
||||
struct writeback_control *wbc);
|
||||
void btrfs_readahead(struct readahead_control *rac);
|
||||
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len);
|
||||
int set_folio_extent_mapped(struct folio *folio);
|
||||
int set_page_extent_mapped(struct page *page);
|
||||
void clear_page_extent_mapped(struct page *page);
|
||||
@ -263,7 +266,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb);
|
||||
#define WAIT_COMPLETE 1
|
||||
#define WAIT_PAGE_LOCK 2
|
||||
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
|
||||
struct btrfs_tree_parent_check *parent_check);
|
||||
const struct btrfs_tree_parent_check *parent_check);
|
||||
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
|
||||
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
|
||||
u64 bytenr, u64 owner_root, u64 gen, int level);
|
||||
@ -350,9 +353,8 @@ void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
|
||||
void set_extent_buffer_dirty(struct extent_buffer *eb);
|
||||
void set_extent_buffer_uptodate(struct extent_buffer *eb);
|
||||
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
|
||||
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
|
||||
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct page *locked_page,
|
||||
const struct page *locked_page,
|
||||
struct extent_state **cached,
|
||||
u32 bits_to_clear, unsigned long page_ops);
|
||||
int extent_invalidate_folio(struct extent_io_tree *tree,
|
||||
@ -361,9 +363,8 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *buf);
|
||||
|
||||
int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
|
||||
gfp_t extra_gfp);
|
||||
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array,
|
||||
gfp_t extra_gfp);
|
||||
bool nofail);
|
||||
int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
bool find_lock_delalloc_range(struct inode *inode,
|
||||
|
@ -33,7 +33,7 @@ void __cold extent_map_exit(void)
|
||||
*/
|
||||
void extent_map_tree_init(struct extent_map_tree *tree)
|
||||
{
|
||||
tree->map = RB_ROOT_CACHED;
|
||||
tree->root = RB_ROOT;
|
||||
INIT_LIST_HEAD(&tree->modified_extents);
|
||||
rwlock_init(&tree->lock);
|
||||
}
|
||||
@ -85,27 +85,24 @@ static void dec_evictable_extent_maps(struct btrfs_inode *inode)
|
||||
percpu_counter_dec(&fs_info->evictable_extent_maps);
|
||||
}
|
||||
|
||||
static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
|
||||
static int tree_insert(struct rb_root *root, struct extent_map *em)
|
||||
{
|
||||
struct rb_node **p = &root->rb_root.rb_node;
|
||||
struct rb_node **p = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct extent_map *entry = NULL;
|
||||
struct rb_node *orig_parent = NULL;
|
||||
u64 end = range_end(em->start, em->len);
|
||||
bool leftmost = true;
|
||||
|
||||
while (*p) {
|
||||
parent = *p;
|
||||
entry = rb_entry(parent, struct extent_map, rb_node);
|
||||
|
||||
if (em->start < entry->start) {
|
||||
if (em->start < entry->start)
|
||||
p = &(*p)->rb_left;
|
||||
} else if (em->start >= extent_map_end(entry)) {
|
||||
else if (em->start >= extent_map_end(entry))
|
||||
p = &(*p)->rb_right;
|
||||
leftmost = false;
|
||||
} else {
|
||||
else
|
||||
return -EEXIST;
|
||||
}
|
||||
}
|
||||
|
||||
orig_parent = parent;
|
||||
@ -128,7 +125,7 @@ static int tree_insert(struct rb_root_cached *root, struct extent_map *em)
|
||||
return -EEXIST;
|
||||
|
||||
rb_link_node(&em->rb_node, orig_parent, p);
|
||||
rb_insert_color_cached(&em->rb_node, root, leftmost);
|
||||
rb_insert_color(&em->rb_node, root);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -186,11 +183,19 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline u64 extent_map_block_len(const struct extent_map *em)
|
||||
{
|
||||
if (extent_map_is_compressed(em))
|
||||
return em->disk_num_bytes;
|
||||
return em->len;
|
||||
}
|
||||
|
||||
static inline u64 extent_map_block_end(const struct extent_map *em)
|
||||
{
|
||||
if (em->block_start + em->block_len < em->block_start)
|
||||
if (extent_map_block_start(em) + extent_map_block_len(em) <
|
||||
extent_map_block_start(em))
|
||||
return (u64)-1;
|
||||
return em->block_start + em->block_len;
|
||||
return extent_map_block_start(em) + extent_map_block_len(em);
|
||||
}
|
||||
|
||||
static bool can_merge_extent_map(const struct extent_map *em)
|
||||
@ -225,15 +230,106 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
|
||||
if (prev->flags != next->flags)
|
||||
return false;
|
||||
|
||||
if (next->block_start < EXTENT_MAP_LAST_BYTE - 1)
|
||||
return next->block_start == extent_map_block_end(prev);
|
||||
if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
|
||||
return extent_map_block_start(next) == extent_map_block_end(prev);
|
||||
|
||||
/* HOLES and INLINE extents. */
|
||||
return next->block_start == prev->block_start;
|
||||
return next->disk_bytenr == prev->disk_bytenr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle the on-disk data extents merge for @prev and @next.
|
||||
*
|
||||
* Only touches disk_bytenr/disk_num_bytes/offset/ram_bytes.
|
||||
* For now only uncompressed regular extent can be merged.
|
||||
*
|
||||
* @prev and @next will be both updated to point to the new merged range.
|
||||
* Thus one of them should be removed by the caller.
|
||||
*/
|
||||
static void merge_ondisk_extents(struct extent_map *prev, struct extent_map *next)
|
||||
{
|
||||
u64 new_disk_bytenr;
|
||||
u64 new_disk_num_bytes;
|
||||
u64 new_offset;
|
||||
|
||||
/* @prev and @next should not be compressed. */
|
||||
ASSERT(!extent_map_is_compressed(prev));
|
||||
ASSERT(!extent_map_is_compressed(next));
|
||||
|
||||
/*
|
||||
* There are two different cases where @prev and @next can be merged.
|
||||
*
|
||||
* 1) They are referring to the same data extent:
|
||||
*
|
||||
* |<----- data extent A ----->|
|
||||
* |<- prev ->|<- next ->|
|
||||
*
|
||||
* 2) They are referring to different data extents but still adjacent:
|
||||
*
|
||||
* |<-- data extent A -->|<-- data extent B -->|
|
||||
* |<- prev ->|<- next ->|
|
||||
*
|
||||
* The calculation here always merges the data extents first, then updates
|
||||
* @offset using the new data extents.
|
||||
*
|
||||
* For case 1), the merged data extent would be the same.
|
||||
* For case 2), we just merge the two data extents into one.
|
||||
*/
|
||||
new_disk_bytenr = min(prev->disk_bytenr, next->disk_bytenr);
|
||||
new_disk_num_bytes = max(prev->disk_bytenr + prev->disk_num_bytes,
|
||||
next->disk_bytenr + next->disk_num_bytes) -
|
||||
new_disk_bytenr;
|
||||
new_offset = prev->disk_bytenr + prev->offset - new_disk_bytenr;
|
||||
|
||||
prev->disk_bytenr = new_disk_bytenr;
|
||||
prev->disk_num_bytes = new_disk_num_bytes;
|
||||
prev->ram_bytes = new_disk_num_bytes;
|
||||
prev->offset = new_offset;
|
||||
|
||||
next->disk_bytenr = new_disk_bytenr;
|
||||
next->disk_num_bytes = new_disk_num_bytes;
|
||||
next->ram_bytes = new_disk_num_bytes;
|
||||
next->offset = new_offset;
|
||||
}
|
||||
|
||||
static void dump_extent_map(struct btrfs_fs_info *fs_info, const char *prefix,
|
||||
struct extent_map *em)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_BTRFS_DEBUG))
|
||||
return;
|
||||
btrfs_crit(fs_info,
|
||||
"%s, start=%llu len=%llu disk_bytenr=%llu disk_num_bytes=%llu ram_bytes=%llu offset=%llu flags=0x%x",
|
||||
prefix, em->start, em->len, em->disk_bytenr, em->disk_num_bytes,
|
||||
em->ram_bytes, em->offset, em->flags);
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
/* Internal sanity checks for btrfs debug builds. */
|
||||
static void validate_extent_map(struct btrfs_fs_info *fs_info, struct extent_map *em)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_BTRFS_DEBUG))
|
||||
return;
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
|
||||
if (em->disk_num_bytes == 0)
|
||||
dump_extent_map(fs_info, "zero disk_num_bytes", em);
|
||||
if (em->offset + em->len > em->ram_bytes)
|
||||
dump_extent_map(fs_info, "ram_bytes too small", em);
|
||||
if (em->offset + em->len > em->disk_num_bytes &&
|
||||
!extent_map_is_compressed(em))
|
||||
dump_extent_map(fs_info, "disk_num_bytes too small", em);
|
||||
if (!extent_map_is_compressed(em) &&
|
||||
em->ram_bytes != em->disk_num_bytes)
|
||||
dump_extent_map(fs_info,
|
||||
"ram_bytes mismatch with disk_num_bytes for non-compressed em",
|
||||
em);
|
||||
} else if (em->offset) {
|
||||
dump_extent_map(fs_info, "non-zero offset for hole/inline", em);
|
||||
}
|
||||
}
|
||||
|
||||
static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct extent_map_tree *tree = &inode->extent_tree;
|
||||
struct extent_map *merge = NULL;
|
||||
struct rb_node *rb;
|
||||
@ -258,14 +354,15 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
|
||||
merge = rb_entry(rb, struct extent_map, rb_node);
|
||||
if (rb && can_merge_extent_map(merge) && mergeable_maps(merge, em)) {
|
||||
em->start = merge->start;
|
||||
em->orig_start = merge->orig_start;
|
||||
em->len += merge->len;
|
||||
em->block_len += merge->block_len;
|
||||
em->block_start = merge->block_start;
|
||||
em->generation = max(em->generation, merge->generation);
|
||||
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
|
||||
merge_ondisk_extents(merge, em);
|
||||
em->flags |= EXTENT_FLAG_MERGED;
|
||||
|
||||
rb_erase_cached(&merge->rb_node, &tree->map);
|
||||
validate_extent_map(fs_info, em);
|
||||
rb_erase(&merge->rb_node, &tree->root);
|
||||
RB_CLEAR_NODE(&merge->rb_node);
|
||||
free_extent_map(merge);
|
||||
dec_evictable_extent_maps(inode);
|
||||
@ -277,8 +374,10 @@ static void try_merge_map(struct btrfs_inode *inode, struct extent_map *em)
|
||||
merge = rb_entry(rb, struct extent_map, rb_node);
|
||||
if (rb && can_merge_extent_map(merge) && mergeable_maps(em, merge)) {
|
||||
em->len += merge->len;
|
||||
em->block_len += merge->block_len;
|
||||
rb_erase_cached(&merge->rb_node, &tree->map);
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE)
|
||||
merge_ondisk_extents(em, merge);
|
||||
validate_extent_map(fs_info, em);
|
||||
rb_erase(&merge->rb_node, &tree->root);
|
||||
RB_CLEAR_NODE(&merge->rb_node);
|
||||
em->generation = max(em->generation, merge->generation);
|
||||
em->flags |= EXTENT_FLAG_MERGED;
|
||||
@ -389,7 +488,8 @@ static int add_extent_mapping(struct btrfs_inode *inode,
|
||||
|
||||
lockdep_assert_held_write(&tree->lock);
|
||||
|
||||
ret = tree_insert(&tree->map, em);
|
||||
validate_extent_map(fs_info, em);
|
||||
ret = tree_insert(&tree->root, em);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@ -410,7 +510,7 @@ __lookup_extent_mapping(struct extent_map_tree *tree,
|
||||
struct rb_node *prev_or_next = NULL;
|
||||
u64 end = range_end(start, len);
|
||||
|
||||
rb_node = __tree_search(&tree->map.rb_root, start, &prev_or_next);
|
||||
rb_node = __tree_search(&tree->root, start, &prev_or_next);
|
||||
if (!rb_node) {
|
||||
if (prev_or_next)
|
||||
rb_node = prev_or_next;
|
||||
@ -479,7 +579,7 @@ void remove_extent_mapping(struct btrfs_inode *inode, struct extent_map *em)
|
||||
lockdep_assert_held_write(&tree->lock);
|
||||
|
||||
WARN_ON(em->flags & EXTENT_FLAG_PINNED);
|
||||
rb_erase_cached(&em->rb_node, &tree->map);
|
||||
rb_erase(&em->rb_node, &tree->root);
|
||||
if (!(em->flags & EXTENT_FLAG_LOGGING))
|
||||
list_del_init(&em->list);
|
||||
RB_CLEAR_NODE(&em->rb_node);
|
||||
@ -492,15 +592,18 @@ static void replace_extent_mapping(struct btrfs_inode *inode,
|
||||
struct extent_map *new,
|
||||
int modified)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct extent_map_tree *tree = &inode->extent_tree;
|
||||
|
||||
lockdep_assert_held_write(&tree->lock);
|
||||
|
||||
validate_extent_map(fs_info, new);
|
||||
|
||||
WARN_ON(cur->flags & EXTENT_FLAG_PINNED);
|
||||
ASSERT(extent_map_in_tree(cur));
|
||||
if (!(cur->flags & EXTENT_FLAG_LOGGING))
|
||||
list_del_init(&cur->list);
|
||||
rb_replace_node_cached(&cur->rb_node, &new->rb_node, &tree->map);
|
||||
rb_replace_node(&cur->rb_node, &new->rb_node, &tree->root);
|
||||
RB_CLEAR_NODE(&cur->rb_node);
|
||||
|
||||
setup_extent_mapping(inode, new, modified);
|
||||
@ -561,11 +664,8 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode,
|
||||
start_diff = start - em->start;
|
||||
em->start = start;
|
||||
em->len = end - start;
|
||||
if (em->block_start < EXTENT_MAP_LAST_BYTE &&
|
||||
!extent_map_is_compressed(em)) {
|
||||
em->block_start += start_diff;
|
||||
em->block_len = em->len;
|
||||
}
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em))
|
||||
em->offset += start_diff;
|
||||
return add_extent_mapping(inode, em, 0);
|
||||
}
|
||||
|
||||
@ -600,7 +700,7 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
|
||||
* Tree-checker should have rejected any inline extent with non-zero
|
||||
* file offset. Here just do a sanity check.
|
||||
*/
|
||||
if (em->block_start == EXTENT_MAP_INLINE)
|
||||
if (em->disk_bytenr == EXTENT_MAP_INLINE)
|
||||
ASSERT(em->start == 0);
|
||||
|
||||
ret = add_extent_mapping(inode, em, 0);
|
||||
@ -657,18 +757,23 @@ int btrfs_add_extent_mapping(struct btrfs_inode *inode,
|
||||
static void drop_all_extent_maps_fast(struct btrfs_inode *inode)
|
||||
{
|
||||
struct extent_map_tree *tree = &inode->extent_tree;
|
||||
struct rb_node *node;
|
||||
|
||||
write_lock(&tree->lock);
|
||||
while (!RB_EMPTY_ROOT(&tree->map.rb_root)) {
|
||||
node = rb_first(&tree->root);
|
||||
while (node) {
|
||||
struct extent_map *em;
|
||||
struct rb_node *node;
|
||||
struct rb_node *next = rb_next(node);
|
||||
|
||||
node = rb_first_cached(&tree->map);
|
||||
em = rb_entry(node, struct extent_map, rb_node);
|
||||
em->flags &= ~(EXTENT_FLAG_PINNED | EXTENT_FLAG_LOGGING);
|
||||
remove_extent_mapping(inode, em);
|
||||
free_extent_map(em);
|
||||
cond_resched_rwlock_write(&tree->lock);
|
||||
|
||||
if (cond_resched_rwlock_write(&tree->lock))
|
||||
node = rb_first(&tree->root);
|
||||
else
|
||||
node = next;
|
||||
}
|
||||
write_unlock(&tree->lock);
|
||||
}
|
||||
@ -729,7 +834,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
u64 gen;
|
||||
unsigned long flags;
|
||||
bool modified;
|
||||
bool compressed;
|
||||
|
||||
if (em_end < end) {
|
||||
next_em = next_extent_map(em);
|
||||
@ -763,7 +867,6 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
goto remove_em;
|
||||
|
||||
gen = em->generation;
|
||||
compressed = extent_map_is_compressed(em);
|
||||
|
||||
if (em->start < start) {
|
||||
if (!split) {
|
||||
@ -775,22 +878,15 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
split->start = em->start;
|
||||
split->len = start - em->start;
|
||||
|
||||
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
|
||||
split->orig_start = em->orig_start;
|
||||
split->block_start = em->block_start;
|
||||
|
||||
if (compressed)
|
||||
split->block_len = em->block_len;
|
||||
else
|
||||
split->block_len = split->len;
|
||||
split->orig_block_len = max(split->block_len,
|
||||
em->orig_block_len);
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
|
||||
split->disk_bytenr = em->disk_bytenr;
|
||||
split->disk_num_bytes = em->disk_num_bytes;
|
||||
split->offset = em->offset;
|
||||
split->ram_bytes = em->ram_bytes;
|
||||
} else {
|
||||
split->orig_start = split->start;
|
||||
split->block_len = 0;
|
||||
split->block_start = em->block_start;
|
||||
split->orig_block_len = 0;
|
||||
split->disk_bytenr = em->disk_bytenr;
|
||||
split->disk_num_bytes = 0;
|
||||
split->offset = 0;
|
||||
split->ram_bytes = split->len;
|
||||
}
|
||||
|
||||
@ -810,30 +906,18 @@ void btrfs_drop_extent_map_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
}
|
||||
split->start = end;
|
||||
split->len = em_end - end;
|
||||
split->block_start = em->block_start;
|
||||
split->disk_bytenr = em->disk_bytenr;
|
||||
split->flags = flags;
|
||||
split->generation = gen;
|
||||
|
||||
if (em->block_start < EXTENT_MAP_LAST_BYTE) {
|
||||
split->orig_block_len = max(em->block_len,
|
||||
em->orig_block_len);
|
||||
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
|
||||
split->disk_num_bytes = em->disk_num_bytes;
|
||||
split->offset = em->offset + end - em->start;
|
||||
split->ram_bytes = em->ram_bytes;
|
||||
if (compressed) {
|
||||
split->block_len = em->block_len;
|
||||
split->orig_start = em->orig_start;
|
||||
} else {
|
||||
const u64 diff = end - em->start;
|
||||
|
||||
split->block_len = split->len;
|
||||
split->block_start += diff;
|
||||
split->orig_start = em->orig_start;
|
||||
}
|
||||
} else {
|
||||
split->disk_num_bytes = 0;
|
||||
split->offset = 0;
|
||||
split->ram_bytes = split->len;
|
||||
split->orig_start = split->start;
|
||||
split->block_len = 0;
|
||||
split->orig_block_len = 0;
|
||||
}
|
||||
|
||||
if (extent_map_in_tree(em)) {
|
||||
@ -976,7 +1060,7 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
|
||||
|
||||
ASSERT(em->len == len);
|
||||
ASSERT(!extent_map_is_compressed(em));
|
||||
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
|
||||
ASSERT(em->disk_bytenr < EXTENT_MAP_LAST_BYTE);
|
||||
ASSERT(em->flags & EXTENT_FLAG_PINNED);
|
||||
ASSERT(!(em->flags & EXTENT_FLAG_LOGGING));
|
||||
ASSERT(!list_empty(&em->list));
|
||||
@ -987,10 +1071,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
|
||||
/* First, replace the em with a new extent_map starting from * em->start */
|
||||
split_pre->start = em->start;
|
||||
split_pre->len = pre;
|
||||
split_pre->orig_start = split_pre->start;
|
||||
split_pre->block_start = new_logical;
|
||||
split_pre->block_len = split_pre->len;
|
||||
split_pre->orig_block_len = split_pre->block_len;
|
||||
split_pre->disk_bytenr = new_logical;
|
||||
split_pre->disk_num_bytes = split_pre->len;
|
||||
split_pre->offset = 0;
|
||||
split_pre->ram_bytes = split_pre->len;
|
||||
split_pre->flags = flags;
|
||||
split_pre->generation = em->generation;
|
||||
@ -1005,10 +1088,9 @@ int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
|
||||
/* Insert the middle extent_map. */
|
||||
split_mid->start = em->start + pre;
|
||||
split_mid->len = em->len - pre;
|
||||
split_mid->orig_start = split_mid->start;
|
||||
split_mid->block_start = em->block_start + pre;
|
||||
split_mid->block_len = split_mid->len;
|
||||
split_mid->orig_block_len = split_mid->block_len;
|
||||
split_mid->disk_bytenr = extent_map_block_start(em) + pre;
|
||||
split_mid->disk_num_bytes = split_mid->len;
|
||||
split_mid->offset = 0;
|
||||
split_mid->ram_bytes = split_mid->len;
|
||||
split_mid->flags = flags;
|
||||
split_mid->generation = em->generation;
|
||||
@ -1076,12 +1158,12 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c
|
||||
return 0;
|
||||
}
|
||||
|
||||
node = rb_first_cached(&tree->map);
|
||||
node = rb_first(&tree->root);
|
||||
while (node) {
|
||||
struct rb_node *next = rb_next(node);
|
||||
struct extent_map *em;
|
||||
|
||||
em = rb_entry(node, struct extent_map, rb_node);
|
||||
node = rb_next(node);
|
||||
ctx->scanned++;
|
||||
|
||||
if (em->flags & EXTENT_FLAG_PINNED)
|
||||
@ -1115,6 +1197,7 @@ next:
|
||||
*/
|
||||
if (need_resched() || rwlock_needbreak(&tree->lock))
|
||||
break;
|
||||
node = next;
|
||||
}
|
||||
write_unlock(&tree->lock);
|
||||
up_read(&inode->i_mmap_lock);
|
||||
|
@ -4,12 +4,11 @@
|
||||
#define BTRFS_EXTENT_MAP_H
|
||||
|
||||
#include <linux/compiler_types.h>
|
||||
#include <linux/rwlock_types.h>
|
||||
#include <linux/spinlock_types.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/refcount.h>
|
||||
#include "misc.h"
|
||||
#include "extent_map.h"
|
||||
#include "compression.h"
|
||||
|
||||
struct btrfs_inode;
|
||||
@ -62,19 +61,27 @@ struct extent_map {
|
||||
u64 len;
|
||||
|
||||
/*
|
||||
* The file offset of the original file extent before splitting.
|
||||
* The bytenr of the full on-disk extent.
|
||||
*
|
||||
* This is an in-memory only member, matching
|
||||
* extent_map::start - btrfs_file_extent_item::offset for
|
||||
* regular/preallocated extents. EXTENT_MAP_HOLE otherwise.
|
||||
* For regular extents it's btrfs_file_extent_item::disk_bytenr.
|
||||
* For holes it's EXTENT_MAP_HOLE and for inline extents it's
|
||||
* EXTENT_MAP_INLINE.
|
||||
*/
|
||||
u64 orig_start;
|
||||
u64 disk_bytenr;
|
||||
|
||||
/*
|
||||
* The full on-disk extent length, matching
|
||||
* btrfs_file_extent_item::disk_num_bytes.
|
||||
*/
|
||||
u64 orig_block_len;
|
||||
u64 disk_num_bytes;
|
||||
|
||||
/*
|
||||
* Offset inside the decompressed extent.
|
||||
*
|
||||
* For regular extents it's btrfs_file_extent_item::offset.
|
||||
* For holes and inline extents it's 0.
|
||||
*/
|
||||
u64 offset;
|
||||
|
||||
/*
|
||||
* The decompressed size of the whole on-disk extent, matching
|
||||
@ -82,27 +89,6 @@ struct extent_map {
|
||||
*/
|
||||
u64 ram_bytes;
|
||||
|
||||
/*
|
||||
* The on-disk logical bytenr for the file extent.
|
||||
*
|
||||
* For compressed extents it matches btrfs_file_extent_item::disk_bytenr.
|
||||
* For uncompressed extents it matches
|
||||
* btrfs_file_extent_item::disk_bytenr + btrfs_file_extent_item::offset
|
||||
*
|
||||
* For holes it is EXTENT_MAP_HOLE and for inline extents it is
|
||||
* EXTENT_MAP_INLINE.
|
||||
*/
|
||||
u64 block_start;
|
||||
|
||||
/*
|
||||
* The on-disk length for the file extent.
|
||||
*
|
||||
* For compressed extents it matches btrfs_file_extent_item::disk_num_bytes.
|
||||
* For uncompressed extents it matches extent_map::len.
|
||||
* For holes and inline extents it's -1 and shouldn't be used.
|
||||
*/
|
||||
u64 block_len;
|
||||
|
||||
/*
|
||||
* Generation of the extent map, for merged em it's the highest
|
||||
* generation of all merged ems.
|
||||
@ -115,7 +101,7 @@ struct extent_map {
|
||||
};
|
||||
|
||||
struct extent_map_tree {
|
||||
struct rb_root_cached map;
|
||||
struct rb_root root;
|
||||
struct list_head modified_extents;
|
||||
rwlock_t lock;
|
||||
};
|
||||
@ -163,6 +149,16 @@ static inline int extent_map_in_tree(const struct extent_map *em)
|
||||
return !RB_EMPTY_NODE(&em->rb_node);
|
||||
}
|
||||
|
||||
static inline u64 extent_map_block_start(const struct extent_map *em)
|
||||
{
|
||||
if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
|
||||
if (extent_map_is_compressed(em))
|
||||
return em->disk_bytenr;
|
||||
return em->disk_bytenr + em->offset;
|
||||
}
|
||||
return em->disk_bytenr;
|
||||
}
|
||||
|
||||
static inline u64 extent_map_end(const struct extent_map *em)
|
||||
{
|
||||
if (em->start + em->len < em->start)
|
||||
|
930
fs/btrfs/fiemap.c
Normal file
930
fs/btrfs/fiemap.c
Normal file
@ -0,0 +1,930 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "backref.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "fiemap.h"
|
||||
#include "file.h"
|
||||
#include "file-item.h"
|
||||
|
||||
struct btrfs_fiemap_entry {
|
||||
u64 offset;
|
||||
u64 phys;
|
||||
u64 len;
|
||||
u32 flags;
|
||||
};
|
||||
|
||||
/*
 * Special return value of emit_fiemap_extent(). It tells the caller that the
 * buffered entries must be flushed, which requires it to: unlock the file
 * range in the inode's io tree, release the subvolume tree search path, flush
 * the fiemap cache, relock the file range and search the subvolume tree again.
 *
 * The value is negative but lies outside the range of valid errno values, so
 * it can't be mistaken for an error. It is also deliberately not 1, because 1
 * is a return value of fiemap_fill_next_extent() and is often used to signal
 * that a btree search did not find a key.
 */
#define BTRFS_FIEMAP_FLUSH_CACHE  (-(MAX_ERRNO + 1))
|
||||
|
||||
/*
|
||||
* Used to:
|
||||
*
|
||||
* - Cache the next entry to be emitted to the fiemap buffer, so that we can
|
||||
* merge extents that are contiguous and can be grouped as a single one;
|
||||
*
|
||||
* - Store extents ready to be written to the fiemap buffer in an intermediary
|
||||
* buffer. This intermediary buffer is to ensure that in case the fiemap
|
||||
* buffer is memory mapped to the fiemap target file, we don't deadlock
|
||||
* during btrfs_page_mkwrite(). This is because during fiemap we are locking
|
||||
* an extent range in order to prevent races with delalloc flushing and
|
||||
* ordered extent completion, which is needed in order to reliably detect
|
||||
* delalloc in holes and prealloc extents. And this can lead to a deadlock
|
||||
* if the fiemap buffer is memory mapped to the file we are running fiemap
|
||||
* against (a silly, useless in practice scenario, but possible) because
|
||||
* btrfs_page_mkwrite() will try to lock the same extent range.
|
||||
*/
|
||||
struct fiemap_cache {
|
||||
/* An array of ready fiemap entries. */
|
||||
struct btrfs_fiemap_entry *entries;
|
||||
/* Number of entries in the entries array. */
|
||||
int entries_size;
|
||||
/* Index of the next entry in the entries array to write to. */
|
||||
int entries_pos;
|
||||
/*
|
||||
* Once the entries array is full, this indicates what's the offset for
|
||||
* the next file extent item we must search for in the inode's subvolume
|
||||
* tree after unlocking the extent range in the inode's io tree and
|
||||
* releasing the search path.
|
||||
*/
|
||||
u64 next_search_offset;
|
||||
/*
|
||||
* This matches struct fiemap_extent_info::fi_mapped_extents, we use it
|
||||
* to count ourselves emitted extents and stop instead of relying on
|
||||
* fiemap_fill_next_extent() because we buffer ready fiemap entries at
|
||||
* the @entries array, and we want to stop as soon as we hit the max
|
||||
* amount of extents to map, not just to save time but also to make the
|
||||
* logic at extent_fiemap() simpler.
|
||||
*/
|
||||
unsigned int extents_mapped;
|
||||
/* Fields for the cached extent (unsubmitted, not ready, extent). */
|
||||
u64 offset;
|
||||
u64 phys;
|
||||
u64 len;
|
||||
u32 flags;
|
||||
bool cached;
|
||||
};
|
||||
|
||||
static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo,
|
||||
struct fiemap_cache *cache)
|
||||
{
|
||||
for (int i = 0; i < cache->entries_pos; i++) {
|
||||
struct btrfs_fiemap_entry *entry = &cache->entries[i];
|
||||
int ret;
|
||||
|
||||
ret = fiemap_fill_next_extent(fieinfo, entry->offset,
|
||||
entry->phys, entry->len,
|
||||
entry->flags);
|
||||
/*
|
||||
* Ignore 1 (reached max entries) because we keep track of that
|
||||
* ourselves in emit_fiemap_extent().
|
||||
*/
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
cache->entries_pos = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper to submit fiemap extent.
|
||||
*
|
||||
* Will try to merge current fiemap extent specified by @offset, @phys,
|
||||
* @len and @flags with cached one.
|
||||
* And only when we fails to merge, cached one will be submitted as
|
||||
* fiemap extent.
|
||||
*
|
||||
* Return value is the same as fiemap_fill_next_extent().
|
||||
*/
|
||||
static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo,
|
||||
struct fiemap_cache *cache,
|
||||
u64 offset, u64 phys, u64 len, u32 flags)
|
||||
{
|
||||
struct btrfs_fiemap_entry *entry;
|
||||
u64 cache_end;
|
||||
|
||||
/* Set at the end of extent_fiemap(). */
|
||||
ASSERT((flags & FIEMAP_EXTENT_LAST) == 0);
|
||||
|
||||
if (!cache->cached)
|
||||
goto assign;
|
||||
|
||||
/*
|
||||
* When iterating the extents of the inode, at extent_fiemap(), we may
|
||||
* find an extent that starts at an offset behind the end offset of the
|
||||
* previous extent we processed. This happens if fiemap is called
|
||||
* without FIEMAP_FLAG_SYNC and there are ordered extents completing
|
||||
* after we had to unlock the file range, release the search path, emit
|
||||
* the fiemap extents stored in the buffer (cache->entries array) and
|
||||
* the lock the remainder of the range and re-search the btree.
|
||||
*
|
||||
* For example we are in leaf X processing its last item, which is the
|
||||
* file extent item for file range [512K, 1M[, and after
|
||||
* btrfs_next_leaf() releases the path, there's an ordered extent that
|
||||
* completes for the file range [768K, 2M[, and that results in trimming
|
||||
* the file extent item so that it now corresponds to the file range
|
||||
* [512K, 768K[ and a new file extent item is inserted for the file
|
||||
* range [768K, 2M[, which may end up as the last item of leaf X or as
|
||||
* the first item of the next leaf - in either case btrfs_next_leaf()
|
||||
* will leave us with a path pointing to the new extent item, for the
|
||||
* file range [768K, 2M[, since that's the first key that follows the
|
||||
* last one we processed. So in order not to report overlapping extents
|
||||
* to user space, we trim the length of the previously cached extent and
|
||||
* emit it.
|
||||
*
|
||||
* Upon calling btrfs_next_leaf() we may also find an extent with an
|
||||
* offset smaller than or equals to cache->offset, and this happens
|
||||
* when we had a hole or prealloc extent with several delalloc ranges in
|
||||
* it, but after btrfs_next_leaf() released the path, delalloc was
|
||||
* flushed and the resulting ordered extents were completed, so we can
|
||||
* now have found a file extent item for an offset that is smaller than
|
||||
* or equals to what we have in cache->offset. We deal with this as
|
||||
* described below.
|
||||
*/
|
||||
cache_end = cache->offset + cache->len;
|
||||
if (cache_end > offset) {
|
||||
if (offset == cache->offset) {
|
||||
/*
|
||||
* We cached a dealloc range (found in the io tree) for
|
||||
* a hole or prealloc extent and we have now found a
|
||||
* file extent item for the same offset. What we have
|
||||
* now is more recent and up to date, so discard what
|
||||
* we had in the cache and use what we have just found.
|
||||
*/
|
||||
goto assign;
|
||||
} else if (offset > cache->offset) {
|
||||
/*
|
||||
* The extent range we previously found ends after the
|
||||
* offset of the file extent item we found and that
|
||||
* offset falls somewhere in the middle of that previous
|
||||
* extent range. So adjust the range we previously found
|
||||
* to end at the offset of the file extent item we have
|
||||
* just found, since this extent is more up to date.
|
||||
* Emit that adjusted range and cache the file extent
|
||||
* item we have just found. This corresponds to the case
|
||||
* where a previously found file extent item was split
|
||||
* due to an ordered extent completing.
|
||||
*/
|
||||
cache->len = offset - cache->offset;
|
||||
goto emit;
|
||||
} else {
|
||||
const u64 range_end = offset + len;
|
||||
|
||||
/*
|
||||
* The offset of the file extent item we have just found
|
||||
* is behind the cached offset. This means we were
|
||||
* processing a hole or prealloc extent for which we
|
||||
* have found delalloc ranges (in the io tree), so what
|
||||
* we have in the cache is the last delalloc range we
|
||||
* found while the file extent item we found can be
|
||||
* either for a whole delalloc range we previously
|
||||
* emmitted or only a part of that range.
|
||||
*
|
||||
* We have two cases here:
|
||||
*
|
||||
* 1) The file extent item's range ends at or behind the
|
||||
* cached extent's end. In this case just ignore the
|
||||
* current file extent item because we don't want to
|
||||
* overlap with previous ranges that may have been
|
||||
* emmitted already;
|
||||
*
|
||||
* 2) The file extent item starts behind the currently
|
||||
* cached extent but its end offset goes beyond the
|
||||
* end offset of the cached extent. We don't want to
|
||||
* overlap with a previous range that may have been
|
||||
* emmitted already, so we emit the currently cached
|
||||
* extent and then partially store the current file
|
||||
* extent item's range in the cache, for the subrange
|
||||
* going the cached extent's end to the end of the
|
||||
* file extent item.
|
||||
*/
|
||||
if (range_end <= cache_end)
|
||||
return 0;
|
||||
|
||||
if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC)))
|
||||
phys += cache_end - offset;
|
||||
|
||||
offset = cache_end;
|
||||
len = range_end - cache_end;
|
||||
goto emit;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Only merges fiemap extents if
|
||||
* 1) Their logical addresses are continuous
|
||||
*
|
||||
* 2) Their physical addresses are continuous
|
||||
* So truly compressed (physical size smaller than logical size)
|
||||
* extents won't get merged with each other
|
||||
*
|
||||
* 3) Share same flags
|
||||
*/
|
||||
if (cache->offset + cache->len == offset &&
|
||||
cache->phys + cache->len == phys &&
|
||||
cache->flags == flags) {
|
||||
cache->len += len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
emit:
|
||||
/* Not mergeable, need to submit cached one */
|
||||
|
||||
if (cache->entries_pos == cache->entries_size) {
|
||||
/*
|
||||
* We will need to research for the end offset of the last
|
||||
* stored extent and not from the current offset, because after
|
||||
* unlocking the range and releasing the path, if there's a hole
|
||||
* between that end offset and this current offset, a new extent
|
||||
* may have been inserted due to a new write, so we don't want
|
||||
* to miss it.
|
||||
*/
|
||||
entry = &cache->entries[cache->entries_size - 1];
|
||||
cache->next_search_offset = entry->offset + entry->len;
|
||||
cache->cached = false;
|
||||
|
||||
return BTRFS_FIEMAP_FLUSH_CACHE;
|
||||
}
|
||||
|
||||
entry = &cache->entries[cache->entries_pos];
|
||||
entry->offset = cache->offset;
|
||||
entry->phys = cache->phys;
|
||||
entry->len = cache->len;
|
||||
entry->flags = cache->flags;
|
||||
cache->entries_pos++;
|
||||
cache->extents_mapped++;
|
||||
|
||||
if (cache->extents_mapped == fieinfo->fi_extents_max) {
|
||||
cache->cached = false;
|
||||
return 1;
|
||||
}
|
||||
assign:
|
||||
cache->cached = true;
|
||||
cache->offset = offset;
|
||||
cache->phys = phys;
|
||||
cache->len = len;
|
||||
cache->flags = flags;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit last fiemap cache
|
||||
*
|
||||
* The last fiemap cache may still be cached in the following case:
|
||||
* 0 4k 8k
|
||||
* |<- Fiemap range ->|
|
||||
* |<------------ First extent ----------->|
|
||||
*
|
||||
* In this case, the first extent range will be cached but not emitted.
|
||||
* So we must emit it before ending extent_fiemap().
|
||||
*/
|
||||
static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo,
|
||||
struct fiemap_cache *cache)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if (!cache->cached)
|
||||
return 0;
|
||||
|
||||
ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys,
|
||||
cache->len, cache->flags);
|
||||
cache->cached = false;
|
||||
if (ret > 0)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *path)
|
||||
{
|
||||
struct extent_buffer *clone = path->nodes[0];
|
||||
struct btrfs_key key;
|
||||
int slot;
|
||||
int ret;
|
||||
|
||||
path->slots[0]++;
|
||||
if (path->slots[0] < btrfs_header_nritems(path->nodes[0]))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Add a temporary extra ref to an already cloned extent buffer to
|
||||
* prevent btrfs_next_leaf() freeing it, we want to reuse it to avoid
|
||||
* the cost of allocating a new one.
|
||||
*/
|
||||
ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags));
|
||||
atomic_inc(&clone->refs);
|
||||
|
||||
ret = btrfs_next_leaf(inode->root, path);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Don't bother with cloning if there are no more file extent items for
|
||||
* our inode.
|
||||
*/
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
||||
if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Important to preserve the start field, for the optimizations when
|
||||
* checking if extents are shared (see extent_fiemap()).
|
||||
*
|
||||
* We must set ->start before calling copy_extent_buffer_full(). If we
|
||||
* are on sub-pagesize blocksize, we use ->start to determine the offset
|
||||
* into the folio where our eb exists, and if we update ->start after
|
||||
* the fact then any subsequent reads of the eb may read from a
|
||||
* different offset in the folio than where we originally copied into.
|
||||
*/
|
||||
clone->start = path->nodes[0]->start;
|
||||
/* See the comment at fiemap_search_slot() about why we clone. */
|
||||
copy_extent_buffer_full(clone, path->nodes[0]);
|
||||
|
||||
slot = path->slots[0];
|
||||
btrfs_release_path(path);
|
||||
path->nodes[0] = clone;
|
||||
path->slots[0] = slot;
|
||||
out:
|
||||
if (ret)
|
||||
free_extent_buffer(clone);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Search for the first file extent item that starts at a given file offset or
|
||||
* the one that starts immediately before that offset.
|
||||
* Returns: 0 on success, < 0 on error, 1 if not found.
|
||||
*/
|
||||
static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path,
|
||||
u64 file_offset)
|
||||
{
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct extent_buffer *clone;
|
||||
struct btrfs_key key;
|
||||
int slot;
|
||||
int ret;
|
||||
|
||||
key.objectid = ino;
|
||||
key.type = BTRFS_EXTENT_DATA_KEY;
|
||||
key.offset = file_offset;
|
||||
|
||||
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (ret > 0 && path->slots[0] > 0) {
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1);
|
||||
if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY)
|
||||
path->slots[0]--;
|
||||
}
|
||||
|
||||
if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
|
||||
ret = btrfs_next_leaf(root, path);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
|
||||
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* We clone the leaf and use it during fiemap. This is because while
|
||||
* using the leaf we do expensive things like checking if an extent is
|
||||
* shared, which can take a long time. In order to prevent blocking
|
||||
* other tasks for too long, we use a clone of the leaf. We have locked
|
||||
* the file range in the inode's io tree, so we know none of our file
|
||||
* extent items can change. This way we avoid blocking other tasks that
|
||||
* want to insert items for other inodes in the same leaf or b+tree
|
||||
* rebalance operations (triggered for example when someone is trying
|
||||
* to push items into this leaf when trying to insert an item in a
|
||||
* neighbour leaf).
|
||||
* We also need the private clone because holding a read lock on an
|
||||
* extent buffer of the subvolume's b+tree will make lockdep unhappy
|
||||
* when we check if extents are shared, as backref walking may need to
|
||||
* lock the same leaf we are processing.
|
||||
*/
|
||||
clone = btrfs_clone_extent_buffer(path->nodes[0]);
|
||||
if (!clone)
|
||||
return -ENOMEM;
|
||||
|
||||
slot = path->slots[0];
|
||||
btrfs_release_path(path);
|
||||
path->nodes[0] = clone;
|
||||
path->slots[0] = slot;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Process a range which is a hole or a prealloc extent in the inode's subvolume
|
||||
* btree. If @disk_bytenr is 0, we are dealing with a hole, otherwise a prealloc
|
||||
* extent. The end offset (@end) is inclusive.
|
||||
*/
|
||||
static int fiemap_process_hole(struct btrfs_inode *inode,
|
||||
struct fiemap_extent_info *fieinfo,
|
||||
struct fiemap_cache *cache,
|
||||
struct extent_state **delalloc_cached_state,
|
||||
struct btrfs_backref_share_check_ctx *backref_ctx,
|
||||
u64 disk_bytenr, u64 extent_offset,
|
||||
u64 extent_gen,
|
||||
u64 start, u64 end)
|
||||
{
|
||||
const u64 i_size = i_size_read(&inode->vfs_inode);
|
||||
u64 cur_offset = start;
|
||||
u64 last_delalloc_end = 0;
|
||||
u32 prealloc_flags = FIEMAP_EXTENT_UNWRITTEN;
|
||||
bool checked_extent_shared = false;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* There can be no delalloc past i_size, so don't waste time looking for
|
||||
* it beyond i_size.
|
||||
*/
|
||||
while (cur_offset < end && cur_offset < i_size) {
|
||||
u64 delalloc_start;
|
||||
u64 delalloc_end;
|
||||
u64 prealloc_start;
|
||||
u64 prealloc_len = 0;
|
||||
bool delalloc;
|
||||
|
||||
delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
|
||||
delalloc_cached_state,
|
||||
&delalloc_start,
|
||||
&delalloc_end);
|
||||
if (!delalloc)
|
||||
break;
|
||||
|
||||
/*
|
||||
* If this is a prealloc extent we have to report every section
|
||||
* of it that has no delalloc.
|
||||
*/
|
||||
if (disk_bytenr != 0) {
|
||||
if (last_delalloc_end == 0) {
|
||||
prealloc_start = start;
|
||||
prealloc_len = delalloc_start - start;
|
||||
} else {
|
||||
prealloc_start = last_delalloc_end + 1;
|
||||
prealloc_len = delalloc_start - prealloc_start;
|
||||
}
|
||||
}
|
||||
|
||||
if (prealloc_len > 0) {
|
||||
if (!checked_extent_shared && fieinfo->fi_extents_max) {
|
||||
ret = btrfs_is_data_extent_shared(inode,
|
||||
disk_bytenr,
|
||||
extent_gen,
|
||||
backref_ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
else if (ret > 0)
|
||||
prealloc_flags |= FIEMAP_EXTENT_SHARED;
|
||||
|
||||
checked_extent_shared = true;
|
||||
}
|
||||
ret = emit_fiemap_extent(fieinfo, cache, prealloc_start,
|
||||
disk_bytenr + extent_offset,
|
||||
prealloc_len, prealloc_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
extent_offset += prealloc_len;
|
||||
}
|
||||
|
||||
ret = emit_fiemap_extent(fieinfo, cache, delalloc_start, 0,
|
||||
delalloc_end + 1 - delalloc_start,
|
||||
FIEMAP_EXTENT_DELALLOC |
|
||||
FIEMAP_EXTENT_UNKNOWN);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
last_delalloc_end = delalloc_end;
|
||||
cur_offset = delalloc_end + 1;
|
||||
extent_offset += cur_offset - delalloc_start;
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
/*
|
||||
* Either we found no delalloc for the whole prealloc extent or we have
|
||||
* a prealloc extent that spans i_size or starts at or after i_size.
|
||||
*/
|
||||
if (disk_bytenr != 0 && last_delalloc_end < end) {
|
||||
u64 prealloc_start;
|
||||
u64 prealloc_len;
|
||||
|
||||
if (last_delalloc_end == 0) {
|
||||
prealloc_start = start;
|
||||
prealloc_len = end + 1 - start;
|
||||
} else {
|
||||
prealloc_start = last_delalloc_end + 1;
|
||||
prealloc_len = end + 1 - prealloc_start;
|
||||
}
|
||||
|
||||
if (!checked_extent_shared && fieinfo->fi_extents_max) {
|
||||
ret = btrfs_is_data_extent_shared(inode,
|
||||
disk_bytenr,
|
||||
extent_gen,
|
||||
backref_ctx);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
else if (ret > 0)
|
||||
prealloc_flags |= FIEMAP_EXTENT_SHARED;
|
||||
}
|
||||
ret = emit_fiemap_extent(fieinfo, cache, prealloc_start,
|
||||
disk_bytenr + extent_offset,
|
||||
prealloc_len, prealloc_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int fiemap_find_last_extent_offset(struct btrfs_inode *inode,
|
||||
struct btrfs_path *path,
|
||||
u64 *last_extent_end_ret)
|
||||
{
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_file_extent_item *ei;
|
||||
struct btrfs_key key;
|
||||
u64 disk_bytenr;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Lookup the last file extent. We're not using i_size here because
|
||||
* there might be preallocation past i_size.
|
||||
*/
|
||||
ret = btrfs_lookup_file_extent(NULL, root, path, ino, (u64)-1, 0);
|
||||
/* There can't be a file extent item at offset (u64)-1 */
|
||||
ASSERT(ret != 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* For a non-existing key, btrfs_search_slot() always leaves us at a
|
||||
* slot > 0, except if the btree is empty, which is impossible because
|
||||
* at least it has the inode item for this inode and all the items for
|
||||
* the root inode 256.
|
||||
*/
|
||||
ASSERT(path->slots[0] > 0);
|
||||
path->slots[0]--;
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) {
|
||||
/* No file extent items in the subvolume tree. */
|
||||
*last_extent_end_ret = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* For an inline extent, the disk_bytenr is where inline data starts at,
|
||||
* so first check if we have an inline extent item before checking if we
|
||||
* have an implicit hole (disk_bytenr == 0).
|
||||
*/
|
||||
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
|
||||
if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
|
||||
*last_extent_end_ret = btrfs_file_extent_end(path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the last file extent item that is not a hole (when NO_HOLES is
|
||||
* not enabled). This should take at most 2 iterations in the worst
|
||||
* case: we have one hole file extent item at slot 0 of a leaf and
|
||||
* another hole file extent item as the last item in the previous leaf.
|
||||
* This is because we merge file extent items that represent holes.
|
||||
*/
|
||||
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
|
||||
while (disk_bytenr == 0) {
|
||||
ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY);
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret > 0) {
|
||||
/* No file extent items that are not holes. */
|
||||
*last_extent_end_ret = 0;
|
||||
return 0;
|
||||
}
|
||||
leaf = path->nodes[0];
|
||||
ei = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
|
||||
}
|
||||
|
||||
*last_extent_end_ret = btrfs_file_extent_end(path);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Walk the file extent items of @inode that cover [@start, @start + @len)
 * and report them through @fieinfo.
 *
 * The range is rounded out to sectorsize boundaries and locked in the inode's
 * io_tree for each pass. Extents are first staged in cache.entries (an array
 * sized to one page worth of struct btrfs_fiemap_entry) and only handed to
 * flush_fiemap_cache()/emit_last_fiemap_cache() after the range is unlocked
 * and the path released or freed.
 *
 * Returns the result of emit_last_fiemap_cache() on the normal path, or a
 * negative value such as -ENOMEM, -EINTR or an error propagated from one of
 * the helpers called below.
 */
static int extent_fiemap(struct btrfs_inode *inode,
|
||||
struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
const u64 ino = btrfs_ino(inode);
|
||||
struct extent_state *cached_state = NULL;
|
||||
struct extent_state *delalloc_cached_state = NULL;
|
||||
struct btrfs_path *path;
|
||||
struct fiemap_cache cache = { 0 };
|
||||
struct btrfs_backref_share_check_ctx *backref_ctx;
|
||||
u64 last_extent_end;
|
||||
u64 prev_extent_end;
|
||||
u64 range_start;
|
||||
u64 range_end;
|
||||
const u64 sectorsize = inode->root->fs_info->sectorsize;
|
||||
bool stopped = false;
|
||||
int ret;
|
||||
|
||||
/* All three allocations are attempted first and checked together below. */
cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry);
|
||||
cache.entries = kmalloc_array(cache.entries_size,
|
||||
sizeof(struct btrfs_fiemap_entry),
|
||||
GFP_KERNEL);
|
||||
backref_ctx = btrfs_alloc_backref_share_check_ctx();
|
||||
path = btrfs_alloc_path();
|
||||
if (!cache.entries || !backref_ctx || !path) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
 * Also re-entered from out_unlock after a BTRFS_FIEMAP_FLUSH_CACHE result,
 * with start/len advanced to cache.next_search_offset.
 */
restart:
|
||||
range_start = round_down(start, sectorsize);
|
||||
range_end = round_up(start + len, sectorsize);
|
||||
prev_extent_end = range_start;
|
||||
|
||||
lock_extent(&inode->io_tree, range_start, range_end, &cached_state);
|
||||
|
||||
ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
btrfs_release_path(path);
|
||||
|
||||
path->reada = READA_FORWARD;
|
||||
ret = fiemap_search_slot(inode, path, range_start);
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
} else if (ret > 0) {
|
||||
/*
|
||||
* No file extent item found, but we may have delalloc between
|
||||
* the current offset and i_size. So check for that.
|
||||
*/
|
||||
ret = 0;
|
||||
goto check_eof_delalloc;
|
||||
}
|
||||
|
||||
while (prev_extent_end < range_end) {
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
struct btrfs_file_extent_item *ei;
|
||||
struct btrfs_key key;
|
||||
u64 extent_end;
|
||||
u64 extent_len;
|
||||
u64 extent_offset = 0;
|
||||
u64 extent_gen;
|
||||
u64 disk_bytenr = 0;
|
||||
u64 flags = 0;
|
||||
int extent_type;
|
||||
u8 compression;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY)
|
||||
break;
|
||||
|
||||
extent_end = btrfs_file_extent_end(path);
|
||||
|
||||
/*
|
||||
* The first iteration can leave us at an extent item that ends
|
||||
* before our range's start. Move to the next item.
|
||||
*/
|
||||
if (extent_end <= range_start)
|
||||
goto next_item;
|
||||
|
||||
backref_ctx->curr_leaf_bytenr = leaf->start;
|
||||
|
||||
/* We have an implicit hole (NO_HOLES feature enabled). */
|
||||
if (prev_extent_end < key.offset) {
|
||||
const u64 hole_end = min(key.offset, range_end) - 1;
|
||||
|
||||
ret = fiemap_process_hole(inode, fieinfo, &cache,
|
||||
&delalloc_cached_state,
|
||||
backref_ctx, 0, 0, 0,
|
||||
prev_extent_end, hole_end);
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
} else if (ret > 0) {
|
||||
/* fiemap_fill_next_extent() told us to stop. */
|
||||
stopped = true;
|
||||
break;
|
||||
}
|
||||
|
||||
/* We've reached the end of the fiemap range, stop. */
|
||||
if (key.offset >= range_end) {
|
||||
stopped = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
extent_len = extent_end - key.offset;
|
||||
ei = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_file_extent_item);
|
||||
compression = btrfs_file_extent_compression(leaf, ei);
|
||||
extent_type = btrfs_file_extent_type(leaf, ei);
|
||||
extent_gen = btrfs_file_extent_generation(leaf, ei);
|
||||
|
||||
/* disk_bytenr and extent_offset stay 0 for inline extents. */
if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
|
||||
disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
|
||||
if (compression == BTRFS_COMPRESS_NONE)
|
||||
extent_offset = btrfs_file_extent_offset(leaf, ei);
|
||||
}
|
||||
|
||||
if (compression != BTRFS_COMPRESS_NONE)
|
||||
flags |= FIEMAP_EXTENT_ENCODED;
|
||||
|
||||
if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
|
||||
flags |= FIEMAP_EXTENT_DATA_INLINE;
|
||||
flags |= FIEMAP_EXTENT_NOT_ALIGNED;
|
||||
ret = emit_fiemap_extent(fieinfo, &cache, key.offset, 0,
|
||||
extent_len, flags);
|
||||
} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
ret = fiemap_process_hole(inode, fieinfo, &cache,
|
||||
&delalloc_cached_state,
|
||||
backref_ctx,
|
||||
disk_bytenr, extent_offset,
|
||||
extent_gen, key.offset,
|
||||
extent_end - 1);
|
||||
} else if (disk_bytenr == 0) {
|
||||
/* We have an explicit hole. */
|
||||
ret = fiemap_process_hole(inode, fieinfo, &cache,
|
||||
&delalloc_cached_state,
|
||||
backref_ctx, 0, 0, 0,
|
||||
key.offset, extent_end - 1);
|
||||
} else {
|
||||
/* We have a regular extent. */
|
||||
/*
 * NOTE(review): presumably fi_extents_max == 0 means the caller
 * only wants an extent count, so the shared check is skipped -
 * confirm.
 */
if (fieinfo->fi_extents_max) {
|
||||
ret = btrfs_is_data_extent_shared(inode,
|
||||
disk_bytenr,
|
||||
extent_gen,
|
||||
backref_ctx);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
else if (ret > 0)
|
||||
flags |= FIEMAP_EXTENT_SHARED;
|
||||
}
|
||||
|
||||
ret = emit_fiemap_extent(fieinfo, &cache, key.offset,
|
||||
disk_bytenr + extent_offset,
|
||||
extent_len, flags);
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
} else if (ret > 0) {
|
||||
/* emit_fiemap_extent() told us to stop. */
|
||||
stopped = true;
|
||||
break;
|
||||
}
|
||||
|
||||
prev_extent_end = extent_end;
|
||||
next_item:
|
||||
if (fatal_signal_pending(current)) {
|
||||
ret = -EINTR;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = fiemap_next_leaf_item(inode, path);
|
||||
if (ret < 0) {
|
||||
goto out_unlock;
|
||||
} else if (ret > 0) {
|
||||
/* No more file extent items for this inode. */
|
||||
break;
|
||||
}
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
check_eof_delalloc:
|
||||
/* Treat the unvisited tail of the range as a hole, unless we were told to stop. */
if (!stopped && prev_extent_end < range_end) {
|
||||
ret = fiemap_process_hole(inode, fieinfo, &cache,
|
||||
&delalloc_cached_state, backref_ctx,
|
||||
0, 0, 0, prev_extent_end, range_end - 1);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
prev_extent_end = range_end;
|
||||
}
|
||||
|
||||
/*
 * The cached extent reaches the end of the last file extent item: flag it
 * FIEMAP_EXTENT_LAST unless delalloc is found between prev_extent_end and
 * i_size.
 */
if (cache.cached && cache.offset + cache.len >= last_extent_end) {
|
||||
const u64 i_size = i_size_read(&inode->vfs_inode);
|
||||
|
||||
if (prev_extent_end < i_size) {
|
||||
u64 delalloc_start;
|
||||
u64 delalloc_end;
|
||||
bool delalloc;
|
||||
|
||||
delalloc = btrfs_find_delalloc_in_range(inode,
|
||||
prev_extent_end,
|
||||
i_size - 1,
|
||||
&delalloc_cached_state,
|
||||
&delalloc_start,
|
||||
&delalloc_end);
|
||||
if (!delalloc)
|
||||
cache.flags |= FIEMAP_EXTENT_LAST;
|
||||
} else {
|
||||
cache.flags |= FIEMAP_EXTENT_LAST;
|
||||
}
|
||||
}
|
||||
|
||||
out_unlock:
|
||||
unlock_extent(&inode->io_tree, range_start, range_end, &cached_state);
|
||||
|
||||
/*
 * A flush request: release the path, write out the staged entries, then
 * resume the search from cache.next_search_offset.
 */
if (ret == BTRFS_FIEMAP_FLUSH_CACHE) {
|
||||
btrfs_release_path(path);
|
||||
ret = flush_fiemap_cache(fieinfo, &cache);
|
||||
if (ret)
|
||||
goto out;
|
||||
len -= cache.next_search_offset - start;
|
||||
start = cache.next_search_offset;
|
||||
goto restart;
|
||||
} else if (ret < 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Must free the path before emitting to the fiemap buffer because we
|
||||
* may have a non-cloned leaf and if the fiemap buffer is memory mapped
|
||||
* to a file, a write into it (through btrfs_page_mkwrite()) may trigger
|
||||
* waiting for an ordered extent that in order to complete needs to
|
||||
* modify that leaf, therefore leading to a deadlock.
|
||||
*/
|
||||
btrfs_free_path(path);
|
||||
/* Cleared so the btrfs_free_path() call at the out label is not handed the freed path again. */
path = NULL;
|
||||
|
||||
ret = flush_fiemap_cache(fieinfo, &cache);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = emit_last_fiemap_cache(fieinfo, &cache);
|
||||
out:
|
||||
free_extent_state(delalloc_cached_state);
|
||||
kfree(cache.entries);
|
||||
btrfs_free_backref_share_ctx(backref_ctx);
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * FIEMAP entry point for a btrfs inode.
 *
 * Validates the request through fiemap_prep(), waits for ordered extents when
 * FIEMAP_FLAG_SYNC is set (once without and once with the inode lock held),
 * then runs extent_fiemap() under the shared inode lock.
 *
 * Returns the first non-zero result from fiemap_prep() or
 * btrfs_wait_ordered_range(), otherwise whatever extent_fiemap() returns.
 */
int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		 u64 start, u64 len)
{
	struct btrfs_inode *bi = BTRFS_I(inode);
	int err;

	err = fiemap_prep(inode, fieinfo, start, &len, 0);
	if (err != 0)
		return err;

	/*
	 * fiemap_prep() already did a filemap_write_and_wait() over the whole
	 * possible file range (0 to LLONG_MAX), which is insufficient with
	 * compression: the first filemap_fdatawrite_range() merely kicks off
	 * the (async) compression and returns before it finishes and before
	 * writeback starts, so a second one is required to wait for the
	 * compression and get writeback started. Ordered extents must also be
	 * waited for, since fiemap lists extents mainly from file extent items
	 * and consults extent maps only for holes and prealloc extents, to
	 * detect delalloc in those ranges.
	 */
	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
		err = btrfs_wait_ordered_range(bi, 0, LLONG_MAX);
		if (err != 0)
			return err;
	}

	btrfs_inode_lock(bi, BTRFS_ILOCK_SHARED);

	/*
	 * The flush above ran unlocked so that we would not hold the inode's
	 * lock while triggering writeback and waiting on IO and ordered
	 * extents. A new write may have slipped in before the lock was taken,
	 * so repeat the wait now that we hold it.
	 */
	if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) {
		err = btrfs_wait_ordered_range(bi, 0, LLONG_MAX);
		if (err != 0)
			goto unlock;
	}

	err = extent_fiemap(bi, fieinfo, start, len);

unlock:
	btrfs_inode_unlock(bi, BTRFS_ILOCK_SHARED);
	return err;
}
|
11
fs/btrfs/fiemap.h
Normal file
11
fs/btrfs/fiemap.h
Normal file
@ -0,0 +1,11 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#ifndef BTRFS_FIEMAP_H
|
||||
#define BTRFS_FIEMAP_H
|
||||
|
||||
#include <linux/fiemap.h>
|
||||
|
||||
int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
u64 start, u64 len);
|
||||
|
||||
#endif /* BTRFS_FIEMAP_H */
|
@ -45,13 +45,12 @@
|
||||
*/
|
||||
void btrfs_inode_safe_disk_i_size_write(struct btrfs_inode *inode, u64 new_i_size)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
u64 start, end, i_size;
|
||||
int ret;
|
||||
|
||||
spin_lock(&inode->lock);
|
||||
i_size = new_i_size ?: i_size_read(&inode->vfs_inode);
|
||||
if (btrfs_fs_incompat(fs_info, NO_HOLES)) {
|
||||
if (!inode->file_extent_tree) {
|
||||
inode->disk_i_size = i_size;
|
||||
goto out_unlock;
|
||||
}
|
||||
@ -84,13 +83,14 @@ out_unlock:
|
||||
int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len)
|
||||
{
|
||||
if (!inode->file_extent_tree)
|
||||
return 0;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize));
|
||||
|
||||
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
|
||||
return 0;
|
||||
return set_extent_bit(inode->file_extent_tree, start, start + len - 1,
|
||||
EXTENT_DIRTY, NULL);
|
||||
}
|
||||
@ -112,14 +112,15 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
int btrfs_inode_clear_file_extent_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 len)
|
||||
{
|
||||
if (!inode->file_extent_tree)
|
||||
return 0;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
ASSERT(IS_ALIGNED(start + len, inode->root->fs_info->sectorsize) ||
|
||||
len == (u64)-1);
|
||||
|
||||
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
|
||||
return 0;
|
||||
return clear_extent_bit(inode->file_extent_tree, start,
|
||||
start + len - 1, EXTENT_DIRTY, NULL);
|
||||
}
|
||||
@ -352,7 +353,7 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
|
||||
u32 bio_offset = 0;
|
||||
|
||||
if ((inode->flags & BTRFS_INODE_NODATASUM) ||
|
||||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state))
|
||||
test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state))
|
||||
return BLK_STS_OK;
|
||||
|
||||
/*
|
||||
@ -1280,7 +1281,6 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
const int slot = path->slots[0];
|
||||
struct btrfs_key key;
|
||||
u64 extent_start;
|
||||
u64 bytenr;
|
||||
u8 type = btrfs_file_extent_type(leaf, fi);
|
||||
int compress_type = btrfs_file_extent_compression(leaf, fi);
|
||||
|
||||
@ -1290,24 +1290,29 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
em->generation = btrfs_file_extent_generation(leaf, fi);
|
||||
if (type == BTRFS_FILE_EXTENT_REG ||
|
||||
type == BTRFS_FILE_EXTENT_PREALLOC) {
|
||||
const u64 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
|
||||
|
||||
em->start = extent_start;
|
||||
em->len = btrfs_file_extent_end(path) - extent_start;
|
||||
em->orig_start = extent_start -
|
||||
btrfs_file_extent_offset(leaf, fi);
|
||||
em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, fi);
|
||||
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
|
||||
if (bytenr == 0) {
|
||||
em->block_start = EXTENT_MAP_HOLE;
|
||||
if (disk_bytenr == 0) {
|
||||
em->disk_bytenr = EXTENT_MAP_HOLE;
|
||||
em->disk_num_bytes = 0;
|
||||
em->offset = 0;
|
||||
return;
|
||||
}
|
||||
em->disk_bytenr = disk_bytenr;
|
||||
em->disk_num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
|
||||
em->offset = btrfs_file_extent_offset(leaf, fi);
|
||||
if (compress_type != BTRFS_COMPRESS_NONE) {
|
||||
extent_map_set_compression(em, compress_type);
|
||||
em->block_start = bytenr;
|
||||
em->block_len = em->orig_block_len;
|
||||
} else {
|
||||
bytenr += btrfs_file_extent_offset(leaf, fi);
|
||||
em->block_start = bytenr;
|
||||
em->block_len = em->len;
|
||||
/*
|
||||
* Older kernels can create regular non-hole data
|
||||
* extents with ram_bytes smaller than disk_num_bytes.
|
||||
* Not a big deal, just always use disk_num_bytes
|
||||
* for ram_bytes.
|
||||
*/
|
||||
em->ram_bytes = em->disk_num_bytes;
|
||||
if (type == BTRFS_FILE_EXTENT_PREALLOC)
|
||||
em->flags |= EXTENT_FLAG_PREALLOC;
|
||||
}
|
||||
@ -1315,15 +1320,10 @@ void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
|
||||
/* Tree-checker has ensured this. */
|
||||
ASSERT(extent_start == 0);
|
||||
|
||||
em->block_start = EXTENT_MAP_INLINE;
|
||||
em->disk_bytenr = EXTENT_MAP_INLINE;
|
||||
em->start = 0;
|
||||
em->len = fs_info->sectorsize;
|
||||
/*
|
||||
* Initialize orig_start and block_len with the same values
|
||||
* as in inode.c:btrfs_get_extent().
|
||||
*/
|
||||
em->orig_start = EXTENT_MAP_HOLE;
|
||||
em->block_len = (u64)-1;
|
||||
em->offset = 0;
|
||||
extent_map_set_compression(em, compress_type);
|
||||
} else {
|
||||
btrfs_err(fs_info,
|
||||
|
355
fs/btrfs/file.c
355
fs/btrfs/file.c
@ -17,8 +17,8 @@
|
||||
#include <linux/uio.h>
|
||||
#include <linux/iversion.h>
|
||||
#include <linux/fsverity.h>
|
||||
#include <linux/iomap.h>
|
||||
#include "ctree.h"
|
||||
#include "direct-io.h"
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
@ -1104,7 +1104,7 @@ int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
||||
&cached_state);
|
||||
}
|
||||
ret = can_nocow_extent(&inode->vfs_inode, lockstart, &num_bytes,
|
||||
NULL, NULL, NULL, nowait, false);
|
||||
NULL, nowait, false);
|
||||
if (ret <= 0)
|
||||
btrfs_drew_write_unlock(&root->snapshot_lock);
|
||||
else
|
||||
@ -1140,8 +1140,7 @@ static void update_time_for_write(struct inode *inode)
|
||||
inode_inc_iversion(inode);
|
||||
}
|
||||
|
||||
static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
|
||||
size_t count)
|
||||
int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
@ -1187,8 +1186,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
|
||||
struct iov_iter *i)
|
||||
ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
loff_t pos;
|
||||
@ -1451,194 +1449,6 @@ out:
|
||||
return num_written ? num_written : ret;
|
||||
}
|
||||
|
||||
/*
 * Check the alignment of a direct IO request against the sector size.
 *
 * Neither @offset nor the value reported by iov_iter_alignment() for @iter
 * may have any of the low (sectorsize - 1) bits set.
 *
 * Returns 0 when both are aligned, -EINVAL otherwise.
 */
static ssize_t check_direct_IO(struct btrfs_fs_info *fs_info,
			       const struct iov_iter *iter, loff_t offset)
{
	const u32 mask = fs_info->sectorsize - 1;
	/* The iterator is only inspected when the offset itself is aligned. */
	const bool misaligned = (offset & mask) ||
				(iov_iter_alignment(iter) & mask);

	return misaligned ? -EINVAL : 0;
}
|
||||
|
||||
/*
 * Perform a direct IO write described by @iocb, taking data from @from.
 *
 * The inode lock is taken shared when the write ends within i_size and the
 * inode is IS_NOSEC, and as a try-lock under IOCB_NOWAIT. After the generic
 * and btrfs write checks the IO is issued with btrfs_dio_write() and finished
 * with iomap_dio_complete(). When the request is not sector aligned, when a
 * retry after faulting in pages makes no progress, or when data is still left
 * after a -ENOTBLK / short result, the remainder goes through
 * btrfs_buffered_write(), followed by writeback, waiting and invalidation of
 * the page cache for the written range.
 *
 * Returns the number of bytes written, or a negative errno (-EAGAIN when the
 * buffered fallback would be required under IOCB_NOWAIT).
 */
static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
loff_t pos;
|
||||
ssize_t written = 0;
|
||||
ssize_t written_buffered;
|
||||
size_t prev_left = 0;
|
||||
loff_t endbyte;
|
||||
ssize_t ret;
|
||||
unsigned int ilock_flags = 0;
|
||||
struct iomap_dio *dio;
|
||||
|
||||
if (iocb->ki_flags & IOCB_NOWAIT)
|
||||
ilock_flags |= BTRFS_ILOCK_TRY;
|
||||
|
||||
/*
|
||||
* If the write DIO is within EOF, use a shared lock and also only if
|
||||
* security bits will likely not be dropped by file_remove_privs() called
|
||||
* from btrfs_write_check(). Either will need to be rechecked after the
|
||||
* lock was acquired.
|
||||
*/
|
||||
if (iocb->ki_pos + iov_iter_count(from) <= i_size_read(inode) && IS_NOSEC(inode))
|
||||
ilock_flags |= BTRFS_ILOCK_SHARED;
|
||||
|
||||
relock:
|
||||
ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Shared lock cannot be used with security bits set. */
|
||||
if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) {
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
ilock_flags &= ~BTRFS_ILOCK_SHARED;
|
||||
goto relock;
|
||||
}
|
||||
|
||||
ret = generic_write_checks(iocb, from);
|
||||
if (ret <= 0) {
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_write_check(iocb, from, ret);
|
||||
if (ret < 0) {
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
pos = iocb->ki_pos;
|
||||
/*
|
||||
* Re-check since file size may have changed just before taking the
|
||||
* lock or pos may have changed because of O_APPEND in generic_write_checks()
|
||||
*/
|
||||
if ((ilock_flags & BTRFS_ILOCK_SHARED) &&
|
||||
pos + iov_iter_count(from) > i_size_read(inode)) {
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
ilock_flags &= ~BTRFS_ILOCK_SHARED;
|
||||
goto relock;
|
||||
}
|
||||
|
||||
/* A request that is not sector aligned is handed to the buffered path. */
if (check_direct_IO(fs_info, from, pos)) {
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
goto buffered;
|
||||
}
|
||||
|
||||
/*
|
||||
* The iov_iter can be mapped to the same file range we are writing to.
|
||||
* If that's the case, then we will deadlock in the iomap code, because
|
||||
* it first calls our callback btrfs_dio_iomap_begin(), which will create
|
||||
* an ordered extent, and after that it will fault in the pages that the
|
||||
* iov_iter refers to. During the fault in we end up in the readahead
|
||||
* pages code (starting at btrfs_readahead()), which will lock the range,
|
||||
* find that ordered extent and then wait for it to complete (at
|
||||
* btrfs_lock_and_flush_ordered_range()), resulting in a deadlock since
|
||||
* obviously the ordered extent can never complete as we didn't submit
|
||||
* yet the respective bio(s). This always happens when the buffer is
|
||||
* memory mapped to the same file range, since the iomap DIO code always
|
||||
* invalidates pages in the target file range (after starting and waiting
|
||||
* for any writeback).
|
||||
*
|
||||
* So here we disable page faults in the iov_iter and then retry if we
|
||||
* got -EFAULT, faulting in the pages before the retry.
|
||||
*/
|
||||
from->nofault = true;
|
||||
dio = btrfs_dio_write(iocb, from, written);
|
||||
from->nofault = false;
|
||||
|
||||
/*
|
||||
* iomap_dio_complete() will call btrfs_sync_file() if we have a dsync
|
||||
* iocb, and that needs to lock the inode. So unlock it before calling
|
||||
* iomap_dio_complete() to avoid a deadlock.
|
||||
*/
|
||||
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
|
||||
|
||||
if (IS_ERR_OR_NULL(dio))
|
||||
ret = PTR_ERR_OR_ZERO(dio);
|
||||
else
|
||||
ret = iomap_dio_complete(dio);
|
||||
|
||||
/* No increment (+=) because iomap returns a cumulative value. */
|
||||
if (ret > 0)
|
||||
written = ret;
|
||||
|
||||
if (iov_iter_count(from) > 0 && (ret == -EFAULT || ret > 0)) {
|
||||
const size_t left = iov_iter_count(from);
|
||||
/*
|
||||
* We have more data left to write. Try to fault in as many as
|
||||
* possible of the remainder pages and retry. We do this without
|
||||
* releasing and locking again the inode, to prevent races with
|
||||
* truncate.
|
||||
*
|
||||
* NOTE(review): the inode lock was already dropped a few lines above
|
||||
* and the retry below jumps to relock, which takes it again, so the
|
||||
* "without releasing and locking again" wording looks stale - confirm.
|
||||
*
|
||||
* Also, in case the iov refers to pages in the file range of the
|
||||
* file we want to write to (due to a mmap), we could enter an
|
||||
* infinite loop if we retry after faulting the pages in, since
|
||||
* iomap will invalidate any pages in the range early on, before
|
||||
* it tries to fault in the pages of the iov. So we keep track of
|
||||
* how much was left of iov in the previous EFAULT and fallback
|
||||
* to buffered IO in case we haven't made any progress.
|
||||
*/
|
||||
if (left == prev_left) {
|
||||
ret = -ENOTBLK;
|
||||
} else {
|
||||
fault_in_iov_iter_readable(from, left);
|
||||
prev_left = left;
|
||||
goto relock;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If 'ret' is -ENOTBLK or we have not written all data, then it means
|
||||
* we must fallback to buffered IO.
|
||||
*/
|
||||
if ((ret < 0 && ret != -ENOTBLK) || !iov_iter_count(from))
|
||||
goto out;
|
||||
|
||||
buffered:
|
||||
/*
|
||||
* If we are in a NOWAIT context, then return -EAGAIN to signal the caller
|
||||
* it must retry the operation in a context where blocking is acceptable,
|
||||
* because even if we end up not blocking during the buffered IO attempt
|
||||
* below, we will block when flushing and waiting for the IO.
|
||||
*/
|
||||
if (iocb->ki_flags & IOCB_NOWAIT) {
|
||||
ret = -EAGAIN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pos = iocb->ki_pos;
|
||||
written_buffered = btrfs_buffered_write(iocb, from);
|
||||
if (written_buffered < 0) {
|
||||
ret = written_buffered;
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
* Ensure all data is persisted. We want the next direct IO read to be
|
||||
* able to read what was just written.
|
||||
*/
|
||||
endbyte = pos + written_buffered - 1;
|
||||
ret = btrfs_fdatawrite_range(inode, pos, endbyte);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
|
||||
if (ret)
|
||||
goto out;
|
||||
written += written_buffered;
|
||||
iocb->ki_pos = pos + written_buffered;
|
||||
invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
|
||||
endbyte >> PAGE_SHIFT);
|
||||
out:
|
||||
/* A negative ret takes precedence over any byte count accumulated so far. */
return ret < 0 ? ret : written;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
|
||||
const struct btrfs_ioctl_encoded_io_args *encoded)
|
||||
{
|
||||
@ -1738,7 +1548,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
|
||||
static int start_ordered_ops(struct btrfs_inode *inode, loff_t start, loff_t end)
|
||||
{
|
||||
int ret;
|
||||
struct blk_plug plug;
|
||||
@ -1758,7 +1568,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
|
||||
|
||||
static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ctx->inode);
|
||||
struct btrfs_inode *inode = ctx->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
|
||||
if (btrfs_inode_in_log(inode, btrfs_get_fs_generation(fs_info)) &&
|
||||
@ -1794,9 +1604,9 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
|
||||
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
{
|
||||
struct dentry *dentry = file_dentry(file);
|
||||
struct inode *inode = d_inode(dentry);
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_inode *inode = BTRFS_I(d_inode(dentry));
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_log_ctx ctx;
|
||||
int ret = 0, err;
|
||||
@ -1829,7 +1639,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP);
|
||||
|
||||
atomic_inc(&root->log_batch);
|
||||
|
||||
@ -1853,7 +1663,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
*/
|
||||
ret = start_ordered_ops(inode, start, end);
|
||||
if (ret) {
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -1865,8 +1675,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
* running delalloc the full sync flag may be set if we need to drop
|
||||
* extra extent map ranges due to temporary memory allocation failures.
|
||||
*/
|
||||
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&BTRFS_I(inode)->runtime_flags);
|
||||
full_sync = test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
|
||||
/*
|
||||
* We have to do this here to avoid the priority inversion of waiting on
|
||||
@ -1885,16 +1694,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
*/
|
||||
if (full_sync || btrfs_is_zoned(fs_info)) {
|
||||
ret = btrfs_wait_ordered_range(inode, start, len);
|
||||
clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &BTRFS_I(inode)->runtime_flags);
|
||||
clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags);
|
||||
} else {
|
||||
/*
|
||||
* Get our ordered extents as soon as possible to avoid doing
|
||||
* checksum lookups in the csum tree, and use instead the
|
||||
* checksums attached to the ordered extents.
|
||||
*/
|
||||
btrfs_get_ordered_extents_for_logging(BTRFS_I(inode),
|
||||
&ctx.ordered_extents);
|
||||
ret = filemap_fdatawait_range(inode->i_mapping, start, end);
|
||||
btrfs_get_ordered_extents_for_logging(inode, &ctx.ordered_extents);
|
||||
ret = filemap_fdatawait_range(inode->vfs_inode.i_mapping, start, end);
|
||||
if (ret)
|
||||
goto out_release_extents;
|
||||
|
||||
@ -1907,8 +1715,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
* extents to complete so that any extent maps that point to
|
||||
* unwritten locations are dropped and we don't log them.
|
||||
*/
|
||||
if (test_and_clear_bit(BTRFS_INODE_COW_WRITE_ERROR,
|
||||
&BTRFS_I(inode)->runtime_flags))
|
||||
if (test_and_clear_bit(BTRFS_INODE_COW_WRITE_ERROR, &inode->runtime_flags))
|
||||
ret = btrfs_wait_ordered_range(inode, start, len);
|
||||
}
|
||||
|
||||
@ -1923,8 +1730,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
* modified so clear this flag in case it was set for whatever
|
||||
* reason, it's no longer relevant.
|
||||
*/
|
||||
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
|
||||
&BTRFS_I(inode)->runtime_flags);
|
||||
clear_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
|
||||
/*
|
||||
* An ordered extent might have started before and completed
|
||||
* already with io errors, in which case the inode was not
|
||||
@ -1932,7 +1738,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
* for any errors that might have happened since we last
|
||||
* called fsync.
|
||||
*/
|
||||
ret = filemap_check_wb_err(inode->i_mapping, file->f_wb_err);
|
||||
ret = filemap_check_wb_err(inode->vfs_inode.i_mapping, file->f_wb_err);
|
||||
goto out_release_extents;
|
||||
}
|
||||
|
||||
@ -1982,7 +1788,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
|
||||
* file again, but that will end up using the synchronization
|
||||
* inside btrfs_sync_log to keep things safe.
|
||||
*/
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
|
||||
if (ret == BTRFS_NO_LOG_SYNC) {
|
||||
ret = btrfs_end_transaction(trans);
|
||||
@ -2051,7 +1857,7 @@ out:
|
||||
|
||||
out_release_extents:
|
||||
btrfs_release_log_ctx_extents(&ctx);
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -2350,11 +2156,9 @@ out:
|
||||
hole_em->start = offset;
|
||||
hole_em->len = end - offset;
|
||||
hole_em->ram_bytes = hole_em->len;
|
||||
hole_em->orig_start = offset;
|
||||
|
||||
hole_em->block_start = EXTENT_MAP_HOLE;
|
||||
hole_em->block_len = 0;
|
||||
hole_em->orig_block_len = 0;
|
||||
hole_em->disk_bytenr = EXTENT_MAP_HOLE;
|
||||
hole_em->disk_num_bytes = 0;
|
||||
hole_em->generation = trans->transid;
|
||||
|
||||
ret = btrfs_replace_extent_map_range(inode, hole_em, true);
|
||||
@ -2385,7 +2189,7 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
|
||||
return PTR_ERR(em);
|
||||
|
||||
/* Hole or vacuum extent (only exists in no-holes mode) */
|
||||
if (em->block_start == EXTENT_MAP_HOLE) {
|
||||
if (em->disk_bytenr == EXTENT_MAP_HOLE) {
|
||||
ret = 1;
|
||||
*len = em->start + em->len > *start + *len ?
|
||||
0 : *start + *len - em->start - em->len;
|
||||
@ -2814,7 +2618,7 @@ static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
|
||||
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP);
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode, offset, len);
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(inode), offset, len);
|
||||
if (ret)
|
||||
goto out_only_mutex;
|
||||
|
||||
@ -3042,7 +2846,7 @@ static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
|
||||
if (IS_ERR(em))
|
||||
return PTR_ERR(em);
|
||||
|
||||
if (em->block_start == EXTENT_MAP_HOLE)
|
||||
if (em->disk_bytenr == EXTENT_MAP_HOLE)
|
||||
ret = RANGE_BOUNDARY_HOLE;
|
||||
else if (em->flags & EXTENT_FLAG_PREALLOC)
|
||||
ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
|
||||
@ -3106,7 +2910,7 @@ static int btrfs_zero_range(struct inode *inode,
|
||||
ASSERT(IS_ALIGNED(alloc_start, sectorsize));
|
||||
len = offset + len - alloc_start;
|
||||
offset = alloc_start;
|
||||
alloc_hint = em->block_start + em->len;
|
||||
alloc_hint = extent_map_block_start(em) + em->len;
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
@ -3124,7 +2928,7 @@ static int btrfs_zero_range(struct inode *inode,
|
||||
mode);
|
||||
goto out;
|
||||
}
|
||||
if (len < sectorsize && em->block_start != EXTENT_MAP_HOLE) {
|
||||
if (len < sectorsize && em->disk_bytenr != EXTENT_MAP_HOLE) {
|
||||
free_extent_map(em);
|
||||
ret = btrfs_truncate_block(BTRFS_I(inode), offset, len,
|
||||
0);
|
||||
@ -3309,7 +3113,7 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
* the file range and, due to the previous locking we did, we know there
|
||||
* can't be more delalloc or ordered extents in the range.
|
||||
*/
|
||||
ret = btrfs_wait_ordered_range(inode, alloc_start,
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(inode), alloc_start,
|
||||
alloc_end - alloc_start);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -3337,7 +3141,7 @@ static long btrfs_fallocate(struct file *file, int mode,
|
||||
last_byte = min(extent_map_end(em), alloc_end);
|
||||
actual_end = min_t(u64, extent_map_end(em), offset + len);
|
||||
last_byte = ALIGN(last_byte, blocksize);
|
||||
if (em->block_start == EXTENT_MAP_HOLE ||
|
||||
if (em->disk_bytenr == EXTENT_MAP_HOLE ||
|
||||
(cur_offset >= inode->i_size &&
|
||||
!(em->flags & EXTENT_FLAG_PREALLOC))) {
|
||||
const u64 range_len = last_byte - cur_offset;
|
||||
@ -3920,97 +3724,6 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
|
||||
return generic_file_open(inode, filp);
|
||||
}
|
||||
|
||||
/*
* Validate the iov_iter of a direct IO read request.
*
* Returns the negative value reported by check_direct_IO() if that check
* fails, -EINVAL if two iovec segments start at the same base address, and 0
* otherwise.
*/
static int check_direct_read(struct btrfs_fs_info *fs_info,
|
||||
const struct iov_iter *iter, loff_t offset)
|
||||
{
|
||||
int ret;
|
||||
int i, seg;
|
||||
|
||||
ret = check_direct_IO(fs_info, iter, offset);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* The duplicate-base scan below is only done for iovec-backed iterators. */
if (!iter_is_iovec(iter))
|
||||
return 0;
|
||||
|
||||
/* Compare the base address of every segment with each later segment. */
for (seg = 0; seg < iter->nr_segs; seg++) {
|
||||
for (i = seg + 1; i < iter->nr_segs; i++) {
|
||||
const struct iovec *iov1 = iter_iov(iter) + seg;
|
||||
const struct iovec *iov2 = iter_iov(iter) + i;
|
||||
|
||||
if (iov1->iov_base == iov2->iov_base)
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
* Direct IO read of the file behind @iocb into @to.
*
* Returns 0 without issuing any direct IO when fs-verity is active on the
* inode or when check_direct_read() returns non-zero.
* NOTE(review): presumably a 0 return lets the caller fall back to a buffered
* read - confirm against the caller.
*
* Otherwise the inode lock is held in shared mode around btrfs_dio_read(),
* which is retried after faulting in the destination pages for as long as
* each attempt makes progress.
*
* Returns a negative error from btrfs_dio_read(), or the cumulative number of
* bytes read.
*/
static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
struct inode *inode = file_inode(iocb->ki_filp);
|
||||
size_t prev_left = 0;
|
||||
ssize_t read = 0;
|
||||
ssize_t ret;
|
||||
|
||||
/* Report 0 bytes read when fs-verity is active on the inode. */
if (fsverity_active(inode))
|
||||
return 0;
|
||||
|
||||
/* A failed iov_iter validation also yields 0 here rather than the error. */
if (check_direct_read(inode_to_fs_info(inode), to, iocb->ki_pos))
|
||||
return 0;
|
||||
|
||||
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
again:
|
||||
/*
|
||||
* This is similar to what we do for direct IO writes, see the comment
|
||||
* at btrfs_direct_write(), but we also disable page faults in addition
|
||||
* to disabling them only at the iov_iter level. This is because when
|
||||
* reading from a hole or prealloc extent, iomap calls iov_iter_zero(),
|
||||
* which can still trigger page fault ins despite having set ->nofault
|
||||
* to true of our 'to' iov_iter.
|
||||
*
|
||||
* The difference to direct IO writes is that we deadlock when trying
|
||||
* to lock the extent range in the inode's tree during the page reads
|
||||
* triggered by the fault in (while for writes it is due to waiting for
|
||||
* our own ordered extent). This is because for direct IO reads,
|
||||
* btrfs_dio_iomap_begin() returns with the extent range locked, which
|
||||
* is only unlocked in the endio callback (end_bio_extent_readpage()).
|
||||
*/
|
||||
pagefault_disable();
|
||||
to->nofault = true;
|
||||
ret = btrfs_dio_read(iocb, to, read);
|
||||
to->nofault = false;
|
||||
pagefault_enable();
|
||||
|
||||
/* No increment (+=) because iomap returns a cumulative value. */
|
||||
if (ret > 0)
|
||||
read = ret;
|
||||
|
||||
/* Bytes remain and the attempt either hit -EFAULT or was only partial. */
if (iov_iter_count(to) > 0 && (ret == -EFAULT || ret > 0)) {
|
||||
const size_t left = iov_iter_count(to);
|
||||
|
||||
if (left == prev_left) {
|
||||
/*
|
||||
* We didn't make any progress since the last attempt,
|
||||
* fallback to a buffered read for the remainder of the
|
||||
* range. This is just to avoid any possibility of looping
|
||||
* for too long.
|
||||
*/
|
||||
ret = read;
|
||||
} else {
|
||||
/*
|
||||
* We made some progress since the last retry or this is
|
||||
* the first time we are retrying. Fault in as many pages
|
||||
* as possible and retry.
|
||||
*/
|
||||
fault_in_iov_iter_writeable(to, left);
|
||||
prev_left = left;
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
|
||||
/* A negative ret takes precedence; otherwise report the bytes read so far. */
return ret < 0 ? ret : read;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
ssize_t ret = 0;
|
||||
@ -4045,8 +3758,9 @@ const struct file_operations btrfs_file_operations = {
|
||||
.fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
|
||||
};
|
||||
|
||||
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
|
||||
int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end)
|
||||
{
|
||||
struct address_space *mapping = inode->vfs_inode.i_mapping;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
@ -4063,10 +3777,9 @@ int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)
|
||||
* know better and pull this out at some point in the future, it is
|
||||
* right and you are wrong.
|
||||
*/
|
||||
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
|
||||
if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
|
||||
&BTRFS_I(inode)->runtime_flags))
|
||||
ret = filemap_fdatawrite_range(inode->i_mapping, start, end);
|
||||
ret = filemap_fdatawrite_range(mapping, start, end);
|
||||
if (!ret && test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags))
|
||||
ret = filemap_fdatawrite_range(mapping, start, end);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -37,12 +37,14 @@ int btrfs_release_file(struct inode *inode, struct file *file);
|
||||
int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
|
||||
size_t num_pages, loff_t pos, size_t write_bytes,
|
||||
struct extent_state **cached, bool noreserve);
|
||||
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end);
|
||||
int btrfs_fdatawrite_range(struct btrfs_inode *inode, loff_t start, loff_t end);
|
||||
int btrfs_check_nocow_lock(struct btrfs_inode *inode, loff_t pos,
|
||||
size_t *write_bytes, bool nowait);
|
||||
void btrfs_check_nocow_unlock(struct btrfs_inode *inode);
|
||||
bool btrfs_find_delalloc_in_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct extent_state **cached_state,
|
||||
u64 *delalloc_start_ret, u64 *delalloc_end_ret);
|
||||
int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from, size_t count);
|
||||
ssize_t btrfs_buffered_write(struct kiocb *iocb, struct iov_iter *i);
|
||||
|
||||
#endif
|
||||
|
@ -82,7 +82,6 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
|
||||
struct btrfs_path *path,
|
||||
u64 offset)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_key location;
|
||||
struct btrfs_disk_key disk_key;
|
||||
@ -116,7 +115,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root,
|
||||
* sure NOFS is set to keep us from deadlocking.
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
inode = btrfs_iget_path(fs_info->sb, location.objectid, root, path);
|
||||
inode = btrfs_iget_path(location.objectid, root, path);
|
||||
btrfs_release_path(path);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
if (IS_ERR(inode))
|
||||
@ -138,7 +137,7 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->inode)
|
||||
inode = igrab(block_group->inode);
|
||||
inode = igrab(&block_group->inode->vfs_inode);
|
||||
spin_unlock(&block_group->lock);
|
||||
if (inode)
|
||||
return inode;
|
||||
@ -157,7 +156,7 @@ struct inode *lookup_free_space_inode(struct btrfs_block_group *block_group,
|
||||
}
|
||||
|
||||
if (!test_and_set_bit(BLOCK_GROUP_FLAG_IREF, &block_group->runtime_flags))
|
||||
block_group->inode = igrab(inode);
|
||||
block_group->inode = BTRFS_I(igrab(inode));
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
return inode;
|
||||
@ -858,6 +857,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
|
||||
spin_unlock(&ctl->tree_lock);
|
||||
btrfs_err(fs_info,
|
||||
"Duplicate entries in free space cache, dumping");
|
||||
kmem_cache_free(btrfs_free_space_bitmap_cachep, e->bitmap);
|
||||
kmem_cache_free(btrfs_free_space_cachep, e);
|
||||
goto free_cache;
|
||||
}
|
||||
@ -1268,7 +1268,7 @@ static int flush_dirty_cache(struct inode *inode)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode, 0, (u64)-1);
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
|
||||
if (ret)
|
||||
clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1,
|
||||
EXTENT_DELALLOC, NULL);
|
||||
@ -1483,7 +1483,7 @@ static int __btrfs_write_out_cache(struct inode *inode,
|
||||
io_ctl->entries = entries;
|
||||
io_ctl->bitmaps = bitmaps;
|
||||
|
||||
ret = btrfs_fdatawrite_range(inode, 0, (u64)-1);
|
||||
ret = btrfs_fdatawrite_range(BTRFS_I(inode), 0, (u64)-1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
|
@ -1300,10 +1300,14 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
|
||||
btrfs_tree_lock(free_space_root->node);
|
||||
btrfs_clear_buffer_dirty(trans, free_space_root->node);
|
||||
btrfs_tree_unlock(free_space_root->node);
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
|
||||
free_space_root->node, 0, 1);
|
||||
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
|
||||
free_space_root->node, 0, 1);
|
||||
btrfs_put_root(free_space_root);
|
||||
if (ret < 0) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
btrfs_end_transaction(trans);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return btrfs_commit_transaction(trans);
|
||||
}
|
||||
|
@ -29,7 +29,6 @@
|
||||
#include "extent-io-tree.h"
|
||||
#include "async-thread.h"
|
||||
#include "block-rsv.h"
|
||||
#include "fs.h"
|
||||
|
||||
struct inode;
|
||||
struct super_block;
|
||||
@ -99,7 +98,9 @@ enum {
|
||||
/* The btrfs_fs_info created for self-tests */
|
||||
BTRFS_FS_STATE_DUMMY_FS_INFO,
|
||||
|
||||
BTRFS_FS_STATE_NO_CSUMS,
|
||||
/* Checksum errors are ignored. */
|
||||
BTRFS_FS_STATE_NO_DATA_CSUMS,
|
||||
BTRFS_FS_STATE_SKIP_META_CSUMS,
|
||||
|
||||
/* Indicates there was an error cleaning up a log tree. */
|
||||
BTRFS_FS_STATE_LOG_CLEANUP_ERROR,
|
||||
@ -225,6 +226,8 @@ enum {
|
||||
BTRFS_MOUNT_IGNOREDATACSUMS = (1UL << 28),
|
||||
BTRFS_MOUNT_NODISCARD = (1UL << 29),
|
||||
BTRFS_MOUNT_NOSPACECACHE = (1UL << 30),
|
||||
BTRFS_MOUNT_IGNOREMETACSUMS = (1UL << 31),
|
||||
BTRFS_MOUNT_IGNORESUPERFLAGS = (1ULL << 32),
|
||||
};
|
||||
|
||||
/*
|
||||
@ -958,7 +961,7 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
|
||||
/*
|
||||
* Count how many fs_info->max_extent_size cover the @size
|
||||
*/
|
||||
static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
|
||||
static inline u32 count_max_extents(const struct btrfs_fs_info *fs_info, u64 size)
|
||||
{
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
if (!fs_info)
|
||||
@ -1019,7 +1022,7 @@ void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag,
|
||||
#define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \
|
||||
BTRFS_MOUNT_##opt)
|
||||
|
||||
static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
|
||||
static inline int btrfs_fs_closing(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
/* Do it this way so we only ever do one test_bit in the normal case. */
|
||||
if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) {
|
||||
@ -1038,7 +1041,7 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
|
||||
* since setting and checking for SB_RDONLY in the superblock's flags is not
|
||||
* atomic.
|
||||
*/
|
||||
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
|
||||
static inline int btrfs_need_cleaner_sleep(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
|
||||
btrfs_fs_closing(fs_info);
|
||||
@ -1059,7 +1062,7 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
|
||||
|
||||
#define EXPORT_FOR_TESTS
|
||||
|
||||
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
|
||||
static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state);
|
||||
}
|
||||
@ -1070,7 +1073,7 @@ void btrfs_test_destroy_inode(struct inode *inode);
|
||||
|
||||
#define EXPORT_FOR_TESTS static
|
||||
|
||||
static inline int btrfs_is_testing(struct btrfs_fs_info *fs_info)
|
||||
static inline int btrfs_is_testing(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
@ -141,8 +141,8 @@ static int btrfs_del_inode_extref(struct btrfs_trans_handle *trans,
|
||||
extref = btrfs_find_name_in_ext_backref(path->nodes[0], path->slots[0],
|
||||
ref_objectid, name);
|
||||
if (!extref) {
|
||||
btrfs_handle_fs_error(root->fs_info, -ENOENT, NULL);
|
||||
ret = -EROFS;
|
||||
btrfs_abort_transaction(trans, -ENOENT);
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
1475
fs/btrfs/inode.c
1475
fs/btrfs/inode.c
File diff suppressed because it is too large
Load Diff
@ -375,15 +375,15 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
|
||||
return PTR_ERR(trans);
|
||||
|
||||
if (comp) {
|
||||
ret = btrfs_set_prop(trans, inode, "btrfs.compression", comp,
|
||||
strlen(comp), 0);
|
||||
ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression",
|
||||
comp, strlen(comp), 0);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_end_trans;
|
||||
}
|
||||
} else {
|
||||
ret = btrfs_set_prop(trans, inode, "btrfs.compression", NULL,
|
||||
0, 0);
|
||||
ret = btrfs_set_prop(trans, BTRFS_I(inode), "btrfs.compression",
|
||||
NULL, 0, 0);
|
||||
if (ret && ret != -ENODATA) {
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out_end_trans;
|
||||
@ -552,7 +552,7 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int __pure btrfs_is_empty_uuid(u8 *uuid)
|
||||
int __pure btrfs_is_empty_uuid(const u8 *uuid)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -658,15 +658,10 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_release_rsv;
|
||||
}
|
||||
ret = btrfs_record_root_in_trans(trans, BTRFS_I(dir)->root);
|
||||
if (ret)
|
||||
goto out;
|
||||
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
|
||||
qgroup_reserved = 0;
|
||||
trans->block_rsv = &block_rsv;
|
||||
trans->bytes_reserved = block_rsv.size;
|
||||
/* Tree log can't currently deal with an inode which is a new root. */
|
||||
btrfs_set_log_full_commit(trans);
|
||||
|
||||
ret = btrfs_qgroup_inherit(trans, 0, objectid, btrfs_root_id(root), inherit);
|
||||
if (ret)
|
||||
@ -719,6 +714,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
|
||||
ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
|
||||
root_item);
|
||||
if (ret) {
|
||||
int ret2;
|
||||
|
||||
/*
|
||||
* Since we don't abort the transaction in this case, free the
|
||||
* tree block so that we don't leak space and leave the
|
||||
@ -729,7 +726,9 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
|
||||
btrfs_tree_lock(leaf);
|
||||
btrfs_clear_buffer_dirty(trans, leaf);
|
||||
btrfs_tree_unlock(leaf);
|
||||
btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
|
||||
ret2 = btrfs_free_tree_block(trans, objectid, leaf, 0, 1);
|
||||
if (ret2 < 0)
|
||||
btrfs_abort_transaction(trans, ret2);
|
||||
free_extent_buffer(leaf);
|
||||
goto out;
|
||||
}
|
||||
@ -767,6 +766,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
|
||||
|
||||
d_instantiate_new(dentry, new_inode_args.inode);
|
||||
new_inode_args.inode = NULL;
|
||||
|
||||
@ -854,7 +855,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
|
||||
pending_snapshot->dentry = dentry;
|
||||
pending_snapshot->root = root;
|
||||
pending_snapshot->readonly = readonly;
|
||||
pending_snapshot->dir = dir;
|
||||
pending_snapshot->dir = BTRFS_I(dir);
|
||||
pending_snapshot->inherit = inherit;
|
||||
|
||||
trans = btrfs_start_transaction(root, 0);
|
||||
@ -1070,7 +1071,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
|
||||
atomic_inc(&root->snapshot_force_cow);
|
||||
snapshot_force_cow = true;
|
||||
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
|
||||
|
||||
ret = btrfs_mksubvol(parent, idmap, name, namelen,
|
||||
root, readonly, inherit);
|
||||
@ -1917,8 +1918,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
|
||||
struct btrfs_ioctl_ino_lookup_user_args *args)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct btrfs_key upper_limit = BTRFS_I(inode)->location;
|
||||
u64 upper_limit = btrfs_ino(BTRFS_I(inode));
|
||||
u64 treeid = btrfs_root_id(BTRFS_I(inode)->root);
|
||||
u64 dirid = args->dirid;
|
||||
unsigned long item_off;
|
||||
@ -1944,7 +1944,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
|
||||
* If the bottom subvolume does not exist directly under upper_limit,
|
||||
* construct the path from the bottom up.
|
||||
*/
|
||||
if (dirid != upper_limit.objectid) {
|
||||
if (dirid != upper_limit) {
|
||||
ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];
|
||||
|
||||
root = btrfs_get_fs_root(fs_info, treeid, true);
|
||||
@ -2006,7 +2006,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
|
||||
* btree and lock the same leaf.
|
||||
*/
|
||||
btrfs_release_path(path);
|
||||
temp_inode = btrfs_iget(sb, key2.objectid, root);
|
||||
temp_inode = btrfs_iget(key2.objectid, root);
|
||||
if (IS_ERR(temp_inode)) {
|
||||
ret = PTR_ERR(temp_inode);
|
||||
goto out_put;
|
||||
@ -2019,7 +2019,7 @@ static int btrfs_search_path_in_tree_user(struct mnt_idmap *idmap,
|
||||
goto out_put;
|
||||
}
|
||||
|
||||
if (key.offset == upper_limit.objectid)
|
||||
if (key.offset == upper_limit)
|
||||
break;
|
||||
if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
|
||||
ret = -EACCES;
|
||||
@ -2140,7 +2140,7 @@ static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
|
||||
inode = file_inode(file);
|
||||
|
||||
if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
|
||||
BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
|
||||
btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
|
||||
/*
|
||||
* The subvolume does not exist under fd with which this is
|
||||
* called
|
||||
@ -3807,12 +3807,29 @@ drop_write:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Quick check for ioctl handlers if quotas are enabled. Proper locking must be
|
||||
* done before any operations.
*
* Returns true when fs_info->quota_root is set and false when it is NULL. The
* pointer is read while holding fs_info->qgroup_ioctl_lock, but the lock is
* released before returning, so the result is only a snapshot.
|
||||
*/
|
||||
static bool qgroup_enabled(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
bool ret = true;
|
||||
|
||||
mutex_lock(&fs_info->qgroup_ioctl_lock);
|
||||
if (!fs_info->quota_root)
|
||||
ret = false;
|
||||
mutex_unlock(&fs_info->qgroup_ioctl_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_ioctl_qgroup_assign_args *sa;
|
||||
struct btrfs_qgroup_list *prealloc = NULL;
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
int err;
|
||||
@ -3820,6 +3837,9 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!qgroup_enabled(root->fs_info))
|
||||
return -ENOTCONN;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -3830,14 +3850,27 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
|
||||
goto drop_write;
|
||||
}
|
||||
|
||||
if (sa->assign) {
|
||||
prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
|
||||
if (!prealloc) {
|
||||
ret = -ENOMEM;
|
||||
goto drop_write;
|
||||
}
|
||||
}
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Prealloc ownership is moved to the relation handler, there it's used
|
||||
* or freed on error.
|
||||
*/
|
||||
if (sa->assign) {
|
||||
ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
|
||||
ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst, prealloc);
|
||||
prealloc = NULL;
|
||||
} else {
|
||||
ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
|
||||
}
|
||||
@ -3847,13 +3880,15 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
|
||||
err = btrfs_run_qgroups(trans);
|
||||
mutex_unlock(&fs_info->qgroup_ioctl_lock);
|
||||
if (err < 0)
|
||||
btrfs_handle_fs_error(fs_info, err,
|
||||
"failed to update qgroup status and info");
|
||||
btrfs_warn(fs_info,
|
||||
"qgroup status update failed after %s relation, marked as inconsistent",
|
||||
sa->assign ? "adding" : "deleting");
|
||||
err = btrfs_end_transaction(trans);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
out:
|
||||
kfree(prealloc);
|
||||
kfree(sa);
|
||||
drop_write:
|
||||
mnt_drop_write_file(file);
|
||||
@ -3872,6 +3907,9 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!qgroup_enabled(root->fs_info))
|
||||
return -ENOTCONN;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -3928,6 +3966,9 @@ static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!qgroup_enabled(root->fs_info))
|
||||
return -ENOTCONN;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -3973,6 +4014,9 @@ static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (!qgroup_enabled(fs_info))
|
||||
return -ENOTCONN;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -4429,7 +4473,7 @@ out_drop_write:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int _btrfs_ioctl_send(struct inode *inode, void __user *argp, bool compat)
|
||||
static int _btrfs_ioctl_send(struct btrfs_inode *inode, void __user *argp, bool compat)
|
||||
{
|
||||
struct btrfs_ioctl_send_args *arg;
|
||||
int ret;
|
||||
@ -4751,10 +4795,10 @@ long btrfs_ioctl(struct file *file, unsigned int
|
||||
return btrfs_ioctl_set_received_subvol_32(file, argp);
|
||||
#endif
|
||||
case BTRFS_IOC_SEND:
|
||||
return _btrfs_ioctl_send(inode, argp, false);
|
||||
return _btrfs_ioctl_send(BTRFS_I(inode), argp, false);
|
||||
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
|
||||
case BTRFS_IOC_SEND_32:
|
||||
return _btrfs_ioctl_send(inode, argp, true);
|
||||
return _btrfs_ioctl_send(BTRFS_I(inode), argp, true);
|
||||
#endif
|
||||
case BTRFS_IOC_GET_DEV_STATS:
|
||||
return btrfs_ioctl_get_dev_stats(fs_info, argp);
|
||||
|
@ -19,7 +19,7 @@ int btrfs_fileattr_set(struct mnt_idmap *idmap,
|
||||
struct dentry *dentry, struct fileattr *fa);
|
||||
int btrfs_ioctl_get_supported_features(void __user *arg);
|
||||
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode);
|
||||
int __pure btrfs_is_empty_uuid(u8 *uuid);
|
||||
int __pure btrfs_is_empty_uuid(const u8 *uuid);
|
||||
void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_balance_args *bargs);
|
||||
|
||||
|
@ -11,7 +11,6 @@
|
||||
#include <linux/lockdep.h>
|
||||
#include <linux/percpu_counter.h>
|
||||
#include "extent_io.h"
|
||||
#include "locking.h"
|
||||
|
||||
struct extent_buffer;
|
||||
struct btrfs_path;
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/maple_tree.h>
|
||||
#include <linux/list.h>
|
||||
#include "lru_cache.h"
|
||||
|
||||
/*
|
||||
* A cache entry. This is meant to be embedded in a structure of a user of
|
||||
|
@ -258,8 +258,8 @@ int lzo_compress_folios(struct list_head *ws, struct address_space *mapping,
|
||||
workspace->cbuf, &out_len,
|
||||
workspace->mem);
|
||||
kunmap_local(data_in);
|
||||
if (ret < 0) {
|
||||
pr_debug("BTRFS: lzo in loop returned %d\n", ret);
|
||||
if (unlikely(ret < 0)) {
|
||||
/* lzo1x_1_compress never fails. */
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -354,11 +354,14 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
* and all sectors should be used.
|
||||
* If this happens, it means the compressed extent is corrupted.
|
||||
*/
|
||||
if (len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
|
||||
round_up(len_in, sectorsize) < cb->compressed_len) {
|
||||
if (unlikely(len_in > min_t(size_t, BTRFS_MAX_COMPRESSED, cb->compressed_len) ||
|
||||
round_up(len_in, sectorsize) < cb->compressed_len)) {
|
||||
struct btrfs_inode *inode = cb->bbio.inode;
|
||||
|
||||
btrfs_err(fs_info,
|
||||
"invalid lzo header, lzo len %u compressed len %u",
|
||||
len_in, cb->compressed_len);
|
||||
"lzo header invalid, root %llu inode %llu offset %llu lzo len %u compressed len %u",
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
cb->start, len_in, cb->compressed_len);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
@ -383,13 +386,17 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
kunmap_local(kaddr);
|
||||
cur_in += LZO_LEN;
|
||||
|
||||
if (seg_len > WORKSPACE_CBUF_LENGTH) {
|
||||
if (unlikely(seg_len > WORKSPACE_CBUF_LENGTH)) {
|
||||
struct btrfs_inode *inode = cb->bbio.inode;
|
||||
|
||||
/*
|
||||
* seg_len shouldn't be larger than we have allocated
|
||||
* for workspace->cbuf
|
||||
*/
|
||||
btrfs_err(fs_info, "unexpectedly large lzo segment len %u",
|
||||
seg_len);
|
||||
btrfs_err(fs_info,
|
||||
"lzo segment too big, root %llu inode %llu offset %llu len %u",
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
cb->start, seg_len);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -399,8 +406,13 @@ int lzo_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
/* Decompress the data */
|
||||
ret = lzo1x_decompress_safe(workspace->cbuf, seg_len,
|
||||
workspace->buf, &out_len);
|
||||
if (ret != LZO_E_OK) {
|
||||
btrfs_err(fs_info, "failed to decompress");
|
||||
if (unlikely(ret != LZO_E_OK)) {
|
||||
struct btrfs_inode *inode = cb->bbio.inode;
|
||||
|
||||
btrfs_err(fs_info,
|
||||
"lzo decompression failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
cb->start);
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -454,8 +466,13 @@ int lzo_decompress(struct list_head *ws, const u8 *data_in,
|
||||
|
||||
out_len = sectorsize;
|
||||
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
|
||||
if (ret != LZO_E_OK) {
|
||||
pr_warn("BTRFS: decompress failed!\n");
|
||||
if (unlikely(ret != LZO_E_OK)) {
|
||||
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
|
||||
|
||||
btrfs_err(fs_info,
|
||||
"lzo decompression failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
page_offset(dest_page));
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
@ -20,7 +20,8 @@ static const char fs_state_chars[] = {
|
||||
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
|
||||
[BTRFS_FS_STATE_DEV_REPLACING] = 'R',
|
||||
[BTRFS_FS_STATE_DUMMY_FS_INFO] = 0,
|
||||
[BTRFS_FS_STATE_NO_CSUMS] = 'C',
|
||||
[BTRFS_FS_STATE_NO_DATA_CSUMS] = 'C',
|
||||
[BTRFS_FS_STATE_SKIP_META_CSUMS] = 'S',
|
||||
[BTRFS_FS_STATE_LOG_CLEANUP_ERROR] = 'L',
|
||||
};
|
||||
|
||||
|
@ -66,7 +66,7 @@ struct rb_simple_node {
|
||||
u64 bytenr;
|
||||
};
|
||||
|
||||
static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
|
||||
static inline struct rb_node *rb_simple_search(const struct rb_root *root, u64 bytenr)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
struct rb_simple_node *entry;
|
||||
@ -93,7 +93,7 @@ static inline struct rb_node *rb_simple_search(struct rb_root *root, u64 bytenr)
|
||||
* Return the rb_node that start at or after @bytenr. If there is no entry at
|
||||
* or after @bytenr return NULL.
|
||||
*/
|
||||
static inline struct rb_node *rb_simple_search_first(struct rb_root *root,
|
||||
static inline struct rb_node *rb_simple_search_first(const struct rb_root *root,
|
||||
u64 bytenr)
|
||||
{
|
||||
struct rb_node *node = root->rb_node, *ret = NULL;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "qgroup.h"
|
||||
#include "subpage.h"
|
||||
#include "file.h"
|
||||
#include "block-group.h"
|
||||
|
||||
static struct kmem_cache *btrfs_ordered_extent_cache;
|
||||
|
||||
@ -179,7 +180,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
|
||||
entry->disk_num_bytes = disk_num_bytes;
|
||||
entry->offset = offset;
|
||||
entry->bytes_left = num_bytes;
|
||||
entry->inode = igrab(&inode->vfs_inode);
|
||||
entry->inode = BTRFS_I(igrab(&inode->vfs_inode));
|
||||
entry->compress_type = compress_type;
|
||||
entry->truncated_len = (u64)-1;
|
||||
entry->qgroup_rsv = qgroup_rsv;
|
||||
@ -207,7 +208,7 @@ static struct btrfs_ordered_extent *alloc_ordered_extent(
|
||||
|
||||
static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(entry->inode);
|
||||
struct btrfs_inode *inode = entry->inode;
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct rb_node *node;
|
||||
@ -223,7 +224,7 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
spin_lock_irq(&inode->ordered_tree_lock);
|
||||
node = tree_insert(&inode->ordered_tree, entry->file_offset,
|
||||
&entry->rb_node);
|
||||
if (node)
|
||||
if (unlikely(node))
|
||||
btrfs_panic(fs_info, -EEXIST,
|
||||
"inconsistency in ordered tree at offset %llu",
|
||||
entry->file_offset);
|
||||
@ -263,17 +264,39 @@ static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
*/
|
||||
struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
|
||||
struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
|
||||
u64 disk_num_bytes, u64 offset, unsigned long flags,
|
||||
int compress_type)
|
||||
const struct btrfs_file_extent *file_extent, unsigned long flags)
|
||||
{
|
||||
struct btrfs_ordered_extent *entry;
|
||||
|
||||
ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
|
||||
|
||||
entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
|
||||
disk_bytenr, disk_num_bytes, offset, flags,
|
||||
compress_type);
|
||||
/*
|
||||
* For regular writes, we just use the members in @file_extent.
|
||||
*
|
||||
* For NOCOW, we don't really care about the numbers except @start and
|
||||
* file_extent->num_bytes, as we won't insert a file extent item at all.
|
||||
*
|
||||
* For PREALLOC, we do not use ordered extent members, but
|
||||
* btrfs_mark_extent_written() handles everything.
|
||||
*
|
||||
* So here we always pass 0 as offset for NOCOW/PREALLOC ordered extents,
|
||||
* or btrfs_split_ordered_extent() cannot handle it correctly.
|
||||
*/
|
||||
if (flags & ((1U << BTRFS_ORDERED_NOCOW) | (1U << BTRFS_ORDERED_PREALLOC)))
|
||||
entry = alloc_ordered_extent(inode, file_offset,
|
||||
file_extent->num_bytes,
|
||||
file_extent->num_bytes,
|
||||
file_extent->disk_bytenr + file_extent->offset,
|
||||
file_extent->num_bytes, 0, flags,
|
||||
file_extent->compression);
|
||||
else
|
||||
entry = alloc_ordered_extent(inode, file_offset,
|
||||
file_extent->num_bytes,
|
||||
file_extent->ram_bytes,
|
||||
file_extent->disk_bytenr,
|
||||
file_extent->disk_num_bytes,
|
||||
file_extent->offset, flags,
|
||||
file_extent->compression);
|
||||
if (!IS_ERR(entry))
|
||||
insert_ordered_extent(entry);
|
||||
return entry;
|
||||
@ -287,7 +310,7 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
|
||||
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(entry->inode);
|
||||
struct btrfs_inode *inode = entry->inode;
|
||||
|
||||
spin_lock_irq(&inode->ordered_tree_lock);
|
||||
list_add_tail(&sum->list, &entry->list);
|
||||
@ -297,7 +320,7 @@ void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
|
||||
void btrfs_mark_ordered_extent_error(struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
if (!test_and_set_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
|
||||
mapping_set_error(ordered->inode->i_mapping, -EIO);
|
||||
mapping_set_error(ordered->inode->vfs_inode.i_mapping, -EIO);
|
||||
}
|
||||
|
||||
static void finish_ordered_fn(struct btrfs_work *work)
|
||||
@ -312,7 +335,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
struct page *page, u64 file_offset,
|
||||
u64 len, bool uptodate)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
|
||||
lockdep_assert_held(&inode->ordered_tree_lock);
|
||||
@ -365,7 +388,7 @@ static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
|
||||
static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
|
||||
fs_info->endio_freespace_worker : fs_info->endio_write_workers;
|
||||
@ -374,11 +397,11 @@ static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
|
||||
btrfs_queue_work(wq, &ordered->work);
|
||||
}
|
||||
|
||||
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
struct page *page, u64 file_offset, u64 len,
|
||||
bool uptodate)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
unsigned long flags;
|
||||
bool ret;
|
||||
|
||||
@ -421,7 +444,6 @@ bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
|
||||
if (ret)
|
||||
btrfs_queue_ordered_fn(ordered);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -588,14 +610,14 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
struct list_head *cur;
|
||||
struct btrfs_ordered_sum *sum;
|
||||
|
||||
trace_btrfs_ordered_extent_put(BTRFS_I(entry->inode), entry);
|
||||
trace_btrfs_ordered_extent_put(entry->inode, entry);
|
||||
|
||||
if (refcount_dec_and_test(&entry->refs)) {
|
||||
ASSERT(list_empty(&entry->root_extent_list));
|
||||
ASSERT(list_empty(&entry->log_list));
|
||||
ASSERT(RB_EMPTY_NODE(&entry->rb_node));
|
||||
if (entry->inode)
|
||||
btrfs_add_delayed_iput(BTRFS_I(entry->inode));
|
||||
btrfs_add_delayed_iput(entry->inode);
|
||||
while (!list_empty(&entry->list)) {
|
||||
cur = entry->list.next;
|
||||
sum = list_entry(cur, struct btrfs_ordered_sum, list);
|
||||
@ -626,7 +648,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
|
||||
freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
|
||||
|
||||
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
|
||||
/* This is paired with btrfs_alloc_ordered_extent. */
|
||||
/* This is paired with alloc_ordered_extent(). */
|
||||
spin_lock(&btrfs_inode->lock);
|
||||
btrfs_mod_outstanding_extents(btrfs_inode, -1);
|
||||
spin_unlock(&btrfs_inode->lock);
|
||||
@ -712,11 +734,11 @@ static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
|
||||
}
|
||||
|
||||
/*
|
||||
* wait for all the ordered extents in a root. This is done when balancing
|
||||
* space between drives.
|
||||
* Wait for all the ordered extents in a root. Use @bg as range or do whole
|
||||
* range if it's NULL.
|
||||
*/
|
||||
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
const struct btrfs_block_group *bg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
LIST_HEAD(splice);
|
||||
@ -724,7 +746,17 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
LIST_HEAD(works);
|
||||
struct btrfs_ordered_extent *ordered, *next;
|
||||
u64 count = 0;
|
||||
const u64 range_end = range_start + range_len;
|
||||
u64 range_start, range_len;
|
||||
u64 range_end;
|
||||
|
||||
if (bg) {
|
||||
range_start = bg->start;
|
||||
range_len = bg->length;
|
||||
} else {
|
||||
range_start = 0;
|
||||
range_len = U64_MAX;
|
||||
}
|
||||
range_end = range_start + range_len;
|
||||
|
||||
mutex_lock(&root->ordered_extent_mutex);
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
@ -751,10 +783,10 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
btrfs_queue_work(fs_info->flush_workers, &ordered->flush_work);
|
||||
|
||||
cond_resched();
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
if (nr != U64_MAX)
|
||||
nr--;
|
||||
count++;
|
||||
spin_lock(&root->ordered_extent_lock);
|
||||
}
|
||||
list_splice_tail(&skipped, &root->ordered_extents);
|
||||
list_splice_tail(&splice, &root->ordered_extents);
|
||||
@ -771,8 +803,12 @@ u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
return count;
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for @nr ordered extents that intersect the @bg, or the whole range of
|
||||
* the filesystem if @bg is NULL.
|
||||
*/
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
const struct btrfs_block_group *bg)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
LIST_HEAD(splice);
|
||||
@ -790,14 +826,13 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
&fs_info->ordered_roots);
|
||||
spin_unlock(&fs_info->ordered_root_lock);
|
||||
|
||||
done = btrfs_wait_ordered_extents(root, nr,
|
||||
range_start, range_len);
|
||||
done = btrfs_wait_ordered_extents(root, nr, bg);
|
||||
btrfs_put_root(root);
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
if (nr != U64_MAX) {
|
||||
if (nr != U64_MAX)
|
||||
nr -= done;
|
||||
}
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
}
|
||||
list_splice_tail(&splice, &fs_info->ordered_roots);
|
||||
spin_unlock(&fs_info->ordered_root_lock);
|
||||
@ -814,7 +849,7 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
{
|
||||
u64 start = entry->file_offset;
|
||||
u64 end = start + entry->num_bytes - 1;
|
||||
struct btrfs_inode *inode = BTRFS_I(entry->inode);
|
||||
struct btrfs_inode *inode = entry->inode;
|
||||
bool freespace_inode;
|
||||
|
||||
trace_btrfs_ordered_extent_start(inode, entry);
|
||||
@ -841,7 +876,7 @@ void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry)
|
||||
/*
|
||||
* Used to wait on ordered extents across a large range of bytes.
|
||||
*/
|
||||
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||
int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len)
|
||||
{
|
||||
int ret = 0;
|
||||
int ret_wb = 0;
|
||||
@ -871,11 +906,11 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
|
||||
* before the ordered extents complete - to avoid failures (-EEXIST)
|
||||
* when adding the new ordered extents to the ordered tree.
|
||||
*/
|
||||
ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end);
|
||||
ret_wb = filemap_fdatawait_range(inode->vfs_inode.i_mapping, start, orig_end);
|
||||
|
||||
end = orig_end;
|
||||
while (1) {
|
||||
ordered = btrfs_lookup_first_ordered_extent(BTRFS_I(inode), end);
|
||||
ordered = btrfs_lookup_first_ordered_extent(inode, end);
|
||||
if (!ordered)
|
||||
break;
|
||||
if (ordered->file_offset > orig_end) {
|
||||
@ -1173,7 +1208,7 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
|
||||
struct btrfs_ordered_extent *btrfs_split_ordered_extent(
|
||||
struct btrfs_ordered_extent *ordered, u64 len)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
struct btrfs_root *root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
u64 file_offset = ordered->file_offset;
|
||||
@ -1212,15 +1247,32 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
|
||||
/* One ref for the tree. */
|
||||
refcount_inc(&new->refs);
|
||||
|
||||
/*
|
||||
* Take the root's ordered_extent_lock to avoid a race with
|
||||
* btrfs_wait_ordered_extents() when updating the disk_bytenr and
|
||||
* disk_num_bytes fields of the ordered extent below. And we disable
|
||||
* IRQs because the inode's ordered_tree_lock is used in IRQ context
|
||||
* elsewhere.
|
||||
*
|
||||
* There's no concern about a previous caller of
|
||||
* btrfs_wait_ordered_extents() getting the trimmed ordered extent
|
||||
* before we insert the new one, because even if it gets the ordered
|
||||
* extent before it's trimmed and the new one inserted, right before it
|
||||
* uses it or during its use, the ordered extent might have been
|
||||
* trimmed in the meanwhile, and it missed the new ordered extent.
|
||||
* There's no way around this and it's harmless for current use cases,
|
||||
* so we take the root's ordered_extent_lock to fix that race during
|
||||
* trimming and silence tools like KCSAN.
|
||||
*/
|
||||
spin_lock_irq(&root->ordered_extent_lock);
|
||||
spin_lock(&inode->ordered_tree_lock);
|
||||
/* Remove from tree once */
|
||||
node = &ordered->rb_node;
|
||||
rb_erase(node, &inode->ordered_tree);
|
||||
RB_CLEAR_NODE(node);
|
||||
if (inode->ordered_tree_last == node)
|
||||
inode->ordered_tree_last = NULL;
|
||||
|
||||
/*
|
||||
* We don't have overlapping ordered extents (that would imply double
|
||||
* allocation of extents) and we checked above that the split length
|
||||
* does not cross the ordered extent's num_bytes field, so there's
|
||||
* no need to remove it and re-insert it in the tree.
|
||||
*/
|
||||
ordered->file_offset += len;
|
||||
ordered->disk_bytenr += len;
|
||||
ordered->num_bytes -= len;
|
||||
@ -1250,18 +1302,10 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent(
|
||||
offset += sum->len;
|
||||
}
|
||||
|
||||
/* Re-insert the node */
|
||||
node = tree_insert(&inode->ordered_tree, ordered->file_offset,
|
||||
&ordered->rb_node);
|
||||
if (node)
|
||||
btrfs_panic(fs_info, -EEXIST,
|
||||
"zoned: inconsistency in ordered tree at offset %llu",
|
||||
ordered->file_offset);
|
||||
|
||||
node = tree_insert(&inode->ordered_tree, new->file_offset, &new->rb_node);
|
||||
if (node)
|
||||
if (unlikely(node))
|
||||
btrfs_panic(fs_info, -EEXIST,
|
||||
"zoned: inconsistency in ordered tree at offset %llu",
|
||||
"inconsistency in ordered tree at offset %llu after split",
|
||||
new->file_offset);
|
||||
spin_unlock(&inode->ordered_tree_lock);
|
||||
|
||||
|
@ -130,7 +130,7 @@ struct btrfs_ordered_extent {
|
||||
refcount_t refs;
|
||||
|
||||
/* the inode we belong to */
|
||||
struct inode *inode;
|
||||
struct btrfs_inode *inode;
|
||||
|
||||
/* list of checksums for insertion when the extent io is done */
|
||||
struct list_head list;
|
||||
@ -162,7 +162,7 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
|
||||
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
|
||||
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
|
||||
struct btrfs_ordered_extent *entry);
|
||||
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
void btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
|
||||
struct page *page, u64 file_offset, u64 len,
|
||||
bool uptodate);
|
||||
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
|
||||
@ -171,17 +171,28 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
|
||||
bool btrfs_dec_test_ordered_pending(struct btrfs_inode *inode,
|
||||
struct btrfs_ordered_extent **cached,
|
||||
u64 file_offset, u64 io_size);
|
||||
|
||||
/*
|
||||
* This represents details about the target file extent item of a write operation.
|
||||
*/
|
||||
struct btrfs_file_extent {
|
||||
u64 disk_bytenr;
|
||||
u64 disk_num_bytes;
|
||||
u64 num_bytes;
|
||||
u64 ram_bytes;
|
||||
u64 offset;
|
||||
u8 compression;
|
||||
};
|
||||
|
||||
struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
|
||||
struct btrfs_inode *inode, u64 file_offset,
|
||||
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
|
||||
u64 disk_num_bytes, u64 offset, unsigned long flags,
|
||||
int compress_type);
|
||||
const struct btrfs_file_extent *file_extent, unsigned long flags);
|
||||
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
|
||||
struct btrfs_ordered_sum *sum);
|
||||
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
|
||||
u64 file_offset);
|
||||
void btrfs_start_ordered_extent(struct btrfs_ordered_extent *entry);
|
||||
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
|
||||
int btrfs_wait_ordered_range(struct btrfs_inode *inode, u64 start, u64 len);
|
||||
struct btrfs_ordered_extent *
|
||||
btrfs_lookup_first_ordered_extent(struct btrfs_inode *inode, u64 file_offset);
|
||||
struct btrfs_ordered_extent *btrfs_lookup_first_ordered_range(
|
||||
@ -193,9 +204,9 @@ struct btrfs_ordered_extent *btrfs_lookup_ordered_range(
|
||||
void btrfs_get_ordered_extents_for_logging(struct btrfs_inode *inode,
|
||||
struct list_head *list);
|
||||
u64 btrfs_wait_ordered_extents(struct btrfs_root *root, u64 nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
const struct btrfs_block_group *bg);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
const struct btrfs_block_group *bg);
|
||||
void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
|
||||
u64 end,
|
||||
struct extent_state **cached_state);
|
||||
|
@ -109,7 +109,7 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
|
||||
btrfs_err(eb->fs_info,
|
||||
"unexpected extent item size, has %u expect >= %zu",
|
||||
item_size, sizeof(*ei));
|
||||
btrfs_handle_fs_error(eb->fs_info, -EUCLEAN, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
|
||||
@ -208,11 +208,6 @@ static void print_raid_stripe_key(const struct extent_buffer *eb, u32 item_size,
|
||||
struct btrfs_stripe_extent *stripe)
|
||||
{
|
||||
const int num_stripes = btrfs_num_raid_stripes(item_size);
|
||||
const u8 encoding = btrfs_stripe_extent_encoding(eb, stripe);
|
||||
|
||||
pr_info("\t\t\tencoding: %s\n",
|
||||
(encoding && encoding < BTRFS_NR_RAID_TYPES) ?
|
||||
btrfs_raid_array[encoding].raid_name : "unknown");
|
||||
|
||||
for (int i = 0; i < num_stripes; i++)
|
||||
pr_info("\t\t\tstride %d devid %llu physical %llu\n",
|
||||
@ -310,6 +305,9 @@ void btrfs_print_leaf(const struct extent_buffer *l)
|
||||
case BTRFS_EXTENT_DATA_KEY:
|
||||
fi = btrfs_item_ptr(l, i,
|
||||
struct btrfs_file_extent_item);
|
||||
pr_info("\t\tgeneration %llu type %hhu\n",
|
||||
btrfs_file_extent_generation(l, fi),
|
||||
btrfs_file_extent_type(l, fi));
|
||||
if (btrfs_file_extent_type(l, fi) ==
|
||||
BTRFS_FILE_EXTENT_INLINE) {
|
||||
pr_info("\t\tinline extent data size %llu\n",
|
||||
|
@ -27,7 +27,7 @@ struct prop_handler {
|
||||
int (*validate)(const struct btrfs_inode *inode, const char *value,
|
||||
size_t len);
|
||||
int (*apply)(struct inode *inode, const char *value, size_t len);
|
||||
const char *(*extract)(struct inode *inode);
|
||||
const char *(*extract)(const struct inode *inode);
|
||||
bool (*ignore)(const struct btrfs_inode *inode);
|
||||
int inheritable;
|
||||
};
|
||||
@ -104,7 +104,7 @@ bool btrfs_ignore_prop(const struct btrfs_inode *inode, const char *name)
|
||||
return handler->ignore(inode);
|
||||
}
|
||||
|
||||
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct btrfs_inode *inode,
|
||||
const char *name, const char *value, size_t value_len,
|
||||
int flags)
|
||||
{
|
||||
@ -116,29 +116,29 @@ int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
return -EINVAL;
|
||||
|
||||
if (value_len == 0) {
|
||||
ret = btrfs_setxattr(trans, inode, handler->xattr_name,
|
||||
ret = btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name,
|
||||
NULL, 0, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = handler->apply(inode, NULL, 0);
|
||||
ret = handler->apply(&inode->vfs_inode, NULL, 0);
|
||||
ASSERT(ret == 0);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = btrfs_setxattr(trans, inode, handler->xattr_name, value,
|
||||
ret = btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name, value,
|
||||
value_len, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = handler->apply(inode, value, value_len);
|
||||
ret = handler->apply(&inode->vfs_inode, value, value_len);
|
||||
if (ret) {
|
||||
btrfs_setxattr(trans, inode, handler->xattr_name, NULL,
|
||||
btrfs_setxattr(trans, &inode->vfs_inode, handler->xattr_name, NULL,
|
||||
0, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
set_bit(BTRFS_INODE_HAS_PROPS, &BTRFS_I(inode)->runtime_flags);
|
||||
set_bit(BTRFS_INODE_HAS_PROPS, &inode->runtime_flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -359,7 +359,7 @@ static bool prop_compression_ignore(const struct btrfs_inode *inode)
|
||||
return false;
|
||||
}
|
||||
|
||||
static const char *prop_compression_extract(struct inode *inode)
|
||||
static const char *prop_compression_extract(const struct inode *inode)
|
||||
{
|
||||
switch (BTRFS_I(inode)->prop_compress) {
|
||||
case BTRFS_COMPRESS_ZLIB:
|
||||
@ -385,7 +385,7 @@ static struct prop_handler prop_handlers[] = {
|
||||
};
|
||||
|
||||
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode, struct inode *parent)
|
||||
struct inode *inode, const struct inode *parent)
|
||||
{
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
|
@ -15,7 +15,7 @@ struct btrfs_trans_handle;
|
||||
|
||||
int __init btrfs_props_init(void);
|
||||
|
||||
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
int btrfs_set_prop(struct btrfs_trans_handle *trans, struct btrfs_inode *inode,
|
||||
const char *name, const char *value, size_t value_len,
|
||||
int flags);
|
||||
int btrfs_validate_prop(const struct btrfs_inode *inode, const char *name,
|
||||
@ -26,6 +26,6 @@ int btrfs_load_inode_props(struct inode *inode, struct btrfs_path *path);
|
||||
|
||||
int btrfs_inode_inherit_props(struct btrfs_trans_handle *trans,
|
||||
struct inode *inode,
|
||||
struct inode *dir);
|
||||
const struct inode *dir);
|
||||
|
||||
#endif
|
||||
|
@ -30,7 +30,7 @@
|
||||
#include "root-tree.h"
|
||||
#include "tree-checker.h"
|
||||
|
||||
enum btrfs_qgroup_mode btrfs_qgroup_mode(struct btrfs_fs_info *fs_info)
|
||||
enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags))
|
||||
return BTRFS_QGROUP_MODE_DISABLED;
|
||||
@ -39,12 +39,12 @@ enum btrfs_qgroup_mode btrfs_qgroup_mode(struct btrfs_fs_info *fs_info)
|
||||
return BTRFS_QGROUP_MODE_FULL;
|
||||
}
|
||||
|
||||
bool btrfs_qgroup_enabled(struct btrfs_fs_info *fs_info)
|
||||
bool btrfs_qgroup_enabled(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return btrfs_qgroup_mode(fs_info) != BTRFS_QGROUP_MODE_DISABLED;
|
||||
}
|
||||
|
||||
bool btrfs_qgroup_full_accounting(struct btrfs_fs_info *fs_info)
|
||||
bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
return btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL;
|
||||
}
|
||||
@ -107,7 +107,7 @@ static void qgroup_rsv_release(struct btrfs_fs_info *fs_info,
|
||||
|
||||
static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_qgroup *dest,
|
||||
struct btrfs_qgroup *src)
|
||||
const struct btrfs_qgroup *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -117,7 +117,7 @@ static void qgroup_rsv_add_by_qgroup(struct btrfs_fs_info *fs_info,
|
||||
|
||||
static void qgroup_rsv_release_by_qgroup(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_qgroup *dest,
|
||||
struct btrfs_qgroup *src)
|
||||
const struct btrfs_qgroup *src)
|
||||
{
|
||||
int i;
|
||||
|
||||
@ -141,37 +141,27 @@ static void btrfs_qgroup_update_new_refcnt(struct btrfs_qgroup *qg, u64 seq,
|
||||
qg->new_refcnt += mod;
|
||||
}
|
||||
|
||||
static inline u64 btrfs_qgroup_get_old_refcnt(struct btrfs_qgroup *qg, u64 seq)
|
||||
static inline u64 btrfs_qgroup_get_old_refcnt(const struct btrfs_qgroup *qg, u64 seq)
|
||||
{
|
||||
if (qg->old_refcnt < seq)
|
||||
return 0;
|
||||
return qg->old_refcnt - seq;
|
||||
}
|
||||
|
||||
static inline u64 btrfs_qgroup_get_new_refcnt(struct btrfs_qgroup *qg, u64 seq)
|
||||
static inline u64 btrfs_qgroup_get_new_refcnt(const struct btrfs_qgroup *qg, u64 seq)
|
||||
{
|
||||
if (qg->new_refcnt < seq)
|
||||
return 0;
|
||||
return qg->new_refcnt - seq;
|
||||
}
|
||||
|
||||
/*
|
||||
* glue structure to represent the relations between qgroups.
|
||||
*/
|
||||
struct btrfs_qgroup_list {
|
||||
struct list_head next_group;
|
||||
struct list_head next_member;
|
||||
struct btrfs_qgroup *group;
|
||||
struct btrfs_qgroup *member;
|
||||
};
|
||||
|
||||
static int
|
||||
qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
|
||||
int init_flags);
|
||||
static void qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info);
|
||||
|
||||
/* must be called with qgroup_ioctl_lock held */
|
||||
static struct btrfs_qgroup *find_qgroup_rb(struct btrfs_fs_info *fs_info,
|
||||
static struct btrfs_qgroup *find_qgroup_rb(const struct btrfs_fs_info *fs_info,
|
||||
u64 qgroupid)
|
||||
{
|
||||
struct rb_node *n = fs_info->qgroup_tree.rb_node;
|
||||
@ -346,7 +336,7 @@ static int del_relation_rb(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
u64 rfer, u64 excl)
|
||||
{
|
||||
struct btrfs_qgroup *qgroup;
|
||||
@ -608,7 +598,7 @@ out:
|
||||
* Return false if no reserved space is left.
|
||||
* Return true if some reserved space is leaked.
|
||||
*/
|
||||
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info)
|
||||
bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct rb_node *node;
|
||||
bool ret = false;
|
||||
@ -1334,19 +1324,14 @@ out:
|
||||
*/
|
||||
static int flush_reservations(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_start_delalloc_roots(fs_info, LONG_MAX, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
trans = btrfs_join_transaction(fs_info->tree_root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
|
||||
return ret;
|
||||
return btrfs_commit_current_transaction(fs_info->tree_root);
|
||||
}
|
||||
|
||||
int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
|
||||
@ -1446,9 +1431,11 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
|
||||
btrfs_tree_lock(quota_root->node);
|
||||
btrfs_clear_buffer_dirty(trans, quota_root->node);
|
||||
btrfs_tree_unlock(quota_root->node);
|
||||
btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
|
||||
quota_root->node, 0, 1);
|
||||
ret = btrfs_free_tree_block(trans, btrfs_root_id(quota_root),
|
||||
quota_root->node, 0, 1);
|
||||
|
||||
if (ret < 0)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
|
||||
out:
|
||||
btrfs_put_root(quota_root);
|
||||
@ -1572,15 +1559,21 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst)
|
||||
/*
|
||||
* Add relation between @src and @dst qgroup. The @prealloc is allocated by the
|
||||
* callers and transferred here (either used or freed on error).
|
||||
*/
|
||||
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst,
|
||||
struct btrfs_qgroup_list *prealloc)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
struct btrfs_qgroup *parent;
|
||||
struct btrfs_qgroup *member;
|
||||
struct btrfs_qgroup_list *list;
|
||||
struct btrfs_qgroup_list *prealloc = NULL;
|
||||
int ret = 0;
|
||||
|
||||
ASSERT(prealloc);
|
||||
|
||||
/* Check the level of src and dst first */
|
||||
if (btrfs_qgroup_level(src) >= btrfs_qgroup_level(dst))
|
||||
return -EINVAL;
|
||||
@ -1605,11 +1598,6 @@ int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst
|
||||
}
|
||||
}
|
||||
|
||||
prealloc = kzalloc(sizeof(*list), GFP_NOFS);
|
||||
if (!prealloc) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
ret = add_qgroup_relation_item(trans, src, dst);
|
||||
if (ret)
|
||||
goto out;
|
||||
@ -1748,13 +1736,55 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool qgroup_has_usage(struct btrfs_qgroup *qgroup)
|
||||
/*
|
||||
* Return 0 if we cannot delete the qgroup (not empty, has children, etc.).
|
||||
* Return >0 if we can delete the qgroup.
|
||||
* Return <0 for other errors during tree search.
|
||||
*/
|
||||
static int can_delete_qgroup(struct btrfs_fs_info *fs_info, struct btrfs_qgroup *qgroup)
|
||||
{
|
||||
return (qgroup->rfer > 0 || qgroup->rfer_cmpr > 0 ||
|
||||
qgroup->excl > 0 || qgroup->excl_cmpr > 0 ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] > 0 ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] > 0 ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS] > 0);
|
||||
struct btrfs_key key;
|
||||
struct btrfs_path *path;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Squota would never be inconsistent, but there can still be a case
|
||||
* where a dropped subvolume still has qgroup numbers, and squota
|
||||
* relies on such qgroup for future accounting.
|
||||
*
|
||||
* So for squota, do not allow dropping any non-zero qgroup.
|
||||
*/
|
||||
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_SIMPLE &&
|
||||
(qgroup->rfer || qgroup->excl || qgroup->excl_cmpr || qgroup->rfer_cmpr))
|
||||
return 0;
|
||||
|
||||
/* For higher level qgroup, we can only delete it if it has no child. */
|
||||
if (btrfs_qgroup_level(qgroup->qgroupid)) {
|
||||
if (!list_empty(&qgroup->members))
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* For level-0 qgroups, we can only delete it if it has no subvolume
|
||||
* for it.
|
||||
* This means even if a subvolume is unlinked but not yet fully dropped,
|
||||
* we can not delete the qgroup.
|
||||
*/
|
||||
key.objectid = qgroup->qgroupid;
|
||||
key.type = BTRFS_ROOT_ITEM_KEY;
|
||||
key.offset = -1ULL;
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = btrfs_find_root(fs_info->tree_root, &key, path, NULL, NULL);
|
||||
btrfs_free_path(path);
|
||||
/*
|
||||
* The @ret from btrfs_find_root() exactly matches our definition for
|
||||
* the return value, thus can be returned directly.
|
||||
*/
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
||||
@ -1776,7 +1806,10 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (is_fstree(qgroupid) && qgroup_has_usage(qgroup)) {
|
||||
ret = can_delete_qgroup(fs_info, qgroup);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret == 0) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
@ -1801,6 +1834,34 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
|
||||
}
|
||||
|
||||
spin_lock(&fs_info->qgroup_lock);
|
||||
/*
|
||||
* Warn on reserved space. The qgroup should have no child nor
|
||||
* corresponding subvolume.
|
||||
* Thus its reserved space should all be zero, no matter if qgroup
|
||||
* is consistent or which mode is in use.
|
||||
*/
|
||||
WARN_ON(qgroup->rsv.values[BTRFS_QGROUP_RSV_DATA] ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PREALLOC] ||
|
||||
qgroup->rsv.values[BTRFS_QGROUP_RSV_META_PERTRANS]);
|
||||
/*
|
||||
* The same for rfer/excl numbers, but that's only if our qgroup is
|
||||
* consistent and if it's in regular qgroup mode.
|
||||
* For simple mode it's not as accurate thus we can hit non-zero values
|
||||
* very frequently.
|
||||
*/
|
||||
if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_FULL &&
|
||||
!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT)) {
|
||||
if (WARN_ON(qgroup->rfer || qgroup->excl ||
|
||||
qgroup->rfer_cmpr || qgroup->excl_cmpr)) {
|
||||
btrfs_warn_rl(fs_info,
|
||||
"to be deleted qgroup %u/%llu has non-zero numbers, rfer %llu rfer_cmpr %llu excl %llu excl_cmpr %llu",
|
||||
btrfs_qgroup_level(qgroup->qgroupid),
|
||||
btrfs_qgroup_subvolid(qgroup->qgroupid),
|
||||
qgroup->rfer, qgroup->rfer_cmpr,
|
||||
qgroup->excl, qgroup->excl_cmpr);
|
||||
qgroup_mark_inconsistent(fs_info);
|
||||
}
|
||||
}
|
||||
del_qgroup_rb(fs_info, qgroupid);
|
||||
spin_unlock(&fs_info->qgroup_lock);
|
||||
|
||||
@ -1816,6 +1877,41 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 subvolid)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
|
||||
if (!is_fstree(subvolid) || !btrfs_qgroup_enabled(fs_info) || !fs_info->quota_root)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Commit current transaction to make sure all the rfer/excl numbers
|
||||
* get updated.
|
||||
*/
|
||||
trans = btrfs_start_transaction(fs_info->quota_root, 0);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
/* Start new trans to delete the qgroup info and limit items. */
|
||||
trans = btrfs_start_transaction(fs_info->quota_root, 2);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
ret = btrfs_remove_qgroup(trans, subvolid);
|
||||
btrfs_end_transaction(trans);
|
||||
/*
|
||||
* It's squota and the subvolume still has numbers needed for future
|
||||
* accounting, in this case we can not delete it. Just skip it.
|
||||
*/
|
||||
if (ret == -EBUSY)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
|
||||
struct btrfs_qgroup_limit *limit)
|
||||
{
|
||||
@ -3222,7 +3318,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
struct btrfs_qgroup_inherit *inherit)
|
||||
{
|
||||
int ret = 0;
|
||||
int i;
|
||||
u64 *i_qgroups;
|
||||
bool committing = false;
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
@ -3279,7 +3374,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
i_qgroups = (u64 *)(inherit + 1);
|
||||
nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
|
||||
2 * inherit->num_excl_copies;
|
||||
for (i = 0; i < nums; ++i) {
|
||||
for (int i = 0; i < nums; i++) {
|
||||
srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
|
||||
|
||||
/*
|
||||
@ -3306,7 +3401,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
*/
|
||||
if (inherit) {
|
||||
i_qgroups = (u64 *)(inherit + 1);
|
||||
for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
|
||||
for (int i = 0; i < inherit->num_qgroups; i++, i_qgroups++) {
|
||||
if (*i_qgroups == 0)
|
||||
continue;
|
||||
ret = add_qgroup_relation_item(trans, objectid,
|
||||
@ -3392,7 +3487,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
goto unlock;
|
||||
|
||||
i_qgroups = (u64 *)(inherit + 1);
|
||||
for (i = 0; i < inherit->num_qgroups; ++i) {
|
||||
for (int i = 0; i < inherit->num_qgroups; i++) {
|
||||
if (*i_qgroups) {
|
||||
ret = add_relation_rb(fs_info, qlist_prealloc[i], objectid,
|
||||
*i_qgroups);
|
||||
@ -3412,7 +3507,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
++i_qgroups;
|
||||
}
|
||||
|
||||
for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
|
||||
for (int i = 0; i < inherit->num_ref_copies; i++, i_qgroups += 2) {
|
||||
struct btrfs_qgroup *src;
|
||||
struct btrfs_qgroup *dst;
|
||||
|
||||
@ -3433,7 +3528,7 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans, u64 srcid,
|
||||
/* Manually tweaking numbers certainly needs a rescan */
|
||||
need_rescan = true;
|
||||
}
|
||||
for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
|
||||
for (int i = 0; i < inherit->num_excl_copies; i++, i_qgroups += 2) {
|
||||
struct btrfs_qgroup *src;
|
||||
struct btrfs_qgroup *dst;
|
||||
|
||||
@ -3918,7 +4013,6 @@ int
|
||||
btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
ret = qgroup_rescan_init(fs_info, 0, 1);
|
||||
if (ret)
|
||||
@ -3935,16 +4029,10 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
|
||||
* going to clear all tracking information for a clean start.
|
||||
*/
|
||||
|
||||
trans = btrfs_attach_transaction_barrier(fs_info->fs_root);
|
||||
if (IS_ERR(trans) && trans != ERR_PTR(-ENOENT)) {
|
||||
ret = btrfs_commit_current_transaction(fs_info->fs_root);
|
||||
if (ret) {
|
||||
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
|
||||
return PTR_ERR(trans);
|
||||
} else if (trans != ERR_PTR(-ENOENT)) {
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret) {
|
||||
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
|
||||
return ret;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
qgroup_rescan_zero_tracking(fs_info);
|
||||
@ -4080,7 +4168,6 @@ static int qgroup_unreserve_range(struct btrfs_inode *inode,
|
||||
*/
|
||||
static int try_flush_qgroup(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
|
||||
/* Can't hold an open transaction or we run the risk of deadlocking. */
|
||||
@ -4101,17 +4188,9 @@ static int try_flush_qgroup(struct btrfs_root *root)
|
||||
ret = btrfs_start_delalloc_snapshot(root, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
|
||||
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
ret = btrfs_commit_current_transaction(root);
|
||||
out:
|
||||
clear_bit(BTRFS_ROOT_QGROUP_FLUSHING, &root->state);
|
||||
wake_up(&root->qgroup_flush_wait);
|
||||
@ -4817,7 +4896,7 @@ void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_byte
|
||||
}
|
||||
|
||||
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_squota_delta *delta)
|
||||
const struct btrfs_squota_delta *delta)
|
||||
{
|
||||
int ret;
|
||||
struct btrfs_qgroup *qgroup;
|
||||
|
@ -123,7 +123,6 @@ struct btrfs_inode;
|
||||
|
||||
/*
|
||||
* Record a dirty extent, and info qgroup to update quota on it
|
||||
* TODO: Use kmem cache to alloc it.
|
||||
*/
|
||||
struct btrfs_qgroup_extent_record {
|
||||
struct rb_node node;
|
||||
@ -279,6 +278,14 @@ struct btrfs_qgroup {
|
||||
struct kobject kobj;
|
||||
};
|
||||
|
||||
/*
 * Glue structure to represent the relations between qgroups.
 *
 * One instance ties a single @member qgroup to a single @group qgroup and
 * carries two list heads so that it can sit on two lists at the same time.
 *
 * NOTE(review): presumably @next_group chains this entry on the member's list
 * of groups and @next_member on the group's list of members - confirm against
 * the code that links these entries (e.g. add_relation_rb()).
 */
|
||||
struct btrfs_qgroup_list {
|
||||
/* List linkage, see the NOTE(review) above for the presumed owner. */
struct list_head next_group;
|
||||
/* List linkage, see the NOTE(review) above for the presumed owner. */
struct list_head next_member;
|
||||
/* One side of the relation (presumably the parent/containing qgroup). */
struct btrfs_qgroup *group;
|
||||
/* The other side of the relation (presumably the child qgroup). */
struct btrfs_qgroup *member;
|
||||
};
|
||||
|
||||
struct btrfs_squota_delta {
|
||||
/* The fstree root this delta counts against. */
|
||||
u64 root;
|
||||
@ -312,9 +319,9 @@ enum btrfs_qgroup_mode {
|
||||
BTRFS_QGROUP_MODE_SIMPLE
|
||||
};
|
||||
|
||||
enum btrfs_qgroup_mode btrfs_qgroup_mode(struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_qgroup_enabled(struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_qgroup_full_accounting(struct btrfs_fs_info *fs_info);
|
||||
enum btrfs_qgroup_mode btrfs_qgroup_mode(const struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_qgroup_enabled(const struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_qgroup_full_accounting(const struct btrfs_fs_info *fs_info);
|
||||
int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_quota_ctl_args *quota_ctl_args);
|
||||
int btrfs_quota_disable(struct btrfs_fs_info *fs_info);
|
||||
@ -322,11 +329,13 @@ int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
|
||||
bool interruptible);
|
||||
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst);
|
||||
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans, u64 src, u64 dst,
|
||||
struct btrfs_qgroup_list *prealloc);
|
||||
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans, u64 src,
|
||||
u64 dst);
|
||||
int btrfs_create_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
|
||||
int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid);
|
||||
int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 subvolid);
|
||||
int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid,
|
||||
struct btrfs_qgroup_limit *limit);
|
||||
int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info);
|
||||
@ -361,7 +370,7 @@ void btrfs_qgroup_free_refroot(struct btrfs_fs_info *fs_info,
|
||||
enum btrfs_qgroup_rsv_type type);
|
||||
|
||||
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
|
||||
int btrfs_verify_qgroup_counts(struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
int btrfs_verify_qgroup_counts(const struct btrfs_fs_info *fs_info, u64 qgroupid,
|
||||
u64 rfer, u64 excl);
|
||||
#endif
|
||||
|
||||
@ -431,9 +440,9 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans,
|
||||
int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root, struct extent_buffer *eb);
|
||||
void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans);
|
||||
bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_check_quota_leak(const struct btrfs_fs_info *fs_info);
|
||||
void btrfs_free_squota_rsv(struct btrfs_fs_info *fs_info, u64 root, u64 rsv_bytes);
|
||||
int btrfs_record_squota_delta(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_squota_delta *delta);
|
||||
const struct btrfs_squota_delta *delta);
|
||||
|
||||
#endif
|
||||
|
@ -80,7 +80,6 @@ static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_key stripe_key;
|
||||
struct btrfs_root *stripe_root = fs_info->stripe_root;
|
||||
const int num_stripes = btrfs_bg_type_to_factor(bioc->map_type);
|
||||
u8 encoding = btrfs_bg_flags_to_raid_index(bioc->map_type);
|
||||
struct btrfs_stripe_extent *stripe_extent;
|
||||
const size_t item_size = struct_size(stripe_extent, strides, num_stripes);
|
||||
int ret;
|
||||
@ -94,7 +93,6 @@ static int btrfs_insert_one_raid_extent(struct btrfs_trans_handle *trans,
|
||||
|
||||
trace_btrfs_insert_one_raid_extent(fs_info, bioc->logical, bioc->size,
|
||||
num_stripes);
|
||||
btrfs_set_stack_stripe_extent_encoding(stripe_extent, encoding);
|
||||
for (int i = 0; i < num_stripes; i++) {
|
||||
u64 devid = bioc->stripes[i].dev->devid;
|
||||
u64 physical = bioc->stripes[i].physical;
|
||||
@ -159,7 +157,6 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *leaf;
|
||||
const u64 end = logical + *length;
|
||||
int num_stripes;
|
||||
u8 encoding;
|
||||
u64 offset;
|
||||
u64 found_logical;
|
||||
u64 found_length;
|
||||
@ -222,16 +219,6 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info,
|
||||
|
||||
num_stripes = btrfs_num_raid_stripes(btrfs_item_size(leaf, slot));
|
||||
stripe_extent = btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
|
||||
encoding = btrfs_stripe_extent_encoding(leaf, stripe_extent);
|
||||
|
||||
if (encoding != btrfs_bg_flags_to_raid_index(map_type)) {
|
||||
ret = -EUCLEAN;
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
"on-disk stripe encoding %d doesn't match RAID index %d",
|
||||
encoding,
|
||||
btrfs_bg_flags_to_raid_index(map_type));
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_stripes; i++) {
|
||||
struct btrfs_raid_stride *stride = &stripe_extent->strides[i];
|
||||
|
@ -48,8 +48,7 @@ static inline bool btrfs_need_stripe_tree_update(struct btrfs_fs_info *fs_info,
|
||||
|
||||
static inline int btrfs_num_raid_stripes(u32 item_size)
|
||||
{
|
||||
return (item_size - offsetof(struct btrfs_stripe_extent, strides)) /
|
||||
sizeof(struct btrfs_raid_stride);
|
||||
return item_size / sizeof(struct btrfs_raid_stride);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -40,6 +40,85 @@
|
||||
|
||||
#define BTRFS_STRIPE_HASH_TABLE_BITS 11
|
||||
|
||||
static void dump_bioc(const struct btrfs_fs_info *fs_info, const struct btrfs_io_context *bioc)
|
||||
{
|
||||
if (unlikely(!bioc)) {
|
||||
btrfs_crit(fs_info, "bioc=NULL");
|
||||
return;
|
||||
}
|
||||
btrfs_crit(fs_info,
|
||||
"bioc logical=%llu full_stripe=%llu size=%llu map_type=0x%llx mirror=%u replace_nr_stripes=%u replace_stripe_src=%d num_stripes=%u",
|
||||
bioc->logical, bioc->full_stripe_logical, bioc->size,
|
||||
bioc->map_type, bioc->mirror_num, bioc->replace_nr_stripes,
|
||||
bioc->replace_stripe_src, bioc->num_stripes);
|
||||
for (int i = 0; i < bioc->num_stripes; i++) {
|
||||
btrfs_crit(fs_info, " nr=%d devid=%llu physical=%llu",
|
||||
i, bioc->stripes[i].dev->devid,
|
||||
bioc->stripes[i].physical);
|
||||
}
|
||||
}
|
||||
|
||||
static void btrfs_dump_rbio(const struct btrfs_fs_info *fs_info,
|
||||
const struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
|
||||
return;
|
||||
|
||||
dump_bioc(fs_info, rbio->bioc);
|
||||
btrfs_crit(fs_info,
|
||||
"rbio flags=0x%lx nr_sectors=%u nr_data=%u real_stripes=%u stripe_nsectors=%u scrubp=%u dbitmap=0x%lx",
|
||||
rbio->flags, rbio->nr_sectors, rbio->nr_data,
|
||||
rbio->real_stripes, rbio->stripe_nsectors,
|
||||
rbio->scrubp, rbio->dbitmap);
|
||||
}
|
||||
|
||||
/*
 * Assert @expr; when it does not hold and CONFIG_BTRFS_ASSERT is enabled,
 * dump @rbio before handing @expr to ASSERT().
 *
 * @expr appears both in the check below and in ASSERT(), so it may be
 * evaluated more than once and should be free of side effects.
 */
#define ASSERT_RBIO(expr, rbio)						\
({									\
	if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) {	\
		const struct btrfs_fs_info *__fs_info = NULL;		\
									\
		if ((rbio)->bioc)					\
			__fs_info = (rbio)->bioc->fs_info;		\
		btrfs_dump_rbio(__fs_info, (rbio));			\
	}								\
	ASSERT((expr));							\
})
|
||||
|
||||
/*
 * Assert @expr; when it does not hold and CONFIG_BTRFS_ASSERT is enabled,
 * dump @rbio and the offending @stripe_nr before handing @expr to ASSERT().
 *
 * @expr appears both in the check below and in ASSERT(), so it may be
 * evaluated more than once and should be free of side effects.
 */
#define ASSERT_RBIO_STRIPE(expr, rbio, stripe_nr)			\
({									\
	if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) {	\
		const struct btrfs_fs_info *__fs_info = NULL;		\
									\
		if ((rbio)->bioc)					\
			__fs_info = (rbio)->bioc->fs_info;		\
		btrfs_dump_rbio(__fs_info, (rbio));			\
		btrfs_crit(__fs_info, "stripe_nr=%d", (stripe_nr));	\
	}								\
	ASSERT((expr));							\
})
|
||||
|
||||
/*
 * Assert @expr; when it does not hold and CONFIG_BTRFS_ASSERT is enabled,
 * dump @rbio and the offending @sector_nr before handing @expr to ASSERT().
 *
 * @expr appears both in the check below and in ASSERT(), so it may be
 * evaluated more than once and should be free of side effects.
 */
#define ASSERT_RBIO_SECTOR(expr, rbio, sector_nr)			\
({									\
	if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) {	\
		const struct btrfs_fs_info *__fs_info = NULL;		\
									\
		if ((rbio)->bioc)					\
			__fs_info = (rbio)->bioc->fs_info;		\
		btrfs_dump_rbio(__fs_info, (rbio));			\
		btrfs_crit(__fs_info, "sector_nr=%d", (sector_nr));	\
	}								\
	ASSERT((expr));							\
})
|
||||
|
||||
/*
 * Assert @expr; when it does not hold and CONFIG_BTRFS_ASSERT is enabled,
 * dump @rbio and the offending @logical address before handing @expr to
 * ASSERT().
 *
 * @expr appears both in the check below and in ASSERT(), so it may be
 * evaluated more than once and should be free of side effects.
 */
#define ASSERT_RBIO_LOGICAL(expr, rbio, logical)			\
({									\
	if (IS_ENABLED(CONFIG_BTRFS_ASSERT) && unlikely(!(expr))) {	\
		const struct btrfs_fs_info *__fs_info = NULL;		\
									\
		if ((rbio)->bioc)					\
			__fs_info = (rbio)->bioc->fs_info;		\
		btrfs_dump_rbio(__fs_info, (rbio));			\
		btrfs_crit(__fs_info, "logical=%llu", (logical));	\
	}								\
	ASSERT((expr));							\
})
|
||||
|
||||
/* Used by the raid56 code to lock stripes for read/modify/write */
|
||||
struct btrfs_stripe_hash {
|
||||
struct list_head hash_list;
|
||||
@ -592,8 +671,8 @@ static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
|
||||
unsigned int stripe_nr,
|
||||
unsigned int sector_nr)
|
||||
{
|
||||
ASSERT(stripe_nr < rbio->real_stripes);
|
||||
ASSERT(sector_nr < rbio->stripe_nsectors);
|
||||
ASSERT_RBIO_STRIPE(stripe_nr < rbio->real_stripes, rbio, stripe_nr);
|
||||
ASSERT_RBIO_SECTOR(sector_nr < rbio->stripe_nsectors, rbio, sector_nr);
|
||||
|
||||
return stripe_nr * rbio->stripe_nsectors + sector_nr;
|
||||
}
|
||||
@ -873,8 +952,10 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
|
||||
struct sector_ptr *sector;
|
||||
int index;
|
||||
|
||||
ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes);
|
||||
ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
|
||||
ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->real_stripes,
|
||||
rbio, stripe_nr);
|
||||
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
|
||||
rbio, sector_nr);
|
||||
|
||||
index = stripe_nr * rbio->stripe_nsectors + sector_nr;
|
||||
ASSERT(index >= 0 && index < rbio->nr_sectors);
|
||||
@ -970,7 +1051,7 @@ static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, 0);
|
||||
ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages, false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
/* Mapping all sectors */
|
||||
@ -985,7 +1066,7 @@ static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
|
||||
int ret;
|
||||
|
||||
ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
|
||||
rbio->stripe_pages + data_pages, 0);
|
||||
rbio->stripe_pages + data_pages, false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@ -1057,8 +1138,10 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
|
||||
* thus it can be larger than rbio->real_stripe.
|
||||
* So here we check against bioc->num_stripes, not rbio->real_stripes.
|
||||
*/
|
||||
ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes);
|
||||
ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
|
||||
ASSERT_RBIO_STRIPE(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes,
|
||||
rbio, stripe_nr);
|
||||
ASSERT_RBIO_SECTOR(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors,
|
||||
rbio, sector_nr);
|
||||
ASSERT(sector->page);
|
||||
|
||||
stripe = &rbio->bioc->stripes[stripe_nr];
|
||||
@ -1197,14 +1280,14 @@ static void assert_rbio(struct btrfs_raid_bio *rbio)
|
||||
* At least two stripes (2 disks RAID5), and since real_stripes is U8,
|
||||
* we won't go beyond 256 disks anyway.
|
||||
*/
|
||||
ASSERT(rbio->real_stripes >= 2);
|
||||
ASSERT(rbio->nr_data > 0);
|
||||
ASSERT_RBIO(rbio->real_stripes >= 2, rbio);
|
||||
ASSERT_RBIO(rbio->nr_data > 0, rbio);
|
||||
|
||||
/*
|
||||
* This is another check to make sure nr data stripes is smaller
|
||||
* than total stripes.
|
||||
*/
|
||||
ASSERT(rbio->nr_data < rbio->real_stripes);
|
||||
ASSERT_RBIO(rbio->nr_data < rbio->real_stripes, rbio);
|
||||
}
|
||||
|
||||
/* Generate PQ for one vertical stripe. */
|
||||
@ -1557,7 +1640,7 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
|
||||
const int data_pages = rbio->nr_data * rbio->stripe_npages;
|
||||
int ret;
|
||||
|
||||
ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, 0);
|
||||
ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages, false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
@ -1641,9 +1724,10 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
|
||||
const u32 sectorsize = fs_info->sectorsize;
|
||||
u64 cur_logical;
|
||||
|
||||
ASSERT(orig_logical >= full_stripe_start &&
|
||||
orig_logical + orig_len <= full_stripe_start +
|
||||
rbio->nr_data * BTRFS_STRIPE_LEN);
|
||||
ASSERT_RBIO_LOGICAL(orig_logical >= full_stripe_start &&
|
||||
orig_logical + orig_len <= full_stripe_start +
|
||||
rbio->nr_data * BTRFS_STRIPE_LEN,
|
||||
rbio, orig_logical);
|
||||
|
||||
bio_list_add(&rbio->bio_list, orig_bio);
|
||||
rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
|
||||
@ -2389,7 +2473,7 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
|
||||
break;
|
||||
}
|
||||
}
|
||||
ASSERT(i < rbio->real_stripes);
|
||||
ASSERT_RBIO_STRIPE(i < rbio->real_stripes, rbio, i);
|
||||
|
||||
bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
|
||||
return rbio;
|
||||
@ -2555,7 +2639,7 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
|
||||
* Replace is running and our parity stripe needs to be duplicated to
|
||||
* the target device. Check we have a valid source stripe number.
|
||||
*/
|
||||
ASSERT(rbio->bioc->replace_stripe_src >= 0);
|
||||
ASSERT_RBIO(rbio->bioc->replace_stripe_src >= 0, rbio);
|
||||
for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
|
||||
struct sector_ptr *sector;
|
||||
|
||||
|
@ -733,7 +733,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
* we found the previous extent covering eof and before we
|
||||
* attempted to increment its reference count).
|
||||
*/
|
||||
ret = btrfs_wait_ordered_range(inode, wb_start,
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(inode), wb_start,
|
||||
destoff - wb_start);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -755,7 +755,7 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
|
||||
* range, so wait for writeback to complete before truncating pages
|
||||
* from the page cache. This is a rare case.
|
||||
*/
|
||||
wb_ret = btrfs_wait_ordered_range(inode, destoff, len);
|
||||
wb_ret = btrfs_wait_ordered_range(BTRFS_I(inode), destoff, len);
|
||||
ret = ret ? ret : wb_ret;
|
||||
/*
|
||||
* Truncate page cache pages so that future reads will see the cloned
|
||||
@ -835,11 +835,11 @@ static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs),
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(inode_in), ALIGN_DOWN(pos_in, bs),
|
||||
wb_len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs),
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(inode_out), ALIGN_DOWN(pos_out, bs),
|
||||
wb_len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
@ -817,7 +817,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
|
||||
goto abort;
|
||||
}
|
||||
set_bit(BTRFS_ROOT_SHAREABLE, &reloc_root->state);
|
||||
reloc_root->last_trans = trans->transid;
|
||||
btrfs_set_root_last_trans(reloc_root, trans->transid);
|
||||
return reloc_root;
|
||||
fail:
|
||||
kfree(root_item);
|
||||
@ -864,7 +864,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
|
||||
*/
|
||||
if (root->reloc_root) {
|
||||
reloc_root = root->reloc_root;
|
||||
reloc_root->last_trans = trans->transid;
|
||||
btrfs_set_root_last_trans(reloc_root, trans->transid);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -962,7 +962,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
bytenr -= BTRFS_I(reloc_inode)->index_cnt;
|
||||
bytenr -= BTRFS_I(reloc_inode)->reloc_block_group_start;
|
||||
ret = btrfs_lookup_file_extent(NULL, root, path,
|
||||
btrfs_ino(BTRFS_I(reloc_inode)), bytenr, 0);
|
||||
if (ret < 0)
|
||||
@ -1739,7 +1739,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
|
||||
* btrfs_update_reloc_root() and update our root item
|
||||
* appropriately.
|
||||
*/
|
||||
reloc_root->last_trans = trans->transid;
|
||||
btrfs_set_root_last_trans(reloc_root, trans->transid);
|
||||
trans->block_rsv = rc->block_rsv;
|
||||
|
||||
replaced = 0;
|
||||
@ -2082,7 +2082,7 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root;
|
||||
int ret;
|
||||
|
||||
if (reloc_root->last_trans == trans->transid)
|
||||
if (btrfs_get_root_last_trans(reloc_root) == trans->transid)
|
||||
return 0;
|
||||
|
||||
root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset, false);
|
||||
@ -2790,14 +2790,14 @@ out_free_blocks:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline_for_stack int prealloc_file_extent_cluster(
|
||||
struct btrfs_inode *inode,
|
||||
const struct file_extent_cluster *cluster)
|
||||
static noinline_for_stack int prealloc_file_extent_cluster(struct reloc_control *rc)
|
||||
{
|
||||
const struct file_extent_cluster *cluster = &rc->cluster;
|
||||
struct btrfs_inode *inode = BTRFS_I(rc->data_inode);
|
||||
u64 alloc_hint = 0;
|
||||
u64 start;
|
||||
u64 end;
|
||||
u64 offset = inode->index_cnt;
|
||||
u64 offset = inode->reloc_block_group_start;
|
||||
u64 num_bytes;
|
||||
int nr;
|
||||
int ret = 0;
|
||||
@ -2899,11 +2899,14 @@ static noinline_for_stack int prealloc_file_extent_cluster(
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inode,
|
||||
u64 start, u64 end, u64 block_start)
|
||||
static noinline_for_stack int setup_relocation_extent_mapping(struct reloc_control *rc)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(rc->data_inode);
|
||||
struct extent_map *em;
|
||||
struct extent_state *cached_state = NULL;
|
||||
u64 offset = inode->reloc_block_group_start;
|
||||
u64 start = rc->cluster.start - offset;
|
||||
u64 end = rc->cluster.end - offset;
|
||||
int ret = 0;
|
||||
|
||||
em = alloc_extent_map();
|
||||
@ -2912,13 +2915,14 @@ static noinline_for_stack int setup_relocation_extent_mapping(struct inode *inod
|
||||
|
||||
em->start = start;
|
||||
em->len = end + 1 - start;
|
||||
em->block_len = em->len;
|
||||
em->block_start = block_start;
|
||||
em->disk_bytenr = rc->cluster.start;
|
||||
em->disk_num_bytes = em->len;
|
||||
em->ram_bytes = em->len;
|
||||
em->flags |= EXTENT_FLAG_PINNED;
|
||||
|
||||
lock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
|
||||
ret = btrfs_replace_extent_map_range(BTRFS_I(inode), em, false);
|
||||
unlock_extent(&BTRFS_I(inode)->io_tree, start, end, &cached_state);
|
||||
lock_extent(&inode->io_tree, start, end, &cached_state);
|
||||
ret = btrfs_replace_extent_map_range(inode, em, false);
|
||||
unlock_extent(&inode->io_tree, start, end, &cached_state);
|
||||
free_extent_map(em);
|
||||
|
||||
return ret;
|
||||
@ -2946,12 +2950,14 @@ static u64 get_cluster_boundary_end(const struct file_extent_cluster *cluster,
|
||||
return cluster->boundary[cluster_nr + 1] - 1;
|
||||
}
|
||||
|
||||
static int relocate_one_folio(struct inode *inode, struct file_ra_state *ra,
|
||||
const struct file_extent_cluster *cluster,
|
||||
static int relocate_one_folio(struct reloc_control *rc,
|
||||
struct file_ra_state *ra,
|
||||
int *cluster_nr, unsigned long index)
|
||||
{
|
||||
const struct file_extent_cluster *cluster = &rc->cluster;
|
||||
struct inode *inode = rc->data_inode;
|
||||
struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
|
||||
u64 offset = BTRFS_I(inode)->index_cnt;
|
||||
u64 offset = BTRFS_I(inode)->reloc_block_group_start;
|
||||
const unsigned long last_index = (cluster->end - offset) >> PAGE_SHIFT;
|
||||
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
|
||||
struct folio *folio;
|
||||
@ -3083,10 +3089,11 @@ release_folio:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int relocate_file_extent_cluster(struct inode *inode,
|
||||
const struct file_extent_cluster *cluster)
|
||||
static int relocate_file_extent_cluster(struct reloc_control *rc)
|
||||
{
|
||||
u64 offset = BTRFS_I(inode)->index_cnt;
|
||||
struct inode *inode = rc->data_inode;
|
||||
const struct file_extent_cluster *cluster = &rc->cluster;
|
||||
u64 offset = BTRFS_I(inode)->reloc_block_group_start;
|
||||
unsigned long index;
|
||||
unsigned long last_index;
|
||||
struct file_ra_state *ra;
|
||||
@ -3100,21 +3107,20 @@ static int relocate_file_extent_cluster(struct inode *inode,
|
||||
if (!ra)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = prealloc_file_extent_cluster(BTRFS_I(inode), cluster);
|
||||
ret = prealloc_file_extent_cluster(rc);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
file_ra_state_init(ra, inode->i_mapping);
|
||||
|
||||
ret = setup_relocation_extent_mapping(inode, cluster->start - offset,
|
||||
cluster->end - offset, cluster->start);
|
||||
ret = setup_relocation_extent_mapping(rc);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
last_index = (cluster->end - offset) >> PAGE_SHIFT;
|
||||
for (index = (cluster->start - offset) >> PAGE_SHIFT;
|
||||
index <= last_index && !ret; index++)
|
||||
ret = relocate_one_folio(inode, ra, cluster, &cluster_nr, index);
|
||||
ret = relocate_one_folio(rc, ra, &cluster_nr, index);
|
||||
if (ret == 0)
|
||||
WARN_ON(cluster_nr != cluster->nr);
|
||||
out:
|
||||
@ -3122,15 +3128,16 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static noinline_for_stack int relocate_data_extent(struct inode *inode,
|
||||
const struct btrfs_key *extent_key,
|
||||
struct file_extent_cluster *cluster)
|
||||
static noinline_for_stack int relocate_data_extent(struct reloc_control *rc,
|
||||
const struct btrfs_key *extent_key)
|
||||
{
|
||||
struct inode *inode = rc->data_inode;
|
||||
struct file_extent_cluster *cluster = &rc->cluster;
|
||||
int ret;
|
||||
struct btrfs_root *root = BTRFS_I(inode)->root;
|
||||
|
||||
if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) {
|
||||
ret = relocate_file_extent_cluster(inode, cluster);
|
||||
ret = relocate_file_extent_cluster(rc);
|
||||
if (ret)
|
||||
return ret;
|
||||
cluster->nr = 0;
|
||||
@ -3156,7 +3163,7 @@ static noinline_for_stack int relocate_data_extent(struct inode *inode,
|
||||
* the cluster we need to relocate.
|
||||
*/
|
||||
root->relocation_src_root = cluster->owning_root;
|
||||
ret = relocate_file_extent_cluster(inode, cluster);
|
||||
ret = relocate_file_extent_cluster(rc);
|
||||
if (ret)
|
||||
return ret;
|
||||
cluster->nr = 0;
|
||||
@ -3175,7 +3182,7 @@ static noinline_for_stack int relocate_data_extent(struct inode *inode,
|
||||
cluster->nr++;
|
||||
|
||||
if (cluster->nr >= MAX_EXTENTS) {
|
||||
ret = relocate_file_extent_cluster(inode, cluster);
|
||||
ret = relocate_file_extent_cluster(rc);
|
||||
if (ret)
|
||||
return ret;
|
||||
cluster->nr = 0;
|
||||
@ -3369,7 +3376,7 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
|
||||
if (inode)
|
||||
goto truncate;
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, ino, root);
|
||||
inode = btrfs_iget(ino, root);
|
||||
if (IS_ERR(inode))
|
||||
return -ENOENT;
|
||||
|
||||
@ -3744,8 +3751,7 @@ restart:
|
||||
if (rc->stage == MOVE_DATA_EXTENTS &&
|
||||
(flags & BTRFS_EXTENT_FLAG_DATA)) {
|
||||
rc->found_file_extent = true;
|
||||
ret = relocate_data_extent(rc->data_inode,
|
||||
&key, &rc->cluster);
|
||||
ret = relocate_data_extent(rc, &key);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
break;
|
||||
@ -3774,8 +3780,7 @@ restart:
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
ret = relocate_file_extent_cluster(rc->data_inode,
|
||||
&rc->cluster);
|
||||
ret = relocate_file_extent_cluster(rc);
|
||||
if (ret < 0)
|
||||
err = ret;
|
||||
}
|
||||
@ -3908,14 +3913,14 @@ static noinline_for_stack struct inode *create_reloc_inode(
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, objectid, root);
|
||||
inode = btrfs_iget(objectid, root);
|
||||
if (IS_ERR(inode)) {
|
||||
delete_orphan_inode(trans, root, objectid);
|
||||
ret = PTR_ERR(inode);
|
||||
inode = NULL;
|
||||
goto out;
|
||||
}
|
||||
BTRFS_I(inode)->index_cnt = group->start;
|
||||
BTRFS_I(inode)->reloc_block_group_start = group->start;
|
||||
|
||||
ret = btrfs_orphan_add(trans, BTRFS_I(inode));
|
||||
out:
|
||||
@ -4002,15 +4007,13 @@ static void free_reloc_control(struct reloc_control *rc)
|
||||
/*
|
||||
* Print the block group being relocated
|
||||
*/
|
||||
static void describe_relocation(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_block_group *block_group)
|
||||
static void describe_relocation(struct btrfs_block_group *block_group)
|
||||
{
|
||||
char buf[128] = {'\0'};
|
||||
|
||||
btrfs_describe_block_groups(block_group->flags, buf, sizeof(buf));
|
||||
|
||||
btrfs_info(fs_info,
|
||||
"relocating block group %llu flags %s",
|
||||
btrfs_info(block_group->fs_info, "relocating block group %llu flags %s",
|
||||
block_group->start, buf);
|
||||
}
|
||||
|
||||
@ -4118,13 +4121,11 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
|
||||
goto out;
|
||||
}
|
||||
|
||||
describe_relocation(fs_info, rc->block_group);
|
||||
describe_relocation(rc->block_group);
|
||||
|
||||
btrfs_wait_block_group_reservations(rc->block_group);
|
||||
btrfs_wait_nocow_writers(rc->block_group);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX,
|
||||
rc->block_group->start,
|
||||
rc->block_group->length);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, rc->block_group);
|
||||
|
||||
ret = btrfs_zone_finish(rc->block_group);
|
||||
WARN_ON(ret && ret != -EAGAIN);
|
||||
@ -4149,7 +4150,7 @@ int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start)
|
||||
* out of the loop if we hit an error.
|
||||
*/
|
||||
if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
|
||||
ret = btrfs_wait_ordered_range(rc->data_inode, 0,
|
||||
ret = btrfs_wait_ordered_range(BTRFS_I(rc->data_inode), 0,
|
||||
(u64)-1);
|
||||
if (ret)
|
||||
err = ret;
|
||||
@ -4221,8 +4222,8 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
struct extent_buffer *leaf;
|
||||
struct reloc_control *rc = NULL;
|
||||
struct btrfs_trans_handle *trans;
|
||||
int ret;
|
||||
int err = 0;
|
||||
int ret2;
|
||||
int ret = 0;
|
||||
|
||||
path = btrfs_alloc_path();
|
||||
if (!path)
|
||||
@ -4236,15 +4237,14 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
while (1) {
|
||||
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key,
|
||||
path, 0, 0);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
if (ret > 0) {
|
||||
if (path->slots[0] == 0)
|
||||
break;
|
||||
path->slots[0]--;
|
||||
}
|
||||
ret = 0;
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
btrfs_release_path(path);
|
||||
@ -4255,7 +4255,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
|
||||
reloc_root = btrfs_read_tree_root(fs_info->tree_root, &key);
|
||||
if (IS_ERR(reloc_root)) {
|
||||
err = PTR_ERR(reloc_root);
|
||||
ret = PTR_ERR(reloc_root);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -4267,15 +4267,12 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
reloc_root->root_key.offset, false);
|
||||
if (IS_ERR(fs_root)) {
|
||||
ret = PTR_ERR(fs_root);
|
||||
if (ret != -ENOENT) {
|
||||
err = ret;
|
||||
if (ret != -ENOENT)
|
||||
goto out;
|
||||
}
|
||||
ret = mark_garbage_root(reloc_root);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
} else {
|
||||
btrfs_put_root(fs_root);
|
||||
}
|
||||
@ -4293,15 +4290,13 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
|
||||
rc = alloc_reloc_control(fs_info);
|
||||
if (!rc) {
|
||||
err = -ENOMEM;
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = reloc_chunk_start(fs_info);
|
||||
if (ret < 0) {
|
||||
err = ret;
|
||||
if (ret < 0)
|
||||
goto out_end;
|
||||
}
|
||||
|
||||
rc->extent_root = btrfs_extent_root(fs_info, 0);
|
||||
|
||||
@ -4309,7 +4304,7 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
|
||||
trans = btrfs_join_transaction(rc->extent_root);
|
||||
if (IS_ERR(trans)) {
|
||||
err = PTR_ERR(trans);
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_unset;
|
||||
}
|
||||
|
||||
@ -4329,15 +4324,15 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
fs_root = btrfs_get_fs_root(fs_info, reloc_root->root_key.offset,
|
||||
false);
|
||||
if (IS_ERR(fs_root)) {
|
||||
err = PTR_ERR(fs_root);
|
||||
ret = PTR_ERR(fs_root);
|
||||
list_add_tail(&reloc_root->root_list, &reloc_roots);
|
||||
btrfs_end_transaction(trans);
|
||||
goto out_unset;
|
||||
}
|
||||
|
||||
err = __add_reloc_root(reloc_root);
|
||||
ASSERT(err != -EEXIST);
|
||||
if (err) {
|
||||
ret = __add_reloc_root(reloc_root);
|
||||
ASSERT(ret != -EEXIST);
|
||||
if (ret) {
|
||||
list_add_tail(&reloc_root->root_list, &reloc_roots);
|
||||
btrfs_put_root(fs_root);
|
||||
btrfs_end_transaction(trans);
|
||||
@ -4347,8 +4342,8 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
btrfs_put_root(fs_root);
|
||||
}
|
||||
|
||||
err = btrfs_commit_transaction(trans);
|
||||
if (err)
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
if (ret)
|
||||
goto out_unset;
|
||||
|
||||
merge_reloc_roots(rc);
|
||||
@ -4357,14 +4352,14 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
|
||||
|
||||
trans = btrfs_join_transaction(rc->extent_root);
|
||||
if (IS_ERR(trans)) {
|
||||
err = PTR_ERR(trans);
|
||||
ret = PTR_ERR(trans);
|
||||
goto out_clean;
|
||||
}
|
||||
err = btrfs_commit_transaction(trans);
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
out_clean:
|
||||
ret = clean_dirty_subvols(rc);
|
||||
if (ret < 0 && !err)
|
||||
err = ret;
|
||||
ret2 = clean_dirty_subvols(rc);
|
||||
if (ret2 < 0 && !ret)
|
||||
ret = ret2;
|
||||
out_unset:
|
||||
unset_reloc_control(rc);
|
||||
out_end:
|
||||
@ -4375,14 +4370,14 @@ out:
|
||||
|
||||
btrfs_free_path(path);
|
||||
|
||||
if (err == 0) {
|
||||
if (ret == 0) {
|
||||
/* cleanup orphan inode in data relocation tree */
|
||||
fs_root = btrfs_grab_root(fs_info->data_reloc_root);
|
||||
ASSERT(fs_root);
|
||||
err = btrfs_orphan_cleanup(fs_root);
|
||||
ret = btrfs_orphan_cleanup(fs_root);
|
||||
btrfs_put_root(fs_root);
|
||||
}
|
||||
return err;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4393,9 +4388,9 @@ out:
|
||||
*/
|
||||
int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
u64 disk_bytenr = ordered->file_offset + inode->index_cnt;
|
||||
u64 disk_bytenr = ordered->file_offset + inode->reloc_block_group_start;
|
||||
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, disk_bytenr);
|
||||
LIST_HEAD(list);
|
||||
int ret;
|
||||
|
@ -261,7 +261,7 @@ static int init_scrub_stripe(struct btrfs_fs_info *fs_info,
|
||||
atomic_set(&stripe->pending_io, 0);
|
||||
spin_lock_init(&stripe->write_error_lock);
|
||||
|
||||
ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, 0);
|
||||
ret = btrfs_alloc_page_array(SCRUB_STRIPE_PAGES, stripe->pages, false);
|
||||
if (ret < 0)
|
||||
goto error;
|
||||
|
||||
@ -2441,19 +2441,15 @@ static int finish_extent_writes_for_zoned(struct btrfs_root *root,
|
||||
struct btrfs_block_group *cache)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return 0;
|
||||
|
||||
btrfs_wait_block_group_reservations(cache);
|
||||
btrfs_wait_nocow_writers(cache);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start, cache->length);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, cache);
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
return btrfs_commit_transaction(trans);
|
||||
return btrfs_commit_current_transaction(root);
|
||||
}
|
||||
|
||||
static noinline_for_stack
|
||||
@ -2684,8 +2680,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||
*/
|
||||
if (sctx->is_dev_replace) {
|
||||
btrfs_wait_nocow_writers(cache);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, cache->start,
|
||||
cache->length);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, cache);
|
||||
}
|
||||
|
||||
scrub_pause_off(fs_info);
|
||||
|
@ -5188,11 +5188,10 @@ out:
|
||||
static int process_verity(struct send_ctx *sctx)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_fs_info *fs_info = sctx->send_root->fs_info;
|
||||
struct inode *inode;
|
||||
struct fs_path *p;
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, sctx->send_root);
|
||||
inode = btrfs_iget(sctx->cur_ino, sctx->send_root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
@ -5550,7 +5549,7 @@ static int send_encoded_inline_extent(struct send_ctx *sctx,
|
||||
size_t inline_size;
|
||||
int ret;
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
|
||||
inode = btrfs_iget(sctx->cur_ino, root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
@ -5617,7 +5616,7 @@ static int send_encoded_extent(struct send_ctx *sctx, struct btrfs_path *path,
|
||||
u32 crc;
|
||||
int ret;
|
||||
|
||||
inode = btrfs_iget(fs_info->sb, sctx->cur_ino, root);
|
||||
inode = btrfs_iget(sctx->cur_ino, root);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
|
||||
@ -5746,7 +5745,7 @@ static int send_extent_data(struct send_ctx *sctx, struct btrfs_path *path,
|
||||
if (sctx->cur_inode == NULL) {
|
||||
struct btrfs_root *root = sctx->send_root;
|
||||
|
||||
sctx->cur_inode = btrfs_iget(root->fs_info->sb, sctx->cur_ino, root);
|
||||
sctx->cur_inode = btrfs_iget(sctx->cur_ino, root);
|
||||
if (IS_ERR(sctx->cur_inode)) {
|
||||
int err = PTR_ERR(sctx->cur_inode);
|
||||
|
||||
@ -7998,34 +7997,18 @@ out:
|
||||
*/
|
||||
static int ensure_commit_roots_uptodate(struct send_ctx *sctx)
|
||||
{
|
||||
int i;
|
||||
struct btrfs_trans_handle *trans = NULL;
|
||||
struct btrfs_root *root = sctx->parent_root;
|
||||
|
||||
again:
|
||||
if (sctx->parent_root &&
|
||||
sctx->parent_root->node != sctx->parent_root->commit_root)
|
||||
goto commit_trans;
|
||||
if (root && root->node != root->commit_root)
|
||||
return btrfs_commit_current_transaction(root);
|
||||
|
||||
for (i = 0; i < sctx->clone_roots_cnt; i++)
|
||||
if (sctx->clone_roots[i].root->node !=
|
||||
sctx->clone_roots[i].root->commit_root)
|
||||
goto commit_trans;
|
||||
|
||||
if (trans)
|
||||
return btrfs_end_transaction(trans);
|
||||
|
||||
return 0;
|
||||
|
||||
commit_trans:
|
||||
/* Use any root, all fs roots will get their commit roots updated. */
|
||||
if (!trans) {
|
||||
trans = btrfs_join_transaction(sctx->send_root);
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
goto again;
|
||||
for (int i = 0; i < sctx->clone_roots_cnt; i++) {
|
||||
root = sctx->clone_roots[i].root;
|
||||
if (root->node != root->commit_root)
|
||||
return btrfs_commit_current_transaction(root);
|
||||
}
|
||||
|
||||
return btrfs_commit_transaction(trans);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -8046,7 +8029,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
|
||||
ret = btrfs_start_delalloc_snapshot(root, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
|
||||
}
|
||||
|
||||
for (i = 0; i < sctx->clone_roots_cnt; i++) {
|
||||
@ -8054,7 +8037,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
|
||||
ret = btrfs_start_delalloc_snapshot(root, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
|
||||
btrfs_wait_ordered_extents(root, U64_MAX, NULL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
@ -8082,10 +8065,10 @@ static void dedupe_in_progress_warn(const struct btrfs_root *root)
|
||||
btrfs_root_id(root), root->dedupe_in_progress);
|
||||
}
|
||||
|
||||
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg)
|
||||
long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_args *arg)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_root *send_root = BTRFS_I(inode)->root;
|
||||
struct btrfs_root *send_root = inode->root;
|
||||
struct btrfs_fs_info *fs_info = send_root->fs_info;
|
||||
struct btrfs_root *clone_root;
|
||||
struct send_ctx *sctx = NULL;
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <linux/sizes.h>
|
||||
#include <linux/align.h>
|
||||
|
||||
struct inode;
|
||||
struct btrfs_inode;
|
||||
struct btrfs_ioctl_send_args;
|
||||
|
||||
#define BTRFS_SEND_STREAM_MAGIC "btrfs-stream"
|
||||
@ -182,6 +182,6 @@ enum {
|
||||
__BTRFS_SEND_A_MAX = 35,
|
||||
};
|
||||
|
||||
long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg);
|
||||
long btrfs_ioctl_send(struct btrfs_inode *inode, const struct btrfs_ioctl_send_args *arg);
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "linux/spinlock.h"
|
||||
#include <linux/minmax.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "space-info.h"
|
||||
@ -190,6 +192,8 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
|
||||
*/
|
||||
#define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH (75)
|
||||
|
||||
#define BTRFS_UNALLOC_BLOCK_GROUP_TARGET (10ULL)
|
||||
|
||||
/*
|
||||
* Calculate chunk size depending on volume type (regular or zoned).
|
||||
*/
|
||||
@ -232,6 +236,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
|
||||
if (!space_info)
|
||||
return -ENOMEM;
|
||||
|
||||
space_info->fs_info = info;
|
||||
for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
|
||||
INIT_LIST_HEAD(&space_info->block_groups[i]);
|
||||
init_rwsem(&space_info->groups_sem);
|
||||
@ -340,11 +345,32 @@ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static u64 calc_effective_data_chunk_size(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
u64 data_chunk_size;
|
||||
|
||||
/*
|
||||
* Calculate the data_chunk_size, space_info->chunk_size is the
|
||||
* "optimal" chunk size based on the fs size. However when we actually
|
||||
* allocate the chunk we will strip this down further, making it no
|
||||
* more than 10% of the disk or 1G, whichever is smaller.
|
||||
*
|
||||
* On the zoned mode, we need to use zone_size (= data_sinfo->chunk_size)
|
||||
* as it is.
|
||||
*/
|
||||
data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
|
||||
if (btrfs_is_zoned(fs_info))
|
||||
return data_sinfo->chunk_size;
|
||||
data_chunk_size = min(data_sinfo->chunk_size,
|
||||
mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
|
||||
return min_t(u64, data_chunk_size, SZ_1G);
|
||||
}
|
||||
|
||||
static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info,
|
||||
enum btrfs_reserve_flush_enum flush)
|
||||
{
|
||||
struct btrfs_space_info *data_sinfo;
|
||||
u64 profile;
|
||||
u64 avail;
|
||||
u64 data_chunk_size;
|
||||
@ -368,23 +394,7 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
|
||||
if (avail == 0)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Calculate the data_chunk_size, space_info->chunk_size is the
|
||||
* "optimal" chunk size based on the fs size. However when we actually
|
||||
* allocate the chunk we will strip this down further, making it no more
|
||||
* than 10% of the disk or 1G, whichever is smaller.
|
||||
*
|
||||
* On the zoned mode, we need to use zone_size (=
|
||||
* data_sinfo->chunk_size) as it is.
|
||||
*/
|
||||
data_sinfo = btrfs_find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA);
|
||||
if (!btrfs_is_zoned(fs_info)) {
|
||||
data_chunk_size = min(data_sinfo->chunk_size,
|
||||
mult_perc(fs_info->fs_devices->total_rw_bytes, 10));
|
||||
data_chunk_size = min_t(u64, data_chunk_size, SZ_1G);
|
||||
} else {
|
||||
data_chunk_size = data_sinfo->chunk_size;
|
||||
}
|
||||
data_chunk_size = calc_effective_data_chunk_size(fs_info);
|
||||
|
||||
/*
|
||||
* Since data allocations immediately use block groups as part of the
|
||||
@ -605,8 +615,6 @@ static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
|
||||
return nr;
|
||||
}
|
||||
|
||||
#define EXTENT_SIZE_PER_ITEM SZ_256K
|
||||
|
||||
/*
|
||||
* shrink metadata reservation for delalloc
|
||||
*/
|
||||
@ -706,7 +714,7 @@ static void shrink_delalloc(struct btrfs_fs_info *fs_info,
|
||||
skip_async:
|
||||
loops++;
|
||||
if (wait_ordered && !trans) {
|
||||
btrfs_wait_ordered_roots(fs_info, items, 0, (u64)-1);
|
||||
btrfs_wait_ordered_roots(fs_info, items, NULL);
|
||||
} else {
|
||||
time_left = schedule_timeout_killable(1);
|
||||
if (time_left)
|
||||
@ -825,14 +833,7 @@ static void flush_space(struct btrfs_fs_info *fs_info,
|
||||
* because that does not wait for a transaction to fully commit
|
||||
* (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
|
||||
*/
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
ret = PTR_ERR(trans);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
ret = btrfs_commit_transaction(trans);
|
||||
ret = btrfs_commit_current_transaction(root);
|
||||
break;
|
||||
default:
|
||||
ret = -ENOSPC;
|
||||
@ -1886,3 +1887,209 @@ u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
|
||||
|
||||
return free_bytes;
|
||||
}
|
||||
|
||||
/*
 * Compute x as a percentage of y, i.e. (100 * x) / y.
 *
 * Returns 0 when y is 0.
 *
 * If 100 * x does not fit in a u64, both operands are scaled down by 2^10
 * (y is clamped to at least 1) and the multiplication is retried, trading
 * precision for range.
 */
static u64 calc_pct_ratio(u64 x, u64 y)
{
	u64 scaled;

	if (!y)
		return 0;

	/*
	 * The product goes into a separate variable: check_mul_overflow()
	 * stores the truncated result in its destination even when it
	 * reports overflow, so writing it back into x would clobber the
	 * operand we still need for the retry.
	 */
	while (check_mul_overflow(100, x, &scaled)) {
		x >>= 10;
		y >>= 10;
		if (!y)
			y = 1;
	}

	return div64_u64(scaled, y);
}
|
||||
|
||||
/*
|
||||
* A reasonable buffer for unallocated space is 10 data block_groups.
|
||||
* If we claw this back repeatedly, we can still achieve efficient
|
||||
* utilization when near full, and not do too much reclaim while
|
||||
* always maintaining a solid buffer for workloads that quickly
|
||||
* allocate and pressure the unallocated space.
|
||||
*/
|
||||
static u64 calc_unalloc_target(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
u64 chunk_sz = calc_effective_data_chunk_size(fs_info);
|
||||
|
||||
return BTRFS_UNALLOC_BLOCK_GROUP_TARGET * chunk_sz;
|
||||
}
|
||||
|
||||
/*
|
||||
* The fundamental goal of automatic reclaim is to protect the filesystem's
|
||||
* unallocated space and thus minimize the probability of the filesystem going
|
||||
* read only when a metadata allocation failure causes a transaction abort.
|
||||
*
|
||||
* However, relocations happen into the space_info's unused space, therefore
|
||||
* automatic reclaim must also back off as that space runs low. There is no
|
||||
* value in doing trivial "relocations" of re-writing the same block group
|
||||
* into a fresh one.
|
||||
*
|
||||
* Furthermore, we want to avoid doing too much reclaim even if there are good
|
||||
* candidates. This is because the allocator is pretty good at filling up the
|
||||
* holes with writes. So we want to do just enough reclaim to try and stay
|
||||
* safe from running out of unallocated space but not be wasteful about it.
|
||||
*
|
||||
* Therefore, the dynamic reclaim threshold is calculated as follows:
|
||||
* - calculate a target unallocated amount of 10 block group sized chunks
|
||||
* - ratchet up the intensity of reclaim depending on how far we are from
|
||||
* that target by using a formula of unalloc / target to set the threshold.
|
||||
*
|
||||
* Typically with 10 block groups as the target, the discrete values this comes
|
||||
* out to are 0, 10, 20, ... , 80, 90, and 99.
|
||||
*/
|
||||
static int calc_dynamic_reclaim_threshold(struct btrfs_space_info *space_info)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = space_info->fs_info;
|
||||
u64 unalloc = atomic64_read(&fs_info->free_chunk_space);
|
||||
u64 target = calc_unalloc_target(fs_info);
|
||||
u64 alloc = space_info->total_bytes;
|
||||
u64 used = btrfs_space_info_used(space_info, false);
|
||||
u64 unused = alloc - used;
|
||||
u64 want = target > unalloc ? target - unalloc : 0;
|
||||
u64 data_chunk_size = calc_effective_data_chunk_size(fs_info);
|
||||
|
||||
/* If we have no unused space, don't bother, it won't work anyway. */
|
||||
if (unused < data_chunk_size)
|
||||
return 0;
|
||||
|
||||
/* Cast to int is OK because want <= target. */
|
||||
return calc_pct_ratio(want, target);
|
||||
}
|
||||
|
||||
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info)
|
||||
{
|
||||
lockdep_assert_held(&space_info->lock);
|
||||
|
||||
if (READ_ONCE(space_info->dynamic_reclaim))
|
||||
return calc_dynamic_reclaim_threshold(space_info);
|
||||
return READ_ONCE(space_info->bg_reclaim_threshold);
|
||||
}
|
||||
|
||||
/*
|
||||
* Under "urgent" reclaim, we will reclaim even fresh block groups that have
|
||||
* recently seen successful allocations, as we are desperate to reclaim
|
||||
* whatever we can to avoid ENOSPC in a transaction leading to a readonly fs.
|
||||
*/
|
||||
static bool is_reclaim_urgent(struct btrfs_space_info *space_info)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = space_info->fs_info;
|
||||
u64 unalloc = atomic64_read(&fs_info->free_chunk_space);
|
||||
u64 data_chunk_size = calc_effective_data_chunk_size(fs_info);
|
||||
|
||||
return unalloc < data_chunk_size;
|
||||
}
|
||||
|
||||
static int do_reclaim_sweep(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, int raid)
|
||||
{
|
||||
struct btrfs_block_group *bg;
|
||||
int thresh_pct;
|
||||
bool try_again = true;
|
||||
bool urgent;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
urgent = is_reclaim_urgent(space_info);
|
||||
thresh_pct = btrfs_calc_reclaim_threshold(space_info);
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
down_read(&space_info->groups_sem);
|
||||
again:
|
||||
list_for_each_entry(bg, &space_info->block_groups[raid], list) {
|
||||
u64 thresh;
|
||||
bool reclaim = false;
|
||||
|
||||
btrfs_get_block_group(bg);
|
||||
spin_lock(&bg->lock);
|
||||
thresh = mult_perc(bg->length, thresh_pct);
|
||||
if (bg->used < thresh && bg->reclaim_mark) {
|
||||
try_again = false;
|
||||
reclaim = true;
|
||||
}
|
||||
bg->reclaim_mark++;
|
||||
spin_unlock(&bg->lock);
|
||||
if (reclaim)
|
||||
btrfs_mark_bg_to_reclaim(bg);
|
||||
btrfs_put_block_group(bg);
|
||||
}
|
||||
|
||||
/*
|
||||
* In situations where we are very motivated to reclaim (low unalloc)
|
||||
* use two passes to make the reclaim mark check best effort.
|
||||
*
|
||||
* If we have any staler groups, we don't touch the fresher ones, but if we
|
||||
* really need a block group, do take a fresh one.
|
||||
*/
|
||||
if (try_again && urgent) {
|
||||
try_again = false;
|
||||
goto again;
|
||||
}
|
||||
|
||||
up_read(&space_info->groups_sem);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Add @bytes (may be negative) to the running reclaimable_bytes total of
 * @space_info and flag periodic reclaim as ready once that total reaches the
 * size of one effective data chunk.
 *
 * The caller must hold space_info->lock.
 */
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes)
{
	u64 chunk_sz = calc_effective_data_chunk_size(space_info->fs_info);

	lockdep_assert_held(&space_info->lock);
	space_info->reclaimable_bytes += bytes;

	/*
	 * reclaimable_bytes is signed while chunk_sz is unsigned.  Comparing
	 * them directly converts a negative total to a huge unsigned value,
	 * which would wrongly satisfy the threshold, so only compare once the
	 * total is known to be positive.
	 */
	if (space_info->reclaimable_bytes > 0 &&
	    (u64)space_info->reclaimable_bytes >= chunk_sz)
		btrfs_set_periodic_reclaim_ready(space_info, true);
}
|
||||
|
||||
/*
 * Update the periodic_reclaim_ready flag of @space_info.
 *
 * Does nothing when periodic reclaim is disabled for this space_info or the
 * flag already has the requested value.  Clearing the flag also resets the
 * reclaimable_bytes counter.
 *
 * The caller must hold space_info->lock.
 */
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready)
{
	lockdep_assert_held(&space_info->lock);

	if (!READ_ONCE(space_info->periodic_reclaim))
		return;
	if (space_info->periodic_reclaim_ready == ready)
		return;

	space_info->periodic_reclaim_ready = ready;
	if (!ready)
		space_info->reclaimable_bytes = 0;
}
|
||||
|
||||
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info)
|
||||
{
|
||||
bool ret;
|
||||
|
||||
if (space_info->flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
return false;
|
||||
if (!READ_ONCE(space_info->periodic_reclaim))
|
||||
return false;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
ret = space_info->periodic_reclaim_ready;
|
||||
btrfs_set_periodic_reclaim_ready(space_info, false);
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
int ret;
|
||||
int raid;
|
||||
struct btrfs_space_info *space_info;
|
||||
|
||||
list_for_each_entry(space_info, &fs_info->space_info, list) {
|
||||
if (!btrfs_should_periodic_reclaim(space_info))
|
||||
continue;
|
||||
for (raid = 0; raid < BTRFS_NR_RAID_TYPES; raid++) {
|
||||
ret = do_reclaim_sweep(fs_info, space_info, raid);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -94,6 +94,7 @@ enum btrfs_flush_state {
|
||||
};
|
||||
|
||||
struct btrfs_space_info {
|
||||
struct btrfs_fs_info *fs_info;
|
||||
spinlock_t lock;
|
||||
|
||||
u64 total_bytes; /* total bytes in the space,
|
||||
@ -165,6 +166,47 @@ struct btrfs_space_info {
|
||||
|
||||
struct kobject kobj;
|
||||
struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES];
|
||||
|
||||
/*
|
||||
* Monotonically increasing counter of block group reclaim attempts
|
||||
* Exposed in /sys/fs/btrfs/<uuid>/allocation/<type>/reclaim_count
|
||||
*/
|
||||
u64 reclaim_count;
|
||||
|
||||
/*
|
||||
* Monotonically increasing counter of reclaimed bytes
|
||||
* Exposed in /sys/fs/btrfs/<uuid>/allocation/<type>/reclaim_bytes
|
||||
*/
|
||||
u64 reclaim_bytes;
|
||||
|
||||
/*
|
||||
* Monotonically increasing counter of reclaim errors
|
||||
* Exposed in /sys/fs/btrfs/<uuid>/allocation/<type>/reclaim_errors
|
||||
*/
|
||||
u64 reclaim_errors;
|
||||
|
||||
/*
|
||||
* If true, use the dynamic relocation threshold, instead of the
|
||||
* fixed bg_reclaim_threshold.
|
||||
*/
|
||||
bool dynamic_reclaim;
|
||||
|
||||
/*
|
||||
* Periodically check all block groups against the reclaim
|
||||
* threshold in the cleaner thread.
|
||||
*/
|
||||
bool periodic_reclaim;
|
||||
|
||||
/*
|
||||
* Periodic reclaim should be a no-op if a space_info hasn't
|
||||
* freed any space since the last time we tried.
|
||||
*/
|
||||
bool periodic_reclaim_ready;
|
||||
|
||||
/*
|
||||
* Net bytes freed or allocated since the last reclaim pass.
|
||||
*/
|
||||
s64 reclaimable_bytes;
|
||||
};
|
||||
|
||||
struct reserve_ticket {
|
||||
@ -247,4 +289,10 @@ void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info);
|
||||
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
|
||||
|
||||
void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes);
|
||||
void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready);
|
||||
bool btrfs_should_periodic_reclaim(struct btrfs_space_info *space_info);
|
||||
int btrfs_calc_reclaim_threshold(struct btrfs_space_info *space_info);
|
||||
int btrfs_reclaim_sweep(struct btrfs_fs_info *fs_info);
|
||||
|
||||
#endif /* BTRFS_SPACE_INFO_H */
|
||||
|
@ -74,7 +74,7 @@ bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info, struct address_space
|
||||
* mapping. And if page->mapping->host is data inode, it's subpage.
|
||||
* As we have ruled out the sectorsize >= PAGE_SIZE case already.
|
||||
*/
|
||||
if (!mapping || !mapping->host || is_data_inode(mapping->host))
|
||||
if (!mapping || !mapping->host || is_data_inode(BTRFS_I(mapping->host)))
|
||||
return true;
|
||||
|
||||
/*
|
||||
@ -242,12 +242,12 @@ static void btrfs_subpage_assert(const struct btrfs_fs_info *fs_info,
|
||||
|
||||
#define subpage_calc_start_bit(fs_info, folio, name, start, len) \
|
||||
({ \
|
||||
unsigned int start_bit; \
|
||||
unsigned int __start_bit; \
|
||||
\
|
||||
btrfs_subpage_assert(fs_info, folio, start, len); \
|
||||
start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
|
||||
start_bit += fs_info->subpage_info->name##_offset; \
|
||||
start_bit; \
|
||||
__start_bit = offset_in_page(start) >> fs_info->sectorsize_bits; \
|
||||
__start_bit += fs_info->subpage_info->name##_offset; \
|
||||
__start_bit; \
|
||||
})
|
||||
|
||||
void btrfs_subpage_start_reader(const struct btrfs_fs_info *fs_info,
|
||||
@ -283,7 +283,7 @@ void btrfs_subpage_end_reader(const struct btrfs_fs_info *fs_info,
|
||||
bool last;
|
||||
|
||||
btrfs_subpage_assert(fs_info, folio, start, len);
|
||||
is_data = is_data_inode(folio->mapping->host);
|
||||
is_data = is_data_inode(BTRFS_I(folio->mapping->host));
|
||||
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
|
||||
@ -703,19 +703,29 @@ IMPLEMENT_BTRFS_PAGE_OPS(checked, folio_set_checked, folio_clear_checked,
|
||||
* Make sure not only the page dirty bit is cleared, but also subpage dirty bit
|
||||
* is cleared.
|
||||
*/
|
||||
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio)
|
||||
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
struct btrfs_subpage *subpage;
|
||||
unsigned int start_bit;
|
||||
unsigned int nbits;
|
||||
unsigned long flags;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_BTRFS_ASSERT))
|
||||
return;
|
||||
|
||||
ASSERT(!folio_test_dirty(folio));
|
||||
if (!btrfs_is_subpage(fs_info, folio->mapping))
|
||||
if (!btrfs_is_subpage(fs_info, folio->mapping)) {
|
||||
ASSERT(!folio_test_dirty(folio));
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT(folio_test_private(folio) && folio_get_private(folio));
|
||||
ASSERT(subpage_test_bitmap_all_zero(fs_info, subpage, dirty));
|
||||
start_bit = subpage_calc_start_bit(fs_info, folio, dirty, start, len);
|
||||
nbits = len >> fs_info->sectorsize_bits;
|
||||
subpage = folio_get_private(folio);
|
||||
ASSERT(subpage);
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -765,6 +775,130 @@ void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info,
|
||||
btrfs_folio_end_writer_lock(fs_info, folio, start, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is for folio already locked by plain lock_page()/folio_lock(), which
|
||||
* doesn't have any subpage awareness.
|
||||
*
|
||||
* This populates the involved subpage ranges so that subpage helpers can
|
||||
* properly unlock them.
|
||||
*/
|
||||
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len)
|
||||
{
|
||||
struct btrfs_subpage *subpage;
|
||||
unsigned long flags;
|
||||
unsigned int start_bit;
|
||||
unsigned int nbits;
|
||||
int ret;
|
||||
|
||||
ASSERT(folio_test_locked(folio));
|
||||
if (unlikely(!fs_info) || !btrfs_is_subpage(fs_info, folio->mapping))
|
||||
return;
|
||||
|
||||
subpage = folio_get_private(folio);
|
||||
start_bit = subpage_calc_start_bit(fs_info, folio, locked, start, len);
|
||||
nbits = len >> fs_info->sectorsize_bits;
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
/* Target range should not yet be locked. */
|
||||
ASSERT(bitmap_test_range_all_zero(subpage->bitmaps, start_bit, nbits));
|
||||
bitmap_set(subpage->bitmaps, start_bit, nbits);
|
||||
ret = atomic_add_return(nbits, &subpage->writers);
|
||||
ASSERT(ret <= fs_info->subpage_info->bitmap_nr_bits);
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find any subpage writer locked range inside @folio, starting at file offset
|
||||
* @search_start. The caller should ensure the folio is locked.
|
||||
*
|
||||
* Return true and update @found_start_ret and @found_len_ret to the first
|
||||
* writer locked range.
|
||||
* Return false if there is no writer locked range.
|
||||
*/
|
||||
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 search_start,
|
||||
u64 *found_start_ret, u32 *found_len_ret)
|
||||
{
|
||||
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
const unsigned int len = PAGE_SIZE - offset_in_page(search_start);
|
||||
const unsigned int start_bit = subpage_calc_start_bit(fs_info, folio,
|
||||
locked, search_start, len);
|
||||
const unsigned int locked_bitmap_start = subpage_info->locked_offset;
|
||||
const unsigned int locked_bitmap_end = locked_bitmap_start +
|
||||
subpage_info->bitmap_nr_bits;
|
||||
unsigned long flags;
|
||||
int first_zero;
|
||||
int first_set;
|
||||
bool found = false;
|
||||
|
||||
ASSERT(folio_test_locked(folio));
|
||||
spin_lock_irqsave(&subpage->lock, flags);
|
||||
first_set = find_next_bit(subpage->bitmaps, locked_bitmap_end, start_bit);
|
||||
if (first_set >= locked_bitmap_end)
|
||||
goto out;
|
||||
|
||||
found = true;
|
||||
|
||||
*found_start_ret = folio_pos(folio) +
|
||||
((first_set - locked_bitmap_start) << fs_info->sectorsize_bits);
|
||||
/*
|
||||
* Since @first_set is ensured to be smaller than locked_bitmap_end
|
||||
* here, @found_start_ret should be inside the folio.
|
||||
*/
|
||||
ASSERT(*found_start_ret < folio_pos(folio) + PAGE_SIZE);
|
||||
|
||||
first_zero = find_next_zero_bit(subpage->bitmaps, locked_bitmap_end, first_set);
|
||||
*found_len_ret = (first_zero - first_set) << fs_info->sectorsize_bits;
|
||||
out:
|
||||
spin_unlock_irqrestore(&subpage->lock, flags);
|
||||
return found;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlike btrfs_folio_end_writer_lock() which unlocks a specified subpage range,
|
||||
* this ends all writer locked ranges of a page.
|
||||
*
|
||||
* This is for the locked page of __extent_writepage(), as the locked page
|
||||
* can contain several locked subpage ranges.
|
||||
*/
|
||||
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio)
|
||||
{
|
||||
struct btrfs_subpage *subpage = folio_get_private(folio);
|
||||
u64 folio_start = folio_pos(folio);
|
||||
u64 cur = folio_start;
|
||||
|
||||
ASSERT(folio_test_locked(folio));
|
||||
if (!btrfs_is_subpage(fs_info, folio->mapping)) {
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
|
||||
/* The page has no new delalloc range locked on it. Just plain unlock. */
|
||||
if (atomic_read(&subpage->writers) == 0) {
|
||||
folio_unlock(folio);
|
||||
return;
|
||||
}
|
||||
while (cur < folio_start + PAGE_SIZE) {
|
||||
u64 found_start;
|
||||
u32 found_len;
|
||||
bool found;
|
||||
bool last;
|
||||
|
||||
found = btrfs_subpage_find_writer_locked(fs_info, folio, cur,
|
||||
&found_start, &found_len);
|
||||
if (!found)
|
||||
break;
|
||||
last = btrfs_subpage_end_and_test_writer(fs_info, folio,
|
||||
found_start, found_len);
|
||||
if (last) {
|
||||
folio_unlock(folio);
|
||||
break;
|
||||
}
|
||||
cur = found_start + found_len;
|
||||
}
|
||||
}
|
||||
|
||||
#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \
|
||||
bitmap_cut(dst, subpage->bitmaps, 0, \
|
||||
subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
|
||||
@ -775,7 +909,6 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
|
||||
struct btrfs_subpage *subpage;
|
||||
unsigned long uptodate_bitmap;
|
||||
unsigned long error_bitmap;
|
||||
unsigned long dirty_bitmap;
|
||||
unsigned long writeback_bitmap;
|
||||
unsigned long ordered_bitmap;
|
||||
@ -797,10 +930,9 @@ void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
|
||||
dump_page(folio_page(folio, 0), "btrfs subpage dump");
|
||||
btrfs_warn(fs_info,
|
||||
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl error=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
|
||||
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
|
||||
start, len, folio_pos(folio),
|
||||
subpage_info->bitmap_nr_bits, &uptodate_bitmap,
|
||||
subpage_info->bitmap_nr_bits, &error_bitmap,
|
||||
subpage_info->bitmap_nr_bits, &dirty_bitmap,
|
||||
subpage_info->bitmap_nr_bits, &writeback_bitmap,
|
||||
subpage_info->bitmap_nr_bits, &ordered_bitmap,
|
||||
|
@ -112,6 +112,12 @@ int btrfs_folio_start_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_end_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_set_writer_lock(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
bool btrfs_subpage_find_writer_locked(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 search_start,
|
||||
u64 *found_start_ret, u32 *found_len_ret);
|
||||
void btrfs_folio_end_all_writers(const struct btrfs_fs_info *fs_info, struct folio *folio);
|
||||
|
||||
/*
|
||||
* Template for subpage related operations.
|
||||
@ -156,7 +162,8 @@ DECLARE_BTRFS_SUBPAGE_OPS(checked);
|
||||
bool btrfs_subpage_clear_and_test_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
|
||||
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info, struct folio *folio);
|
||||
void btrfs_folio_assert_not_dirty(const struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void btrfs_folio_unlock_writer(struct btrfs_fs_info *fs_info,
|
||||
struct folio *folio, u64 start, u32 len);
|
||||
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include "disk-io.h"
|
||||
#include "transaction.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "direct-io.h"
|
||||
#include "props.h"
|
||||
#include "xattr.h"
|
||||
#include "bio.h"
|
||||
@ -125,9 +126,6 @@ enum {
|
||||
Opt_rescue,
|
||||
Opt_usebackuproot,
|
||||
Opt_nologreplay,
|
||||
Opt_ignorebadroots,
|
||||
Opt_ignoredatacsums,
|
||||
Opt_rescue_all,
|
||||
|
||||
/* Debugging options */
|
||||
Opt_enospc_debug,
|
||||
@ -178,6 +176,8 @@ enum {
|
||||
Opt_rescue_nologreplay,
|
||||
Opt_rescue_ignorebadroots,
|
||||
Opt_rescue_ignoredatacsums,
|
||||
Opt_rescue_ignoremetacsums,
|
||||
Opt_rescue_ignoresuperflags,
|
||||
Opt_rescue_parameter_all,
|
||||
};
|
||||
|
||||
@ -187,7 +187,11 @@ static const struct constant_table btrfs_parameter_rescue[] = {
|
||||
{ "ignorebadroots", Opt_rescue_ignorebadroots },
|
||||
{ "ibadroots", Opt_rescue_ignorebadroots },
|
||||
{ "ignoredatacsums", Opt_rescue_ignoredatacsums },
|
||||
{ "ignoremetacsums", Opt_rescue_ignoremetacsums},
|
||||
{ "ignoresuperflags", Opt_rescue_ignoresuperflags},
|
||||
{ "idatacsums", Opt_rescue_ignoredatacsums },
|
||||
{ "imetacsums", Opt_rescue_ignoremetacsums},
|
||||
{ "isuperflags", Opt_rescue_ignoresuperflags},
|
||||
{ "all", Opt_rescue_parameter_all },
|
||||
{}
|
||||
};
|
||||
@ -573,8 +577,16 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param)
|
||||
case Opt_rescue_ignoredatacsums:
|
||||
btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
|
||||
break;
|
||||
case Opt_rescue_ignoremetacsums:
|
||||
btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS);
|
||||
break;
|
||||
case Opt_rescue_ignoresuperflags:
|
||||
btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS);
|
||||
break;
|
||||
case Opt_rescue_parameter_all:
|
||||
btrfs_set_opt(ctx->mount_opt, IGNOREDATACSUMS);
|
||||
btrfs_set_opt(ctx->mount_opt, IGNOREMETACSUMS);
|
||||
btrfs_set_opt(ctx->mount_opt, IGNORESUPERFLAGS);
|
||||
btrfs_set_opt(ctx->mount_opt, IGNOREBADROOTS);
|
||||
btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY);
|
||||
break;
|
||||
@ -629,7 +641,7 @@ static void btrfs_clear_oneshot_options(struct btrfs_fs_info *fs_info)
|
||||
btrfs_clear_opt(fs_info->mount_opt, NOSPACECACHE);
|
||||
}
|
||||
|
||||
static bool check_ro_option(struct btrfs_fs_info *fs_info,
|
||||
static bool check_ro_option(const struct btrfs_fs_info *fs_info,
|
||||
unsigned long mount_opt, unsigned long opt,
|
||||
const char *opt_name)
|
||||
{
|
||||
@ -641,7 +653,7 @@ static bool check_ro_option(struct btrfs_fs_info *fs_info,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
|
||||
bool btrfs_check_options(const struct btrfs_fs_info *info, unsigned long *mount_opt,
|
||||
unsigned long flags)
|
||||
{
|
||||
bool ret = true;
|
||||
@ -649,7 +661,9 @@ bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
|
||||
if (!(flags & SB_RDONLY) &&
|
||||
(check_ro_option(info, *mount_opt, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") ||
|
||||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") ||
|
||||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums")))
|
||||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums") ||
|
||||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNOREMETACSUMS, "ignoremetacsums") ||
|
||||
check_ro_option(info, *mount_opt, BTRFS_MOUNT_IGNORESUPERFLAGS, "ignoresuperflags")))
|
||||
ret = false;
|
||||
|
||||
if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) &&
|
||||
@ -949,7 +963,7 @@ static int btrfs_fill_super(struct super_block *sb,
|
||||
return err;
|
||||
}
|
||||
|
||||
inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
|
||||
inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root);
|
||||
if (IS_ERR(inode)) {
|
||||
err = PTR_ERR(inode);
|
||||
btrfs_handle_fs_error(fs_info, err, NULL);
|
||||
@ -983,7 +997,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
|
||||
return 0;
|
||||
}
|
||||
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
@ -1065,6 +1079,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
|
||||
print_rescue_option(seq, "ignorebadroots", &printed);
|
||||
if (btrfs_test_opt(info, IGNOREDATACSUMS))
|
||||
print_rescue_option(seq, "ignoredatacsums", &printed);
|
||||
if (btrfs_test_opt(info, IGNOREMETACSUMS))
|
||||
print_rescue_option(seq, "ignoremetacsums", &printed);
|
||||
if (btrfs_test_opt(info, IGNORESUPERFLAGS))
|
||||
print_rescue_option(seq, "ignoresuperflags", &printed);
|
||||
if (btrfs_test_opt(info, FLUSHONCOMMIT))
|
||||
seq_puts(seq, ",flushoncommit");
|
||||
if (btrfs_test_opt(info, DISCARD_SYNC))
|
||||
@ -1422,6 +1440,8 @@ static void btrfs_emit_options(struct btrfs_fs_info *info,
|
||||
btrfs_info_if_set(info, old, USEBACKUPROOT, "trying to use backup root at mount time");
|
||||
btrfs_info_if_set(info, old, IGNOREBADROOTS, "ignoring bad roots");
|
||||
btrfs_info_if_set(info, old, IGNOREDATACSUMS, "ignoring data csums");
|
||||
btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums");
|
||||
btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags");
|
||||
|
||||
btrfs_info_if_unset(info, old, NODATACOW, "setting datacow");
|
||||
btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations");
|
||||
@ -2257,9 +2277,7 @@ out:
|
||||
|
||||
static int btrfs_freeze(struct super_block *sb)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
struct btrfs_fs_info *fs_info = btrfs_sb(sb);
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
|
||||
set_bit(BTRFS_FS_FROZEN, &fs_info->flags);
|
||||
/*
|
||||
@ -2268,14 +2286,7 @@ static int btrfs_freeze(struct super_block *sb)
|
||||
* we want to avoid on a frozen filesystem), or do the commit
|
||||
* ourselves.
|
||||
*/
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
/* no transaction, don't bother */
|
||||
if (PTR_ERR(trans) == -ENOENT)
|
||||
return 0;
|
||||
return PTR_ERR(trans);
|
||||
}
|
||||
return btrfs_commit_transaction(trans);
|
||||
return btrfs_commit_current_transaction(fs_info->tree_root);
|
||||
}
|
||||
|
||||
static int check_dev_super(struct btrfs_device *dev)
|
||||
@ -2498,6 +2509,9 @@ static const struct init_sequence mod_init_seq[] = {
|
||||
}, {
|
||||
.init_func = btrfs_init_cachep,
|
||||
.exit_func = btrfs_destroy_cachep,
|
||||
}, {
|
||||
.init_func = btrfs_init_dio,
|
||||
.exit_func = btrfs_destroy_dio,
|
||||
}, {
|
||||
.init_func = btrfs_transaction_init,
|
||||
.exit_func = btrfs_transaction_exit,
|
||||
@ -2590,6 +2604,7 @@ static int __init init_btrfs_fs(void)
|
||||
late_initcall(init_btrfs_fs);
|
||||
module_exit(exit_btrfs_fs)
|
||||
|
||||
MODULE_DESCRIPTION("B-Tree File System (BTRFS)");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_SOFTDEP("pre: crc32c");
|
||||
MODULE_SOFTDEP("pre: xxhash64");
|
||||
|
@ -10,7 +10,7 @@
|
||||
struct super_block;
|
||||
struct btrfs_fs_info;
|
||||
|
||||
bool btrfs_check_options(struct btrfs_fs_info *info, unsigned long *mount_opt,
|
||||
bool btrfs_check_options(const struct btrfs_fs_info *info, unsigned long *mount_opt,
|
||||
unsigned long flags);
|
||||
int btrfs_sync_fs(struct super_block *sb, int wait);
|
||||
char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info,
|
||||
|
@ -385,6 +385,8 @@ static const char *rescue_opts[] = {
|
||||
"nologreplay",
|
||||
"ignorebadroots",
|
||||
"ignoredatacsums",
|
||||
"ignoremetacsums",
|
||||
"ignoresuperflags",
|
||||
"all",
|
||||
};
|
||||
|
||||
@ -894,6 +896,9 @@ SPACE_INFO_ATTR(bytes_readonly);
|
||||
SPACE_INFO_ATTR(bytes_zone_unusable);
|
||||
SPACE_INFO_ATTR(disk_used);
|
||||
SPACE_INFO_ATTR(disk_total);
|
||||
SPACE_INFO_ATTR(reclaim_count);
|
||||
SPACE_INFO_ATTR(reclaim_bytes);
|
||||
SPACE_INFO_ATTR(reclaim_errors);
|
||||
BTRFS_ATTR_RW(space_info, chunk_size, btrfs_chunk_size_show, btrfs_chunk_size_store);
|
||||
BTRFS_ATTR(space_info, size_classes, btrfs_size_classes_show);
|
||||
|
||||
@ -902,8 +907,12 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
ssize_t ret;
|
||||
|
||||
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold));
|
||||
spin_lock(&space_info->lock);
|
||||
ret = sysfs_emit(buf, "%d\n", btrfs_calc_reclaim_threshold(space_info));
|
||||
spin_unlock(&space_info->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
|
||||
@ -914,6 +923,9 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj,
|
||||
int thresh;
|
||||
int ret;
|
||||
|
||||
if (READ_ONCE(space_info->dynamic_reclaim))
|
||||
return -EINVAL;
|
||||
|
||||
ret = kstrtoint(buf, 10, &thresh);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -930,6 +942,72 @@ BTRFS_ATTR_RW(space_info, bg_reclaim_threshold,
|
||||
btrfs_sinfo_bg_reclaim_threshold_show,
|
||||
btrfs_sinfo_bg_reclaim_threshold_store);
|
||||
|
||||
/*
* Show handler registered for the space_info "dynamic_reclaim" attribute.
*
* Reads space_info->dynamic_reclaim once with READ_ONCE() and prints it into
* @buf as "%d\n".  @a is unused.  Returns the result of sysfs_emit().
*/
static ssize_t btrfs_sinfo_dynamic_reclaim_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
|
||||
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->dynamic_reclaim));
|
||||
}
|
||||
|
||||
/*
* Store handler registered for the space_info "dynamic_reclaim" attribute.
*
* Parses @buf as a base-10 integer.  A parse failure returns the kstrtoint()
* error, a negative value returns -EINVAL.  Any other value is collapsed to
* 0 or 1 (value != 0) and written to space_info->dynamic_reclaim with
* WRITE_ONCE().  @a is unused.  Returns @len on success.
*/
static ssize_t btrfs_sinfo_dynamic_reclaim_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
int dynamic_reclaim;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &dynamic_reclaim);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Only zero and positive integers are accepted. */
if (dynamic_reclaim < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* Any non-zero input enables the knob; only 0 disables it. */
WRITE_ONCE(space_info->dynamic_reclaim, dynamic_reclaim != 0);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
BTRFS_ATTR_RW(space_info, dynamic_reclaim,
|
||||
btrfs_sinfo_dynamic_reclaim_show,
|
||||
btrfs_sinfo_dynamic_reclaim_store);
|
||||
|
||||
/*
* Show handler registered for the space_info "periodic_reclaim" attribute.
*
* Reads space_info->periodic_reclaim once with READ_ONCE() and prints it into
* @buf as "%d\n".  @a is unused.  Returns the result of sysfs_emit().
*/
static ssize_t btrfs_sinfo_periodic_reclaim_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
|
||||
return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->periodic_reclaim));
|
||||
}
|
||||
|
||||
/*
* Store handler registered for the space_info "periodic_reclaim" attribute.
*
* Parses @buf as a base-10 integer.  A parse failure returns the kstrtoint()
* error, a negative value returns -EINVAL.  Any other value is collapsed to
* 0 or 1 (value != 0) and written to space_info->periodic_reclaim with
* WRITE_ONCE().  @a is unused.  Returns @len on success.
*/
static ssize_t btrfs_sinfo_periodic_reclaim_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_space_info *space_info = to_space_info(kobj);
|
||||
int periodic_reclaim;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &periodic_reclaim);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* Only zero and positive integers are accepted. */
if (periodic_reclaim < 0)
|
||||
return -EINVAL;
|
||||
|
||||
/* Any non-zero input enables the knob; only 0 disables it. */
WRITE_ONCE(space_info->periodic_reclaim, periodic_reclaim != 0);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
BTRFS_ATTR_RW(space_info, periodic_reclaim,
|
||||
btrfs_sinfo_periodic_reclaim_show,
|
||||
btrfs_sinfo_periodic_reclaim_store);
|
||||
|
||||
/*
|
||||
* Allocation information about block group types.
|
||||
*
|
||||
@ -947,8 +1025,13 @@ static struct attribute *space_info_attrs[] = {
|
||||
BTRFS_ATTR_PTR(space_info, disk_used),
|
||||
BTRFS_ATTR_PTR(space_info, disk_total),
|
||||
BTRFS_ATTR_PTR(space_info, bg_reclaim_threshold),
|
||||
BTRFS_ATTR_PTR(space_info, dynamic_reclaim),
|
||||
BTRFS_ATTR_PTR(space_info, chunk_size),
|
||||
BTRFS_ATTR_PTR(space_info, size_classes),
|
||||
BTRFS_ATTR_PTR(space_info, reclaim_count),
|
||||
BTRFS_ATTR_PTR(space_info, reclaim_bytes),
|
||||
BTRFS_ATTR_PTR(space_info, reclaim_errors),
|
||||
BTRFS_ATTR_PTR(space_info, periodic_reclaim),
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
BTRFS_ATTR_PTR(space_info, force_chunk_alloc),
|
||||
#endif
|
||||
|
@ -61,10 +61,7 @@ struct inode *btrfs_new_test_inode(void)
|
||||
return NULL;
|
||||
|
||||
inode->i_mode = S_IFREG;
|
||||
inode->i_ino = BTRFS_FIRST_FREE_OBJECTID;
|
||||
BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
|
||||
BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
|
||||
BTRFS_I(inode)->location.offset = 0;
|
||||
btrfs_set_inode_number(BTRFS_I(inode), BTRFS_FIRST_FREE_OBJECTID);
|
||||
inode_init_owner(&nop_mnt_idmap, inode, NULL, S_IFREG);
|
||||
|
||||
return inode;
|
||||
|
@ -19,8 +19,8 @@ static int free_extent_map_tree(struct btrfs_inode *inode)
|
||||
int ret = 0;
|
||||
|
||||
write_lock(&em_tree->lock);
|
||||
while (!RB_EMPTY_ROOT(&em_tree->map.rb_root)) {
|
||||
node = rb_first_cached(&em_tree->map);
|
||||
while (!RB_EMPTY_ROOT(&em_tree->root)) {
|
||||
node = rb_first(&em_tree->root);
|
||||
em = rb_entry(node, struct extent_map, rb_node);
|
||||
remove_extent_mapping(inode, em);
|
||||
|
||||
@ -28,9 +28,10 @@ static int free_extent_map_tree(struct btrfs_inode *inode)
|
||||
if (refcount_read(&em->refs) != 1) {
|
||||
ret = -EINVAL;
|
||||
test_err(
|
||||
"em leak: em (start %llu len %llu block_start %llu block_len %llu) refs %d",
|
||||
em->start, em->len, em->block_start,
|
||||
em->block_len, refcount_read(&em->refs));
|
||||
"em leak: em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu offset %llu) refs %d",
|
||||
em->start, em->len, em->disk_bytenr,
|
||||
em->disk_num_bytes, em->offset,
|
||||
refcount_read(&em->refs));
|
||||
|
||||
refcount_set(&em->refs, 1);
|
||||
}
|
||||
@ -76,8 +77,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
/* Add [0, 16K) */
|
||||
em->start = 0;
|
||||
em->len = SZ_16K;
|
||||
em->block_start = 0;
|
||||
em->block_len = SZ_16K;
|
||||
em->disk_bytenr = 0;
|
||||
em->disk_num_bytes = SZ_16K;
|
||||
em->ram_bytes = SZ_16K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -97,8 +99,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
|
||||
em->start = SZ_16K;
|
||||
em->len = SZ_4K;
|
||||
em->block_start = SZ_32K; /* avoid merging */
|
||||
em->block_len = SZ_4K;
|
||||
em->disk_bytenr = SZ_32K; /* avoid merging */
|
||||
em->disk_num_bytes = SZ_4K;
|
||||
em->ram_bytes = SZ_4K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -118,8 +121,9 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
/* Add [0, 8K), should return [0, 16K) instead. */
|
||||
em->start = start;
|
||||
em->len = len;
|
||||
em->block_start = start;
|
||||
em->block_len = len;
|
||||
em->disk_bytenr = start;
|
||||
em->disk_num_bytes = len;
|
||||
em->ram_bytes = len;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -134,11 +138,11 @@ static int test_case_1(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
goto out;
|
||||
}
|
||||
if (em->start != 0 || extent_map_end(em) != SZ_16K ||
|
||||
em->block_start != 0 || em->block_len != SZ_16K) {
|
||||
em->disk_bytenr != 0 || em->disk_num_bytes != SZ_16K) {
|
||||
test_err(
|
||||
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
|
||||
"case1 [%llu %llu]: ret %d return a wrong em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu",
|
||||
start, start + len, ret, em->start, em->len,
|
||||
em->block_start, em->block_len);
|
||||
em->disk_bytenr, em->disk_num_bytes);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
free_extent_map(em);
|
||||
@ -172,8 +176,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
/* Add [0, 1K) */
|
||||
em->start = 0;
|
||||
em->len = SZ_1K;
|
||||
em->block_start = EXTENT_MAP_INLINE;
|
||||
em->block_len = (u64)-1;
|
||||
em->disk_bytenr = EXTENT_MAP_INLINE;
|
||||
em->disk_num_bytes = 0;
|
||||
em->ram_bytes = SZ_1K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -193,8 +198,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
|
||||
em->start = SZ_4K;
|
||||
em->len = SZ_4K;
|
||||
em->block_start = SZ_4K;
|
||||
em->block_len = SZ_4K;
|
||||
em->disk_bytenr = SZ_4K;
|
||||
em->disk_num_bytes = SZ_4K;
|
||||
em->ram_bytes = SZ_4K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -214,8 +220,9 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
/* Add [0, 1K) */
|
||||
em->start = 0;
|
||||
em->len = SZ_1K;
|
||||
em->block_start = EXTENT_MAP_INLINE;
|
||||
em->block_len = (u64)-1;
|
||||
em->disk_bytenr = EXTENT_MAP_INLINE;
|
||||
em->disk_num_bytes = 0;
|
||||
em->ram_bytes = SZ_1K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -229,11 +236,10 @@ static int test_case_2(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
goto out;
|
||||
}
|
||||
if (em->start != 0 || extent_map_end(em) != SZ_1K ||
|
||||
em->block_start != EXTENT_MAP_INLINE || em->block_len != (u64)-1) {
|
||||
em->disk_bytenr != EXTENT_MAP_INLINE) {
|
||||
test_err(
|
||||
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu block_start %llu block_len %llu",
|
||||
ret, em->start, em->len, em->block_start,
|
||||
em->block_len);
|
||||
"case2 [0 1K]: ret %d return a wrong em (start %llu len %llu disk_bytenr %llu",
|
||||
ret, em->start, em->len, em->disk_bytenr);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
free_extent_map(em);
|
||||
@ -263,8 +269,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
|
||||
/* Add [4K, 8K) */
|
||||
em->start = SZ_4K;
|
||||
em->len = SZ_4K;
|
||||
em->block_start = SZ_4K;
|
||||
em->block_len = SZ_4K;
|
||||
em->disk_bytenr = SZ_4K;
|
||||
em->disk_num_bytes = SZ_4K;
|
||||
em->ram_bytes = SZ_4K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -284,8 +291,9 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
|
||||
/* Add [0, 16K) */
|
||||
em->start = 0;
|
||||
em->len = SZ_16K;
|
||||
em->block_start = 0;
|
||||
em->block_len = SZ_16K;
|
||||
em->disk_bytenr = 0;
|
||||
em->disk_num_bytes = SZ_16K;
|
||||
em->ram_bytes = SZ_16K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, start, len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -305,11 +313,11 @@ static int __test_case_3(struct btrfs_fs_info *fs_info,
|
||||
* em->start.
|
||||
*/
|
||||
if (start < em->start || start + len > extent_map_end(em) ||
|
||||
em->start != em->block_start || em->len != em->block_len) {
|
||||
em->start != extent_map_block_start(em)) {
|
||||
test_err(
|
||||
"case3 [%llu %llu): ret %d em (start %llu len %llu block_start %llu block_len %llu)",
|
||||
"case3 [%llu %llu): ret %d em (start %llu len %llu disk_bytenr %llu block_len %llu)",
|
||||
start, start + len, ret, em->start, em->len,
|
||||
em->block_start, em->block_len);
|
||||
em->disk_bytenr, em->disk_num_bytes);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
free_extent_map(em);
|
||||
@ -370,8 +378,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
|
||||
/* Add [0K, 8K) */
|
||||
em->start = 0;
|
||||
em->len = SZ_8K;
|
||||
em->block_start = 0;
|
||||
em->block_len = SZ_8K;
|
||||
em->disk_bytenr = 0;
|
||||
em->disk_num_bytes = SZ_8K;
|
||||
em->ram_bytes = SZ_8K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -391,8 +400,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
|
||||
/* Add [8K, 32K) */
|
||||
em->start = SZ_8K;
|
||||
em->len = 24 * SZ_1K;
|
||||
em->block_start = SZ_16K; /* avoid merging */
|
||||
em->block_len = 24 * SZ_1K;
|
||||
em->disk_bytenr = SZ_16K; /* avoid merging */
|
||||
em->disk_num_bytes = 24 * SZ_1K;
|
||||
em->ram_bytes = 24 * SZ_1K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -411,8 +421,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
|
||||
/* Add [0K, 32K) */
|
||||
em->start = 0;
|
||||
em->len = SZ_32K;
|
||||
em->block_start = 0;
|
||||
em->block_len = SZ_32K;
|
||||
em->disk_bytenr = 0;
|
||||
em->disk_num_bytes = SZ_32K;
|
||||
em->ram_bytes = SZ_32K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, start, len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -429,9 +440,9 @@ static int __test_case_4(struct btrfs_fs_info *fs_info,
|
||||
}
|
||||
if (start < em->start || start + len > extent_map_end(em)) {
|
||||
test_err(
|
||||
"case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu block_start %llu block_len %llu)",
|
||||
start, start + len, ret, em->start, em->len, em->block_start,
|
||||
em->block_len);
|
||||
"case4 [%llu %llu): ret %d, added wrong em (start %llu len %llu disk_bytenr %llu disk_num_bytes %llu)",
|
||||
start, start + len, ret, em->start, em->len,
|
||||
em->disk_bytenr, em->disk_num_bytes);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
free_extent_map(em);
|
||||
@ -495,8 +506,9 @@ static int add_compressed_extent(struct btrfs_inode *inode,
|
||||
|
||||
em->start = start;
|
||||
em->len = len;
|
||||
em->block_start = block_start;
|
||||
em->block_len = SZ_4K;
|
||||
em->disk_bytenr = block_start;
|
||||
em->disk_num_bytes = SZ_4K;
|
||||
em->ram_bytes = len;
|
||||
em->flags |= EXTENT_FLAG_COMPRESS_ZLIB;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
@ -551,7 +563,7 @@ static int validate_range(struct extent_map_tree *em_tree, int index)
|
||||
struct rb_node *n;
|
||||
int i;
|
||||
|
||||
for (i = 0, n = rb_first_cached(&em_tree->map);
|
||||
for (i = 0, n = rb_first(&em_tree->root);
|
||||
valid_ranges[index][i].len && n;
|
||||
i++, n = rb_next(n)) {
|
||||
struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
|
||||
@ -716,8 +728,9 @@ static int test_case_6(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
|
||||
em->start = SZ_4K;
|
||||
em->len = SZ_4K;
|
||||
em->block_start = SZ_16K;
|
||||
em->block_len = SZ_16K;
|
||||
em->disk_bytenr = SZ_16K;
|
||||
em->disk_num_bytes = SZ_16K;
|
||||
em->ram_bytes = SZ_16K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, 0, SZ_8K);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -769,9 +782,10 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
/* [0, 16K), pinned */
|
||||
em->start = 0;
|
||||
em->len = SZ_16K;
|
||||
em->block_start = 0;
|
||||
em->block_len = SZ_4K;
|
||||
em->flags |= EXTENT_FLAG_PINNED;
|
||||
em->disk_bytenr = 0;
|
||||
em->disk_num_bytes = SZ_4K;
|
||||
em->ram_bytes = SZ_16K;
|
||||
em->flags |= (EXTENT_FLAG_PINNED | EXTENT_FLAG_COMPRESS_ZLIB);
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -791,8 +805,9 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
/* [32K, 48K), not pinned */
|
||||
em->start = SZ_32K;
|
||||
em->len = SZ_16K;
|
||||
em->block_start = SZ_32K;
|
||||
em->block_len = SZ_16K;
|
||||
em->disk_bytenr = SZ_32K;
|
||||
em->disk_num_bytes = SZ_16K;
|
||||
em->ram_bytes = SZ_16K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len);
|
||||
write_unlock(&em_tree->lock);
|
||||
@ -855,8 +870,9 @@ static int test_case_7(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (em->block_start != SZ_32K + SZ_4K) {
|
||||
test_err("em->block_start is %llu, expected 36K", em->block_start);
|
||||
if (extent_map_block_start(em) != SZ_32K + SZ_4K) {
|
||||
test_err("em->block_start is %llu, expected 36K",
|
||||
extent_map_block_start(em));
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
@ -117,7 +117,7 @@ static void setup_file_extents(struct btrfs_root *root, u32 sectorsize)
|
||||
|
||||
/* Now for a regular extent */
|
||||
insert_extent(root, offset, sectorsize - 1, sectorsize - 1, 0,
|
||||
disk_bytenr, sectorsize, BTRFS_FILE_EXTENT_REG, 0, slot);
|
||||
disk_bytenr, sectorsize - 1, BTRFS_FILE_EXTENT_REG, 0, slot);
|
||||
slot++;
|
||||
disk_bytenr += sectorsize;
|
||||
offset += sectorsize - 1;
|
||||
@ -264,8 +264,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->block_start);
|
||||
if (em->disk_bytenr != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
free_extent_map(em);
|
||||
@ -283,8 +283,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_INLINE) {
|
||||
test_err("expected an inline, got %llu", em->block_start);
|
||||
if (em->disk_bytenr != EXTENT_MAP_INLINE) {
|
||||
test_err("expected an inline, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -321,8 +321,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->block_start);
|
||||
if (em->disk_bytenr != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != 4) {
|
||||
@ -344,8 +344,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize - 1) {
|
||||
@ -358,9 +358,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -372,8 +371,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -386,12 +385,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
disk_bytenr = em->block_start;
|
||||
disk_bytenr = extent_map_block_start(em);
|
||||
orig_start = em->start;
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
@ -401,8 +399,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->block_start);
|
||||
if (em->disk_bytenr != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -423,8 +421,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != 2 * sectorsize) {
|
||||
@ -437,15 +435,15 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != orig_start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu",
|
||||
orig_start, em->orig_start);
|
||||
if (em->start - em->offset != orig_start) {
|
||||
test_err("wrong offset, em->start=%llu em->offset=%llu orig_start=%llu",
|
||||
em->start, em->offset, orig_start);
|
||||
goto out;
|
||||
}
|
||||
disk_bytenr += (em->start - orig_start);
|
||||
if (em->block_start != disk_bytenr) {
|
||||
if (extent_map_block_start(em) != disk_bytenr) {
|
||||
test_err("wrong block start, want %llu, have %llu",
|
||||
disk_bytenr, em->block_start);
|
||||
disk_bytenr, extent_map_block_start(em));
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -457,8 +455,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -472,9 +470,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
prealloc_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -486,8 +483,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -501,12 +498,11 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
prealloc_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
disk_bytenr = em->block_start;
|
||||
disk_bytenr = extent_map_block_start(em);
|
||||
orig_start = em->start;
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
@ -516,8 +512,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_HOLE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_HOLE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -530,15 +526,14 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != orig_start) {
|
||||
test_err("unexpected orig offset, wanted %llu, have %llu",
|
||||
orig_start, em->orig_start);
|
||||
if (em->start - em->offset != orig_start) {
|
||||
test_err("unexpected offset, wanted %llu, have %llu",
|
||||
em->start - orig_start, em->offset);
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
|
||||
if (extent_map_block_start(em) != disk_bytenr + em->offset) {
|
||||
test_err("unexpected block start, wanted %llu, have %llu",
|
||||
disk_bytenr + (em->start - em->orig_start),
|
||||
em->block_start);
|
||||
disk_bytenr + em->offset, extent_map_block_start(em));
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -549,8 +544,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != 2 * sectorsize) {
|
||||
@ -564,15 +559,14 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
prealloc_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != orig_start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", orig_start,
|
||||
em->orig_start);
|
||||
if (em->start - em->offset != orig_start) {
|
||||
test_err("wrong offset, em->start=%llu em->offset=%llu orig_start=%llu",
|
||||
em->start, em->offset, orig_start);
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != (disk_bytenr + (em->start - em->orig_start))) {
|
||||
if (extent_map_block_start(em) != disk_bytenr + em->offset) {
|
||||
test_err("unexpected block start, wanted %llu, have %llu",
|
||||
disk_bytenr + (em->start - em->orig_start),
|
||||
em->block_start);
|
||||
disk_bytenr + em->offset, extent_map_block_start(em));
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -584,8 +578,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != 2 * sectorsize) {
|
||||
@ -599,9 +593,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
compressed_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu",
|
||||
em->start, em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
|
||||
@ -618,8 +611,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -633,9 +626,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
compressed_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu",
|
||||
em->start, em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
|
||||
@ -643,7 +635,7 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
BTRFS_COMPRESS_ZLIB, extent_map_compression(em));
|
||||
goto out;
|
||||
}
|
||||
disk_bytenr = em->block_start;
|
||||
disk_bytenr = extent_map_block_start(em);
|
||||
orig_start = em->start;
|
||||
offset = em->start + em->len;
|
||||
free_extent_map(em);
|
||||
@ -653,8 +645,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -667,9 +659,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -680,9 +671,9 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != disk_bytenr) {
|
||||
if (extent_map_block_start(em) != disk_bytenr) {
|
||||
test_err("block start does not match, want %llu got %llu",
|
||||
disk_bytenr, em->block_start);
|
||||
disk_bytenr, extent_map_block_start(em));
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != 2 * sectorsize) {
|
||||
@ -696,9 +687,9 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
compressed_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != orig_start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu",
|
||||
em->start, orig_start);
|
||||
if (em->start - em->offset != orig_start) {
|
||||
test_err("wrong offset, em->start=%llu em->offset=%llu orig_start=%llu",
|
||||
em->start, em->offset, orig_start);
|
||||
goto out;
|
||||
}
|
||||
if (extent_map_compression(em) != BTRFS_COMPRESS_ZLIB) {
|
||||
@ -715,8 +706,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -729,9 +720,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -742,8 +732,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
/*
|
||||
@ -762,9 +752,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
vacancy_only, em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
offset = em->start + em->len;
|
||||
@ -775,8 +764,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (em->disk_bytenr >= EXTENT_MAP_LAST_BYTE) {
|
||||
test_err("expected a real extent, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != offset || em->len != sectorsize) {
|
||||
@ -789,9 +778,8 @@ static noinline int test_btrfs_get_extent(u32 sectorsize, u32 nodesize)
|
||||
test_err("unexpected flags set, want 0 have %u", em->flags);
|
||||
goto out;
|
||||
}
|
||||
if (em->orig_start != em->start) {
|
||||
test_err("wrong orig offset, want %llu, have %llu", em->start,
|
||||
em->orig_start);
|
||||
if (em->offset != 0) {
|
||||
test_err("wrong orig offset, want 0, have %llu", em->offset);
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
@ -855,8 +843,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->block_start);
|
||||
if (em->disk_bytenr != EXTENT_MAP_HOLE) {
|
||||
test_err("expected a hole, got %llu", em->disk_bytenr);
|
||||
goto out;
|
||||
}
|
||||
if (em->start != 0 || em->len != sectorsize) {
|
||||
@ -877,8 +865,8 @@ static int test_hole_first(u32 sectorsize, u32 nodesize)
|
||||
test_err("got an error when we shouldn't have");
|
||||
goto out;
|
||||
}
|
||||
if (em->block_start != sectorsize) {
|
||||
test_err("expected a real extent, got %llu", em->block_start);
|
||||
if (extent_map_block_start(em) != sectorsize) {
|
||||
test_err("expected a real extent, got %llu", extent_map_block_start(em));
|
||||
goto out;
|
||||
}
|
||||
if (em->start != sectorsize || em->len != sectorsize) {
|
||||
|
@ -405,7 +405,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
int ret = 0;
|
||||
|
||||
if ((test_bit(BTRFS_ROOT_SHAREABLE, &root->state) &&
|
||||
root->last_trans < trans->transid) || force) {
|
||||
btrfs_get_root_last_trans(root) < trans->transid) || force) {
|
||||
WARN_ON(!force && root->commit_root != root->node);
|
||||
|
||||
/*
|
||||
@ -421,7 +421,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
smp_wmb();
|
||||
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
if (root->last_trans == trans->transid && !force) {
|
||||
if (btrfs_get_root_last_trans(root) == trans->transid && !force) {
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
return 0;
|
||||
}
|
||||
@ -429,7 +429,7 @@ static int record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
(unsigned long)btrfs_root_id(root),
|
||||
BTRFS_ROOT_TRANS_TAG);
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
root->last_trans = trans->transid;
|
||||
btrfs_set_root_last_trans(root, trans->transid);
|
||||
|
||||
/* this is pretty tricky. We don't want to
|
||||
* take the relocation lock in btrfs_record_root_in_trans
|
||||
@ -491,7 +491,7 @@ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
|
||||
* and barriers
|
||||
*/
|
||||
smp_rmb();
|
||||
if (root->last_trans == trans->transid &&
|
||||
if (btrfs_get_root_last_trans(root) == trans->transid &&
|
||||
!test_bit(BTRFS_ROOT_IN_TRANS_SETUP, &root->state))
|
||||
return 0;
|
||||
|
||||
@ -1637,7 +1637,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_root *root = pending->root;
|
||||
struct btrfs_root *parent_root;
|
||||
struct btrfs_block_rsv *rsv;
|
||||
struct inode *parent_inode = pending->dir;
|
||||
struct inode *parent_inode = &pending->dir->vfs_inode;
|
||||
struct btrfs_path *path;
|
||||
struct btrfs_dir_item *dir_item;
|
||||
struct extent_buffer *tmp;
|
||||
@ -1989,6 +1989,25 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
|
||||
btrfs_put_transaction(cur_trans);
|
||||
}
|
||||
|
||||
/*
|
||||
* If there is a running transaction commit it or if it's already committing,
|
||||
* wait for its commit to complete. Does not start and commit a new transaction
|
||||
* if there isn't any running.
|
||||
*/
|
||||
int btrfs_commit_current_transaction(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
trans = btrfs_attach_transaction_barrier(root);
|
||||
if (IS_ERR(trans)) {
|
||||
int ret = PTR_ERR(trans);
|
||||
|
||||
return (ret == -ENOENT) ? 0 : ret;
|
||||
}
|
||||
|
||||
return btrfs_commit_transaction(trans);
|
||||
}
|
||||
|
||||
static void cleanup_transaction(struct btrfs_trans_handle *trans, int err)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
@ -2110,7 +2129,7 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
if (btrfs_test_opt(fs_info, FLUSHONCOMMIT))
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -172,7 +172,7 @@ struct btrfs_trans_handle {
|
||||
|
||||
struct btrfs_pending_snapshot {
|
||||
struct dentry *dentry;
|
||||
struct inode *dir;
|
||||
struct btrfs_inode *dir;
|
||||
struct btrfs_root *root;
|
||||
struct btrfs_root_item *root_item;
|
||||
struct btrfs_root *snap;
|
||||
@ -229,11 +229,11 @@ bool __cold abort_should_print_stack(int error);
|
||||
*/
|
||||
#define btrfs_abort_transaction(trans, error) \
|
||||
do { \
|
||||
bool first = false; \
|
||||
bool __first = false; \
|
||||
/* Report first abort since mount */ \
|
||||
if (!test_and_set_bit(BTRFS_FS_STATE_TRANS_ABORTED, \
|
||||
&((trans)->fs_info->fs_state))) { \
|
||||
first = true; \
|
||||
__first = true; \
|
||||
if (WARN(abort_should_print_stack(error), \
|
||||
KERN_ERR \
|
||||
"BTRFS: Transaction aborted (error %d)\n", \
|
||||
@ -246,7 +246,7 @@ do { \
|
||||
} \
|
||||
} \
|
||||
__btrfs_abort_transaction((trans), __func__, \
|
||||
__LINE__, (error), first); \
|
||||
__LINE__, (error), __first); \
|
||||
} while (0)
|
||||
|
||||
int btrfs_end_transaction(struct btrfs_trans_handle *trans);
|
||||
@ -268,6 +268,7 @@ void btrfs_maybe_wake_unfinished_drop(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_clean_one_deleted_snapshot(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_commit_transaction(struct btrfs_trans_handle *trans);
|
||||
void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans);
|
||||
int btrfs_commit_current_transaction(struct btrfs_root *root);
|
||||
int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans);
|
||||
bool btrfs_should_end_transaction(struct btrfs_trans_handle *trans);
|
||||
void btrfs_throttle(struct btrfs_fs_info *fs_info);
|
||||
|
@ -340,6 +340,24 @@ static int check_extent_data_item(struct extent_buffer *leaf,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* For non-compressed data extents, ram_bytes should match its
|
||||
* disk_num_bytes.
|
||||
* However we do not really utilize ram_bytes in this case, so this check
|
||||
* is only optional for DEBUG builds for developers to catch the
|
||||
* unexpected behaviors.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_BTRFS_DEBUG) &&
|
||||
btrfs_file_extent_compression(leaf, fi) == BTRFS_COMPRESS_NONE &&
|
||||
btrfs_file_extent_disk_bytenr(leaf, fi)) {
|
||||
if (WARN_ON(btrfs_file_extent_ram_bytes(leaf, fi) !=
|
||||
btrfs_file_extent_disk_num_bytes(leaf, fi)))
|
||||
file_extent_err(leaf, slot,
|
||||
"mismatch ram_bytes (%llu) and disk_num_bytes (%llu) for non-compressed extent",
|
||||
btrfs_file_extent_ram_bytes(leaf, fi),
|
||||
btrfs_file_extent_disk_num_bytes(leaf, fi));
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -1682,9 +1700,6 @@ static int check_inode_ref(struct extent_buffer *leaf,
|
||||
static int check_raid_stripe_extent(const struct extent_buffer *leaf,
|
||||
const struct btrfs_key *key, int slot)
|
||||
{
|
||||
struct btrfs_stripe_extent *stripe_extent =
|
||||
btrfs_item_ptr(leaf, slot, struct btrfs_stripe_extent);
|
||||
|
||||
if (unlikely(!IS_ALIGNED(key->objectid, leaf->fs_info->sectorsize))) {
|
||||
generic_err(leaf, slot,
|
||||
"invalid key objectid for raid stripe extent, have %llu expect aligned to %u",
|
||||
@ -1698,22 +1713,6 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf,
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
switch (btrfs_stripe_extent_encoding(leaf, stripe_extent)) {
|
||||
case BTRFS_STRIPE_RAID0:
|
||||
case BTRFS_STRIPE_RAID1:
|
||||
case BTRFS_STRIPE_DUP:
|
||||
case BTRFS_STRIPE_RAID10:
|
||||
case BTRFS_STRIPE_RAID5:
|
||||
case BTRFS_STRIPE_RAID6:
|
||||
case BTRFS_STRIPE_RAID1C3:
|
||||
case BTRFS_STRIPE_RAID1C4:
|
||||
break;
|
||||
default:
|
||||
generic_err(leaf, slot, "invalid raid stripe encoding %u",
|
||||
btrfs_stripe_extent_encoding(leaf, stripe_extent));
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -151,7 +151,7 @@ static struct inode *btrfs_iget_logging(u64 objectid, struct btrfs_root *root)
|
||||
* attempt a transaction commit, resulting in a deadlock.
|
||||
*/
|
||||
nofs_flag = memalloc_nofs_save();
|
||||
inode = btrfs_iget(root->fs_info->sb, objectid, root);
|
||||
inode = btrfs_iget(objectid, root);
|
||||
memalloc_nofs_restore(nofs_flag);
|
||||
|
||||
return inode;
|
||||
@ -1644,7 +1644,8 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
BTRFS_I(inode)->index_cnt = (u64)-1;
|
||||
if (S_ISDIR(inode->i_mode))
|
||||
BTRFS_I(inode)->index_cnt = (u64)-1;
|
||||
|
||||
if (inode->i_nlink == 0) {
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
@ -2839,7 +2840,7 @@ static void wait_for_writer(struct btrfs_root *root)
|
||||
finish_wait(&root->log_writer_wait, &wait);
|
||||
}
|
||||
|
||||
void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
|
||||
void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode)
|
||||
{
|
||||
ctx->log_ret = 0;
|
||||
ctx->log_transid = 0;
|
||||
@ -2858,7 +2859,7 @@ void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode)
|
||||
|
||||
void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ctx->inode);
|
||||
struct btrfs_inode *inode = ctx->inode;
|
||||
|
||||
if (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) &&
|
||||
!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
|
||||
@ -2876,7 +2877,7 @@ void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx)
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_ordered_extent *tmp;
|
||||
|
||||
ASSERT(inode_is_locked(ctx->inode));
|
||||
ASSERT(inode_is_locked(&ctx->inode->vfs_inode));
|
||||
|
||||
list_for_each_entry_safe(ordered, tmp, &ctx->ordered_extents, log_list) {
|
||||
list_del_init(&ordered->log_list);
|
||||
@ -4253,8 +4254,10 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *inode, bool inode_item_dropped)
|
||||
{
|
||||
struct btrfs_inode_item *inode_item;
|
||||
struct btrfs_key key;
|
||||
int ret;
|
||||
|
||||
btrfs_get_inode_key(inode, &key);
|
||||
/*
|
||||
* If we are doing a fast fsync and the inode was logged before in the
|
||||
* current transaction, then we know the inode was previously logged and
|
||||
@ -4266,7 +4269,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
|
||||
* already exists can also result in unnecessarily splitting a leaf.
|
||||
*/
|
||||
if (!inode_item_dropped && inode->logged_trans == trans->transid) {
|
||||
ret = btrfs_search_slot(trans, log, &inode->location, path, 0, 1);
|
||||
ret = btrfs_search_slot(trans, log, &key, path, 0, 1);
|
||||
ASSERT(ret <= 0);
|
||||
if (ret > 0)
|
||||
ret = -ENOENT;
|
||||
@ -4280,7 +4283,7 @@ static int log_inode_item(struct btrfs_trans_handle *trans,
|
||||
* the inode, we set BTRFS_INODE_NEEDS_FULL_SYNC on its runtime
|
||||
* flags and set ->logged_trans to 0.
|
||||
*/
|
||||
ret = btrfs_insert_empty_item(trans, log, path, &inode->location,
|
||||
ret = btrfs_insert_empty_item(trans, log, path, &key,
|
||||
sizeof(*inode_item));
|
||||
ASSERT(ret != -EEXIST);
|
||||
}
|
||||
@ -4594,6 +4597,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
|
||||
{
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_root *csum_root;
|
||||
u64 block_start;
|
||||
u64 csum_offset;
|
||||
u64 csum_len;
|
||||
u64 mod_start = em->start;
|
||||
@ -4603,7 +4607,7 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
|
||||
|
||||
if (inode->flags & BTRFS_INODE_NODATASUM ||
|
||||
(em->flags & EXTENT_FLAG_PREALLOC) ||
|
||||
em->block_start == EXTENT_MAP_HOLE)
|
||||
em->disk_bytenr == EXTENT_MAP_HOLE)
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(ordered, &ctx->ordered_extents, log_list) {
|
||||
@ -4667,17 +4671,18 @@ static int log_extent_csums(struct btrfs_trans_handle *trans,
|
||||
/* If we're compressed we have to save the entire range of csums. */
|
||||
if (extent_map_is_compressed(em)) {
|
||||
csum_offset = 0;
|
||||
csum_len = max(em->block_len, em->orig_block_len);
|
||||
csum_len = em->disk_num_bytes;
|
||||
} else {
|
||||
csum_offset = mod_start - em->start;
|
||||
csum_len = mod_len;
|
||||
}
|
||||
|
||||
/* block start is already adjusted for the file extent offset. */
|
||||
csum_root = btrfs_csum_root(trans->fs_info, em->block_start);
|
||||
ret = btrfs_lookup_csums_list(csum_root, em->block_start + csum_offset,
|
||||
em->block_start + csum_offset +
|
||||
csum_len - 1, &ordered_sums, false);
|
||||
block_start = extent_map_block_start(em);
|
||||
csum_root = btrfs_csum_root(trans->fs_info, block_start);
|
||||
ret = btrfs_lookup_csums_list(csum_root, block_start + csum_offset,
|
||||
block_start + csum_offset + csum_len - 1,
|
||||
&ordered_sums, false);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = 0;
|
||||
@ -4707,7 +4712,8 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
|
||||
struct extent_buffer *leaf;
|
||||
struct btrfs_key key;
|
||||
enum btrfs_compression_type compress_type;
|
||||
u64 extent_offset = em->start - em->orig_start;
|
||||
u64 extent_offset = em->offset;
|
||||
u64 block_start = extent_map_block_start(em);
|
||||
u64 block_len;
|
||||
int ret;
|
||||
|
||||
@ -4717,14 +4723,13 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
|
||||
else
|
||||
btrfs_set_stack_file_extent_type(&fi, BTRFS_FILE_EXTENT_REG);
|
||||
|
||||
block_len = max(em->block_len, em->orig_block_len);
|
||||
block_len = em->disk_num_bytes;
|
||||
compress_type = extent_map_compression(em);
|
||||
if (compress_type != BTRFS_COMPRESS_NONE) {
|
||||
btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start);
|
||||
btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start);
|
||||
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
|
||||
} else if (em->block_start < EXTENT_MAP_LAST_BYTE) {
|
||||
btrfs_set_stack_file_extent_disk_bytenr(&fi, em->block_start -
|
||||
extent_offset);
|
||||
} else if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) {
|
||||
btrfs_set_stack_file_extent_disk_bytenr(&fi, block_start - extent_offset);
|
||||
btrfs_set_stack_file_extent_disk_num_bytes(&fi, block_len);
|
||||
}
|
||||
|
||||
@ -5927,7 +5932,7 @@ again:
|
||||
if (ret < 0) {
|
||||
return ret;
|
||||
} else if (ret > 0 &&
|
||||
other_ino != btrfs_ino(BTRFS_I(ctx->inode))) {
|
||||
other_ino != btrfs_ino(ctx->inode)) {
|
||||
if (ins_nr > 0) {
|
||||
ins_nr++;
|
||||
} else {
|
||||
@ -7073,6 +7078,15 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
|
||||
goto end_no_trans;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're logging an inode from a subvolume created in the current
|
||||
* transaction we must force a commit since the root is not persisted.
|
||||
*/
|
||||
if (btrfs_root_generation(&root->root_item) == trans->transid) {
|
||||
ret = BTRFS_LOG_FORCE_COMMIT;
|
||||
goto end_no_trans;
|
||||
}
|
||||
|
||||
/*
|
||||
* Skip already logged inodes or inodes corresponding to tmpfiles
|
||||
* (since logging them is pointless, a link count of 0 means they
|
||||
@ -7453,6 +7467,24 @@ void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
|
||||
mutex_unlock(&dir->log_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call this when creating a subvolume in a directory.
|
||||
* Because we don't commit a transaction when creating a subvolume, we can't
|
||||
* allow the directory pointing to the subvolume to be logged with an entry that
|
||||
* points to an unpersisted root if we are still in the transaction used to
|
||||
* create the subvolume, so make any attempt to log the directory to result in a
|
||||
* full log sync.
|
||||
* Also we don't need to worry with renames, since btrfs_rename() marks the log
|
||||
* for full commit when renaming a subvolume.
|
||||
*/
|
||||
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *dir)
|
||||
{
|
||||
mutex_lock(&dir->log_mutex);
|
||||
dir->last_unlink_trans = trans->transid;
|
||||
mutex_unlock(&dir->log_mutex);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the log after adding a new name for an inode.
|
||||
*
|
||||
@ -7585,7 +7617,7 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
|
||||
goto out;
|
||||
}
|
||||
|
||||
btrfs_init_log_ctx(&ctx, &inode->vfs_inode);
|
||||
btrfs_init_log_ctx(&ctx, inode);
|
||||
ctx.logging_new_name = true;
|
||||
btrfs_init_log_ctx_scratch_eb(&ctx);
|
||||
/*
|
||||
|
@ -37,7 +37,7 @@ struct btrfs_log_ctx {
|
||||
bool logging_new_delayed_dentries;
|
||||
/* Indicate if the inode being logged was logged before. */
|
||||
bool logged_before;
|
||||
struct inode *inode;
|
||||
struct btrfs_inode *inode;
|
||||
struct list_head list;
|
||||
/* Only used for fast fsyncs. */
|
||||
struct list_head ordered_extents;
|
||||
@ -55,7 +55,7 @@ struct btrfs_log_ctx {
|
||||
struct extent_buffer *scratch_eb;
|
||||
};
|
||||
|
||||
void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct inode *inode);
|
||||
void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx, struct btrfs_inode *inode);
|
||||
void btrfs_init_log_ctx_scratch_eb(struct btrfs_log_ctx *ctx);
|
||||
void btrfs_release_log_ctx_extents(struct btrfs_log_ctx *ctx);
|
||||
|
||||
@ -94,6 +94,8 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
|
||||
bool for_rename);
|
||||
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *dir);
|
||||
void btrfs_record_new_subvolume(const struct btrfs_trans_handle *trans,
|
||||
struct btrfs_inode *dir);
|
||||
void btrfs_log_new_name(struct btrfs_trans_handle *trans,
|
||||
struct dentry *old_dentry, struct btrfs_inode *old_dir,
|
||||
u64 old_dir_index, struct dentry *parent);
|
||||
|
@ -50,6 +50,7 @@ void ulist_init(struct ulist *ulist)
|
||||
INIT_LIST_HEAD(&ulist->nodes);
|
||||
ulist->root = RB_ROOT;
|
||||
ulist->nnodes = 0;
|
||||
ulist->prealloc = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -68,6 +69,8 @@ void ulist_release(struct ulist *ulist)
|
||||
list_for_each_entry_safe(node, next, &ulist->nodes, list) {
|
||||
kfree(node);
|
||||
}
|
||||
kfree(ulist->prealloc);
|
||||
ulist->prealloc = NULL;
|
||||
ulist->root = RB_ROOT;
|
||||
INIT_LIST_HEAD(&ulist->nodes);
|
||||
}
|
||||
@ -105,6 +108,12 @@ struct ulist *ulist_alloc(gfp_t gfp_mask)
|
||||
return ulist;
|
||||
}
|
||||
|
||||
void ulist_prealloc(struct ulist *ulist, gfp_t gfp_mask)
|
||||
{
|
||||
if (!ulist->prealloc)
|
||||
ulist->prealloc = kzalloc(sizeof(*ulist->prealloc), gfp_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* Free dynamically allocated ulist.
|
||||
*
|
||||
@ -206,9 +215,15 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
|
||||
*old_aux = node->aux;
|
||||
return 0;
|
||||
}
|
||||
node = kmalloc(sizeof(*node), gfp_mask);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
if (ulist->prealloc) {
|
||||
node = ulist->prealloc;
|
||||
ulist->prealloc = NULL;
|
||||
} else {
|
||||
node = kmalloc(sizeof(*node), gfp_mask);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
node->val = val;
|
||||
node->aux = aux;
|
||||
|
@ -41,12 +41,14 @@ struct ulist {
|
||||
|
||||
struct list_head nodes;
|
||||
struct rb_root root;
|
||||
struct ulist_node *prealloc;
|
||||
};
|
||||
|
||||
void ulist_init(struct ulist *ulist);
|
||||
void ulist_release(struct ulist *ulist);
|
||||
void ulist_reinit(struct ulist *ulist);
|
||||
struct ulist *ulist_alloc(gfp_t gfp_mask);
|
||||
void ulist_prealloc(struct ulist *ulist, gfp_t mask);
|
||||
void ulist_free(struct ulist *ulist);
|
||||
int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
|
||||
int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
|
||||
|
@ -13,7 +13,7 @@
|
||||
#include "accessors.h"
|
||||
#include "uuid-tree.h"
|
||||
|
||||
static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
|
||||
static void btrfs_uuid_to_key(const u8 *uuid, u8 type, struct btrfs_key *key)
|
||||
{
|
||||
key->type = type;
|
||||
key->objectid = get_unaligned_le64(uuid);
|
||||
@ -21,7 +21,7 @@ static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
|
||||
}
|
||||
|
||||
/* return -ENOENT for !found, < 0 for errors, or 0 if an item was found */
|
||||
static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
|
||||
static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, const u8 *uuid,
|
||||
u8 type, u64 subid)
|
||||
{
|
||||
int ret;
|
||||
@ -81,7 +81,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
|
||||
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
|
||||
u64 subid_cpu)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
@ -145,7 +145,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
|
||||
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
|
||||
u64 subid)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = trans->fs_info;
|
||||
@ -256,7 +256,7 @@ out:
|
||||
* < 0 if an error occurred
|
||||
*/
|
||||
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
|
||||
u8 *uuid, u8 type, u64 subvolid)
|
||||
const u8 *uuid, u8 type, u64 subvolid)
|
||||
{
|
||||
int ret = 0;
|
||||
struct btrfs_root *subvol_root;
|
||||
|
@ -8,9 +8,9 @@
|
||||
struct btrfs_trans_handle;
|
||||
struct btrfs_fs_info;
|
||||
|
||||
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
|
||||
int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
|
||||
u64 subid);
|
||||
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, u8 *uuid, u8 type,
|
||||
int btrfs_uuid_tree_remove(struct btrfs_trans_handle *trans, const u8 *uuid, u8 type,
|
||||
u64 subid);
|
||||
int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info);
|
||||
|
||||
|
@ -722,7 +722,7 @@ error_free_page:
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
|
||||
const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb)
|
||||
{
|
||||
bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
|
||||
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
|
||||
@ -1380,19 +1380,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
|
||||
bool new_device_added = false;
|
||||
struct btrfs_device *device = NULL;
|
||||
struct file *bdev_file;
|
||||
u64 bytenr, bytenr_orig;
|
||||
u64 bytenr;
|
||||
dev_t devt;
|
||||
int ret;
|
||||
|
||||
lockdep_assert_held(&uuid_mutex);
|
||||
|
||||
/*
|
||||
* we would like to check all the supers, but that would make
|
||||
* a btrfs mount succeed after a mkfs from a different FS.
|
||||
* So, we need to add a special mount option to scan for
|
||||
* later supers, using BTRFS_SUPER_MIRROR_MAX instead
|
||||
*/
|
||||
|
||||
/*
|
||||
* Avoid an exclusive open here, as the systemd-udev may initiate the
|
||||
* device scan which may race with the user's mount or mkfs command,
|
||||
@ -1407,7 +1400,12 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
|
||||
if (IS_ERR(bdev_file))
|
||||
return ERR_CAST(bdev_file);
|
||||
|
||||
bytenr_orig = btrfs_sb_offset(0);
|
||||
/*
|
||||
* We would like to check all the super blocks, but doing so would
|
||||
* allow a mount to succeed after a mkfs from a different filesystem.
|
||||
* Currently, recovery from a bad primary btrfs superblock is done
|
||||
* using the userspace command 'btrfs check --super'.
|
||||
*/
|
||||
ret = btrfs_sb_log_location_bdev(file_bdev(bdev_file), 0, READ, &bytenr);
|
||||
if (ret) {
|
||||
device = ERR_PTR(ret);
|
||||
@ -1415,7 +1413,7 @@ struct btrfs_device *btrfs_scan_one_device(const char *path, blk_mode_t flags,
|
||||
}
|
||||
|
||||
disk_super = btrfs_read_disk_super(file_bdev(bdev_file), bytenr,
|
||||
bytenr_orig);
|
||||
btrfs_sb_offset(0));
|
||||
if (IS_ERR(disk_super)) {
|
||||
device = ERR_CAST(disk_super);
|
||||
goto error_bdev_put;
|
||||
@ -2991,16 +2989,19 @@ static int btrfs_free_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
else if (ret > 0) { /* Logic error or corruption */
|
||||
btrfs_handle_fs_error(fs_info, -ENOENT,
|
||||
"Failed lookup while freeing chunk.");
|
||||
ret = -ENOENT;
|
||||
btrfs_err(fs_info, "failed to lookup chunk %llu when freeing",
|
||||
chunk_offset);
|
||||
btrfs_abort_transaction(trans, -ENOENT);
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = btrfs_del_item(trans, root, path);
|
||||
if (ret < 0)
|
||||
btrfs_handle_fs_error(fs_info, ret,
|
||||
"Failed to delete chunk item.");
|
||||
if (ret < 0) {
|
||||
btrfs_err(fs_info, "failed to delete chunk %llu item", chunk_offset);
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
@ -5628,8 +5629,6 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
|
||||
u64 start = ctl->start;
|
||||
u64 type = ctl->type;
|
||||
int ret;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
map = btrfs_alloc_chunk_map(ctl->num_stripes, GFP_NOFS);
|
||||
if (!map)
|
||||
@ -5644,8 +5643,8 @@ static struct btrfs_block_group *create_chunk(struct btrfs_trans_handle *trans,
|
||||
map->sub_stripes = ctl->sub_stripes;
|
||||
map->num_stripes = ctl->num_stripes;
|
||||
|
||||
for (i = 0; i < ctl->ndevs; ++i) {
|
||||
for (j = 0; j < ctl->dev_stripes; ++j) {
|
||||
for (int i = 0; i < ctl->ndevs; i++) {
|
||||
for (int j = 0; j < ctl->dev_stripes; j++) {
|
||||
int s = i * ctl->dev_stripes + j;
|
||||
map->stripes[s].dev = devices_info[i].dev;
|
||||
map->stripes[s].physical = devices_info[i].dev_offset +
|
||||
@ -6288,20 +6287,19 @@ static bool is_block_group_to_copy(struct btrfs_fs_info *fs_info, u64 logical)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void handle_ops_on_dev_replace(enum btrfs_map_op op,
|
||||
struct btrfs_io_context *bioc,
|
||||
static void handle_ops_on_dev_replace(struct btrfs_io_context *bioc,
|
||||
struct btrfs_dev_replace *dev_replace,
|
||||
u64 logical,
|
||||
int *num_stripes_ret, int *max_errors_ret)
|
||||
struct btrfs_io_geometry *io_geom)
|
||||
{
|
||||
u64 srcdev_devid = dev_replace->srcdev->devid;
|
||||
/*
|
||||
* At this stage, num_stripes is still the real number of stripes,
|
||||
* excluding the duplicated stripes.
|
||||
*/
|
||||
int num_stripes = *num_stripes_ret;
|
||||
int num_stripes = io_geom->num_stripes;
|
||||
int max_errors = io_geom->max_errors;
|
||||
int nr_extra_stripes = 0;
|
||||
int max_errors = *max_errors_ret;
|
||||
int i;
|
||||
|
||||
/*
|
||||
@ -6342,7 +6340,7 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
|
||||
* replace.
|
||||
* If we have 2 extra stripes, only choose the one with smaller physical.
|
||||
*/
|
||||
if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
|
||||
if (io_geom->op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) {
|
||||
struct btrfs_io_stripe *first = &bioc->stripes[num_stripes];
|
||||
struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1];
|
||||
|
||||
@ -6360,8 +6358,8 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
|
||||
}
|
||||
}
|
||||
|
||||
*num_stripes_ret = num_stripes + nr_extra_stripes;
|
||||
*max_errors_ret = max_errors + nr_extra_stripes;
|
||||
io_geom->num_stripes = num_stripes + nr_extra_stripes;
|
||||
io_geom->max_errors = max_errors + nr_extra_stripes;
|
||||
bioc->replace_nr_stripes = nr_extra_stripes;
|
||||
}
|
||||
|
||||
@ -6624,7 +6622,6 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
struct btrfs_chunk_map *map;
|
||||
struct btrfs_io_geometry io_geom = { 0 };
|
||||
u64 map_offset;
|
||||
int i;
|
||||
int ret = 0;
|
||||
int num_copies;
|
||||
struct btrfs_io_context *bioc = NULL;
|
||||
@ -6770,7 +6767,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
* For all other non-RAID56 profiles, just copy the target
|
||||
* stripe into the bioc.
|
||||
*/
|
||||
for (i = 0; i < io_geom.num_stripes; i++) {
|
||||
for (int i = 0; i < io_geom.num_stripes; i++) {
|
||||
ret = set_io_stripe(fs_info, logical, length,
|
||||
&bioc->stripes[i], map, &io_geom);
|
||||
if (ret < 0)
|
||||
@ -6790,8 +6787,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
|
||||
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
|
||||
op != BTRFS_MAP_READ) {
|
||||
handle_ops_on_dev_replace(op, bioc, dev_replace, logical,
|
||||
&io_geom.num_stripes, &io_geom.max_errors);
|
||||
handle_ops_on_dev_replace(bioc, dev_replace, logical, &io_geom);
|
||||
}
|
||||
|
||||
*bioc_ret = bioc;
|
||||
|
@ -834,6 +834,6 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
|
||||
|
||||
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
|
||||
u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
|
||||
const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);
|
||||
|
||||
#endif
|
||||
|
@ -24,7 +24,7 @@
|
||||
#include "accessors.h"
|
||||
#include "dir-item.h"
|
||||
|
||||
int btrfs_getxattr(struct inode *inode, const char *name,
|
||||
int btrfs_getxattr(const struct inode *inode, const char *name,
|
||||
void *buffer, size_t size)
|
||||
{
|
||||
struct btrfs_dir_item *di;
|
||||
@ -451,7 +451,7 @@ static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
|
||||
if (IS_ERR(trans))
|
||||
return PTR_ERR(trans);
|
||||
|
||||
ret = btrfs_set_prop(trans, inode, name, value, size, flags);
|
||||
ret = btrfs_set_prop(trans, BTRFS_I(inode), name, value, size, flags);
|
||||
if (!ret) {
|
||||
inode_inc_iversion(inode);
|
||||
inode_set_ctime_current(inode);
|
||||
|
@ -14,7 +14,7 @@ struct btrfs_trans_handle;
|
||||
|
||||
extern const struct xattr_handler * const btrfs_xattr_handlers[];
|
||||
|
||||
int btrfs_getxattr(struct inode *inode, const char *name,
|
||||
int btrfs_getxattr(const struct inode *inode, const char *name,
|
||||
void *buffer, size_t size);
|
||||
int btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode,
|
||||
const char *name, const void *value, size_t size, int flags);
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/refcount.h>
|
||||
#include "btrfs_inode.h"
|
||||
#include "compression.h"
|
||||
|
||||
/* workspace buffer size for s390 zlib hardware support */
|
||||
@ -112,8 +113,13 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
|
||||
*total_out = 0;
|
||||
*total_in = 0;
|
||||
|
||||
if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) {
|
||||
pr_warn("BTRFS: deflateInit failed\n");
|
||||
ret = zlib_deflateInit(&workspace->strm, workspace->level);
|
||||
if (unlikely(ret != Z_OK)) {
|
||||
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zlib compression init failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode), start);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -182,9 +188,13 @@ int zlib_compress_folios(struct list_head *ws, struct address_space *mapping,
|
||||
}
|
||||
|
||||
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
|
||||
if (ret != Z_OK) {
|
||||
pr_debug("BTRFS: deflate in loop returned %d\n",
|
||||
ret);
|
||||
if (unlikely(ret != Z_OK)) {
|
||||
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
||||
|
||||
btrfs_warn(inode->root->fs_info,
|
||||
"zlib compression failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
start);
|
||||
zlib_deflateEnd(&workspace->strm);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
@ -307,9 +317,14 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
workspace->strm.avail_in -= 2;
|
||||
}
|
||||
|
||||
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) {
|
||||
pr_warn("BTRFS: inflateInit failed\n");
|
||||
ret = zlib_inflateInit2(&workspace->strm, wbits);
|
||||
if (unlikely(ret != Z_OK)) {
|
||||
struct btrfs_inode *inode = cb->bbio.inode;
|
||||
|
||||
kunmap_local(data_in);
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
|
||||
return -EIO;
|
||||
}
|
||||
while (workspace->strm.total_in < srclen) {
|
||||
@ -348,10 +363,15 @@ int zlib_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
workspace->strm.avail_in = min(tmp, PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
if (ret != Z_STREAM_END)
|
||||
if (unlikely(ret != Z_STREAM_END)) {
|
||||
btrfs_err(cb->bbio.inode->root->fs_info,
|
||||
"zlib decompression failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(cb->bbio.inode->root),
|
||||
btrfs_ino(cb->bbio.inode), cb->start);
|
||||
ret = -EIO;
|
||||
else
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
done:
|
||||
zlib_inflateEnd(&workspace->strm);
|
||||
if (data_in)
|
||||
@ -386,8 +406,14 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
|
||||
workspace->strm.avail_in -= 2;
|
||||
}
|
||||
|
||||
if (Z_OK != zlib_inflateInit2(&workspace->strm, wbits)) {
|
||||
pr_warn("BTRFS: inflateInit failed\n");
|
||||
ret = zlib_inflateInit2(&workspace->strm, wbits);
|
||||
if (unlikely(ret != Z_OK)) {
|
||||
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zlib decompression init failed, error %d root %llu inode %llu offset %llu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
page_offset(dest_page));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
@ -404,8 +430,12 @@ int zlib_decompress(struct list_head *ws, const u8 *data_in,
|
||||
|
||||
out:
|
||||
if (unlikely(to_copy != destlen)) {
|
||||
pr_warn_ratelimited("BTRFS: inflate failed, decompressed=%lu expected=%zu\n",
|
||||
to_copy, destlen);
|
||||
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zlib decompression failed, error %d root %llu inode %llu offset %llu decompressed %lu expected %zu",
|
||||
ret, btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
page_offset(dest_page), to_copy, destlen);
|
||||
ret = -EIO;
|
||||
} else {
|
||||
ret = 0;
|
||||
|
@ -87,9 +87,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
|
||||
bool empty[BTRFS_NR_SB_LOG_ZONES];
|
||||
bool full[BTRFS_NR_SB_LOG_ZONES];
|
||||
sector_t sector;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||
for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||
ASSERT(zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL);
|
||||
empty[i] = (zones[i].cond == BLK_ZONE_COND_EMPTY);
|
||||
full[i] = sb_zone_is_full(&zones[i]);
|
||||
@ -121,9 +120,8 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
|
||||
struct address_space *mapping = bdev->bd_mapping;
|
||||
struct page *page[BTRFS_NR_SB_LOG_ZONES];
|
||||
struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
|
||||
int i;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||
for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
|
||||
u64 zone_end = (zones[i].start + zones[i].capacity) << SECTOR_SHIFT;
|
||||
u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) -
|
||||
BTRFS_SUPER_INFO_SIZE;
|
||||
@ -144,7 +142,7 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
|
||||
else
|
||||
sector = zones[0].start;
|
||||
|
||||
for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++)
|
||||
for (int i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++)
|
||||
btrfs_release_disk_super(super[i]);
|
||||
} else if (!full[0] && (empty[1] || full[1])) {
|
||||
sector = zones[0].wp;
|
||||
@ -652,8 +650,7 @@ out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
|
||||
struct blk_zone *zone)
|
||||
static int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, struct blk_zone *zone)
|
||||
{
|
||||
unsigned int nr_zones = 1;
|
||||
int ret;
|
||||
@ -770,7 +767,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info, unsigned long *mount_opt)
|
||||
int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info, unsigned long *mount_opt)
|
||||
{
|
||||
if (!btrfs_is_zoned(info))
|
||||
return 0;
|
||||
@ -1726,7 +1723,7 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return false;
|
||||
|
||||
if (!inode || !is_data_inode(&inode->vfs_inode))
|
||||
if (!inode || !is_data_inode(inode))
|
||||
return false;
|
||||
|
||||
if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE)
|
||||
@ -1768,7 +1765,7 @@ void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
|
||||
static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
|
||||
u64 logical)
|
||||
{
|
||||
struct extent_map_tree *em_tree = &BTRFS_I(ordered->inode)->extent_tree;
|
||||
struct extent_map_tree *em_tree = &ordered->inode->extent_tree;
|
||||
struct extent_map *em;
|
||||
|
||||
ordered->disk_bytenr = logical;
|
||||
@ -1776,7 +1773,9 @@ static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
|
||||
write_lock(&em_tree->lock);
|
||||
em = search_extent_mapping(em_tree, ordered->file_offset,
|
||||
ordered->num_bytes);
|
||||
em->block_start = logical;
|
||||
/* The em should be a new COW extent, thus it should not have an offset. */
|
||||
ASSERT(em->offset == 0);
|
||||
em->disk_bytenr = logical;
|
||||
free_extent_map(em);
|
||||
write_unlock(&em_tree->lock);
|
||||
}
|
||||
@ -1787,7 +1786,7 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
|
||||
struct btrfs_ordered_extent *new;
|
||||
|
||||
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
|
||||
split_extent_map(BTRFS_I(ordered->inode), ordered->file_offset,
|
||||
split_extent_map(ordered->inode, ordered->file_offset,
|
||||
ordered->num_bytes, len, logical))
|
||||
return false;
|
||||
|
||||
@ -1801,7 +1800,7 @@ static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
|
||||
|
||||
void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_inode *inode = ordered->inode;
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_ordered_sum *sum;
|
||||
u64 logical, len;
|
||||
@ -1845,7 +1844,7 @@ out:
|
||||
* here so that we don't attempt to log the csums later.
|
||||
*/
|
||||
if ((inode->flags & BTRFS_INODE_NODATASUM) ||
|
||||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
|
||||
test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) {
|
||||
while ((sum = list_first_entry_or_null(&ordered->list,
|
||||
typeof(*sum), list))) {
|
||||
list_del(&sum->list);
|
||||
@ -2215,8 +2214,7 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
/* Ensure all writes in this block group finish */
|
||||
btrfs_wait_block_group_reservations(block_group);
|
||||
/* No need to wait for NOCOW writers. Zoned mode does not allow that */
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group->start,
|
||||
block_group->length);
|
||||
btrfs_wait_ordered_roots(fs_info, U64_MAX, block_group);
|
||||
/* Wait for extent buffers to be written. */
|
||||
if (is_metadata)
|
||||
wait_eb_writebacks(block_group);
|
||||
|
@ -53,14 +53,12 @@ struct btrfs_zoned_device_info {
|
||||
void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_ZONED
|
||||
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
|
||||
struct blk_zone *zone);
|
||||
int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache);
|
||||
void btrfs_destroy_dev_zone_info(struct btrfs_device *device);
|
||||
struct btrfs_zoned_device_info *btrfs_clone_dev_zone_info(struct btrfs_device *orig_dev);
|
||||
int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info, unsigned long *mount_opt);
|
||||
int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info, unsigned long *mount_opt);
|
||||
int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
|
||||
u64 *bytenr_ret);
|
||||
int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
|
||||
@ -98,11 +96,6 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, bool do_finish);
|
||||
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
|
||||
struct blk_zone *zone)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
@ -136,7 +129,7 @@ static inline int btrfs_check_zoned_mode(const struct btrfs_fs_info *fs_info)
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
static inline int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info,
|
||||
static inline int btrfs_check_mountopts_zoned(const struct btrfs_fs_info *info,
|
||||
unsigned long *mount_opt)
|
||||
{
|
||||
return 0;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <linux/zstd.h>
|
||||
#include "misc.h"
|
||||
#include "fs.h"
|
||||
#include "btrfs_inode.h"
|
||||
#include "compression.h"
|
||||
#include "super.h"
|
||||
|
||||
@ -399,8 +400,13 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
|
||||
/* Initialize the stream */
|
||||
stream = zstd_init_cstream(&params, len, workspace->mem,
|
||||
workspace->size);
|
||||
if (!stream) {
|
||||
pr_warn("BTRFS: zstd_init_cstream failed\n");
|
||||
if (unlikely(!stream)) {
|
||||
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zstd compression init level %d failed, root %llu inode %llu offset %llu",
|
||||
workspace->req_level, btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode), start);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -429,9 +435,14 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
|
||||
|
||||
ret2 = zstd_compress_stream(stream, &workspace->out_buf,
|
||||
&workspace->in_buf);
|
||||
if (zstd_is_error(ret2)) {
|
||||
pr_debug("BTRFS: zstd_compress_stream returned %d\n",
|
||||
zstd_get_error_code(ret2));
|
||||
if (unlikely(zstd_is_error(ret2))) {
|
||||
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
||||
|
||||
btrfs_warn(inode->root->fs_info,
|
||||
"zstd compression level %d failed, error %d root %llu inode %llu offset %llu",
|
||||
workspace->req_level, zstd_get_error_code(ret2),
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
start);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -497,9 +508,14 @@ int zstd_compress_folios(struct list_head *ws, struct address_space *mapping,
|
||||
size_t ret2;
|
||||
|
||||
ret2 = zstd_end_stream(stream, &workspace->out_buf);
|
||||
if (zstd_is_error(ret2)) {
|
||||
pr_debug("BTRFS: zstd_end_stream returned %d\n",
|
||||
zstd_get_error_code(ret2));
|
||||
if (unlikely(zstd_is_error(ret2))) {
|
||||
struct btrfs_inode *inode = BTRFS_I(mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zstd compression end level %d failed, error %d root %llu inode %llu offset %llu",
|
||||
workspace->req_level, zstd_get_error_code(ret2),
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
start);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -561,8 +577,12 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
|
||||
stream = zstd_init_dstream(
|
||||
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
|
||||
if (!stream) {
|
||||
pr_debug("BTRFS: zstd_init_dstream failed\n");
|
||||
if (unlikely(!stream)) {
|
||||
struct btrfs_inode *inode = cb->bbio.inode;
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zstd decompression init failed, root %llu inode %llu offset %llu",
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode), cb->start);
|
||||
ret = -EIO;
|
||||
goto done;
|
||||
}
|
||||
@ -580,9 +600,13 @@ int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb)
|
||||
|
||||
ret2 = zstd_decompress_stream(stream, &workspace->out_buf,
|
||||
&workspace->in_buf);
|
||||
if (zstd_is_error(ret2)) {
|
||||
pr_debug("BTRFS: zstd_decompress_stream returned %d\n",
|
||||
zstd_get_error_code(ret2));
|
||||
if (unlikely(zstd_is_error(ret2))) {
|
||||
struct btrfs_inode *inode = cb->bbio.inode;
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zstd decompression failed, error %d root %llu inode %llu offset %llu",
|
||||
zstd_get_error_code(ret2), btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode), cb->start);
|
||||
ret = -EIO;
|
||||
goto done;
|
||||
}
|
||||
@ -637,8 +661,14 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
|
||||
|
||||
stream = zstd_init_dstream(
|
||||
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
|
||||
if (!stream) {
|
||||
pr_warn("BTRFS: zstd_init_dstream failed\n");
|
||||
if (unlikely(!stream)) {
|
||||
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zstd decompression init failed, root %llu inode %llu offset %llu",
|
||||
btrfs_root_id(inode->root), btrfs_ino(inode),
|
||||
page_offset(dest_page));
|
||||
ret = -EIO;
|
||||
goto finish;
|
||||
}
|
||||
|
||||
@ -655,9 +685,13 @@ int zstd_decompress(struct list_head *ws, const u8 *data_in,
|
||||
* one call should end the decompression.
|
||||
*/
|
||||
ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf);
|
||||
if (zstd_is_error(ret)) {
|
||||
pr_warn_ratelimited("BTRFS: zstd_decompress_stream return %d\n",
|
||||
zstd_get_error_code(ret));
|
||||
if (unlikely(zstd_is_error(ret))) {
|
||||
struct btrfs_inode *inode = BTRFS_I(dest_page->mapping->host);
|
||||
|
||||
btrfs_err(inode->root->fs_info,
|
||||
"zstd decompression failed, error %d root %llu inode %llu offset %llu",
|
||||
zstd_get_error_code(ret), btrfs_root_id(inode->root),
|
||||
btrfs_ino(inode), page_offset(dest_page));
|
||||
goto finish;
|
||||
}
|
||||
to_copy = workspace->out_buf.pos;
|
||||
|
@ -291,9 +291,6 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
|
||||
__field( u64, ino )
|
||||
__field( u64, start )
|
||||
__field( u64, len )
|
||||
__field( u64, orig_start )
|
||||
__field( u64, block_start )
|
||||
__field( u64, block_len )
|
||||
__field( u32, flags )
|
||||
__field( int, refs )
|
||||
),
|
||||
@ -303,23 +300,15 @@ TRACE_EVENT_CONDITION(btrfs_get_extent,
|
||||
__entry->ino = btrfs_ino(inode);
|
||||
__entry->start = map->start;
|
||||
__entry->len = map->len;
|
||||
__entry->orig_start = map->orig_start;
|
||||
__entry->block_start = map->block_start;
|
||||
__entry->block_len = map->block_len;
|
||||
__entry->flags = map->flags;
|
||||
__entry->refs = refcount_read(&map->refs);
|
||||
),
|
||||
|
||||
TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu "
|
||||
"orig_start=%llu block_start=%llu(%s) "
|
||||
"block_len=%llu flags=%s refs=%u",
|
||||
TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu flags=%s refs=%u",
|
||||
show_root_type(__entry->root_objectid),
|
||||
__entry->ino,
|
||||
__entry->start,
|
||||
__entry->len,
|
||||
__entry->orig_start,
|
||||
show_map_type(__entry->block_start),
|
||||
__entry->block_len,
|
||||
show_map_flags(__entry->flags),
|
||||
__entry->refs)
|
||||
);
|
||||
@ -2617,7 +2606,6 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
|
||||
__field( u64, root_id )
|
||||
__field( u64, start )
|
||||
__field( u64, len )
|
||||
__field( u64, block_start )
|
||||
__field( u32, flags )
|
||||
),
|
||||
|
||||
@ -2626,15 +2614,12 @@ TRACE_EVENT(btrfs_extent_map_shrinker_remove_em,
|
||||
__entry->root_id = inode->root->root_key.objectid;
|
||||
__entry->start = em->start;
|
||||
__entry->len = em->len;
|
||||
__entry->block_start = em->block_start;
|
||||
__entry->flags = em->flags;
|
||||
),
|
||||
|
||||
TP_printk_btrfs(
|
||||
"ino=%llu root=%llu(%s) start=%llu len=%llu block_start=%llu(%s) flags=%s",
|
||||
TP_printk_btrfs("ino=%llu root=%llu(%s) start=%llu len=%llu flags=%s",
|
||||
__entry->ino, show_root_type(__entry->root_id),
|
||||
__entry->start, __entry->len,
|
||||
show_map_type(__entry->block_start),
|
||||
show_map_flags(__entry->flags))
|
||||
);
|
||||
|
||||
|
@ -747,21 +747,9 @@ struct btrfs_raid_stride {
|
||||
__le64 physical;
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
/* The stripe_extent::encoding, 1:1 mapping of enum btrfs_raid_types. */
|
||||
#define BTRFS_STRIPE_RAID0 1
|
||||
#define BTRFS_STRIPE_RAID1 2
|
||||
#define BTRFS_STRIPE_DUP 3
|
||||
#define BTRFS_STRIPE_RAID10 4
|
||||
#define BTRFS_STRIPE_RAID5 5
|
||||
#define BTRFS_STRIPE_RAID6 6
|
||||
#define BTRFS_STRIPE_RAID1C3 7
|
||||
#define BTRFS_STRIPE_RAID1C4 8
|
||||
|
||||
struct btrfs_stripe_extent {
|
||||
__u8 encoding;
|
||||
__u8 reserved[7];
|
||||
/* An array of raid strides this stripe is composed of. */
|
||||
struct btrfs_raid_stride strides[];
|
||||
__DECLARE_FLEX_ARRAY(struct btrfs_raid_stride, strides);
|
||||
} __attribute__ ((__packed__));
|
||||
|
||||
#define BTRFS_HEADER_FLAG_WRITTEN (1ULL << 0)
|
||||
@ -777,6 +765,14 @@ struct btrfs_stripe_extent {
|
||||
#define BTRFS_SUPER_FLAG_CHANGING_FSID (1ULL << 35)
|
||||
#define BTRFS_SUPER_FLAG_CHANGING_FSID_V2 (1ULL << 36)
|
||||
|
||||
/*
|
||||
* Those are temporary flags utilized by btrfs-progs to do offline conversion.
|
||||
* They are rejected by kernel.
|
||||
* But still keep them all here to avoid conflicts.
|
||||
*/
|
||||
#define BTRFS_SUPER_FLAG_CHANGING_BG_TREE (1ULL << 38)
|
||||
#define BTRFS_SUPER_FLAG_CHANGING_DATA_CSUM (1ULL << 39)
|
||||
#define BTRFS_SUPER_FLAG_CHANGING_META_CSUM (1ULL << 40)
|
||||
|
||||
/*
|
||||
* items in the extent btree are used to record the objectid of the
|
||||
|
Loading…
x
Reference in New Issue
Block a user