for-6.6-tag
-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmTskOwACgkQxWXV+ddt
WDsNJw/8CCi41Z7e3LdJsQd2iy3/+oJZUvIGuT5YvshYxTLCbV7AL+diBPnSQs4Q
/KFMGL7RZBgJzwVoSQtXnESXXgX8VOVfN1zY//k5g6z7BscCEQd73H/M0B8ciZy/
aBygm9tJ7EtWbGZWNR8yad8YtOgl6xoClrPnJK/DCLwMGPy2o+fnKP3Y9FOKY5KM
1Sl0Y4FlJ9dTJpxIwYbx4xmuyHrh2OivjU/KnS9SzQlHu0nl6zsIAE45eKem2/EG
1figY5aFBYPpPYfopbLDalEBR3bQGiViZVJuNEop3AimdcMOXw9jBF3EZYUb5Tgn
MleMDgmmjLGOE/txGhvTxKj9kci2aGX+fJn3jXbcIMksAA0OQFLPqzGvEQcrs6Ok
HA0RsmAkS5fWNDCuuo4ZPXEyUPvluTQizkwyoulOfnK+UPJCWaRqbEBMTsvm6M6X
wFT2czwLpaEU/W6loIZkISUhfbRqVoA3DfHy398QXNzRhSrg8fQJjma1f7mrHvTi
CzU+OD5YSC2nXktVOnklyTr0XT+7HF69cumlDbr8TS8u1qu8n1keU/7M3MBB4xZk
BZFJDz8pnsAqpwVA4T434E/w45MDnYlwBw5r+U8Xjyso8xlau+sYXKcim85vT2Q0
yx/L91P6tdekR1y97p4aDdxw/PgTzdkNGMnsTBMVzgtCj+5pMmE=
=N7Yn
-----END PGP SIGNATURE-----

Merge tag 'for-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "No new features, the bulk of the changes are fixes, refactoring and
  cleanups. The notable fix is the scrub performance restoration after
  the rewrite in 6.4, though still only partial.

  Fixes:

   - scrub performance drop due to the rewrite in 6.4 partially restored:
       - do IO grouping by blk_plug/blk_unplug again
       - avoid unnecessary tree searches when processing stripes, in
         extent and checksum trees
       - the drop is noticeable on fast PCIe devices, -66% and restored
         to -33% of the original
       - backports to 6.4 planned

   - handle more corner cases of transaction commit during orphan
     cleanup or delayed ref processing

   - use correct fsid/metadata_uuid when validating super block

   - copy directory permissions and time when creating a stub subvolume

  Core:

   - the integrity checker debugging feature is deprecated, to be
     removed in 6.7

   - in zoned mode, zones are activated just before the write, making
     error handling easier; the overcommit mechanism can now be enabled
     again, which improves performance by avoiding more frequent flushing

   - v0 extent handling completely removed, deprecated a long time ago

   - error handling improvements

   - tests:
       - extent buffer bitmap tests
       - pinned extent splitting tests

   - cleanups and refactoring:
       - compression writeback
       - extent buffer bitmap
       - space flushing, ENOSPC handling"

* tag 'for-6.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (110 commits)
  btrfs: zoned: skip splitting and logical rewriting on pre-alloc write
  btrfs: tests: test invalid splitting when skipping pinned drop extent_map
  btrfs: tests: add a test for btrfs_add_extent_mapping
  btrfs: tests: add extent_map tests for dropping with odd layouts
  btrfs: scrub: move write back of repaired sectors to scrub_stripe_read_repair_worker()
  btrfs: scrub: don't go ordered workqueue for dev-replace
  btrfs: scrub: fix grouping of read IO
  btrfs: scrub: avoid unnecessary csum tree search preparing stripes
  btrfs: scrub: avoid unnecessary extent tree search preparing stripes
  btrfs: copy dir permission and time when creating a stub subvolume
  btrfs: remove pointless empty list check when reading delayed dir indexes
  btrfs: drop redundant check to use fs_devices::metadata_uuid
  btrfs: compare the correct fsid/metadata_uuid in btrfs_validate_super
  btrfs: use the correct superblock to compare fsid in btrfs_validate_super
  btrfs: simplify memcpy either of metadata_uuid or fsid
  btrfs: add a helper to read the superblock metadata_uuid
  btrfs: remove v0 extent handling
  btrfs: output extra debug info if we failed to find an inline backref
  btrfs: move the !zoned assert into run_delalloc_cow
  btrfs: consolidate the error handling in run_delalloc_nocow
  ...
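For context on the "IO grouping by blk_plug/blk_unplug" item above: block-layer plugging lets a task queue several bios locally and dispatch them as one batch, which is the mechanism the scrub read-grouping fix brings back. The sketch below only illustrates that generic pattern; submit_grouped_reads() is a hypothetical helper written for illustration, not the actual btrfs scrub code.

    /* Minimal sketch of the blk_plug batching pattern (illustrative only). */
    #include <linux/blkdev.h>

    static void submit_grouped_reads(struct bio *bios[], int nr)
    {
    	struct blk_plug plug;
    	int i;

    	blk_start_plug(&plug);		/* start collecting bios on a per-task plug list */
    	for (i = 0; i < nr; i++)
    		submit_bio(bios[i]);	/* queued on the plug, not dispatched yet */
    	blk_finish_plug(&plug);		/* unplug: dispatch the whole batch, allowing merges */
    }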
commit 547635c6ac
@@ -49,9 +49,11 @@ config BTRFS_FS_POSIX_ACL
If you don't know what Access Control Lists are, say N

config BTRFS_FS_CHECK_INTEGRITY
bool "Btrfs with integrity check tool compiled in (DANGEROUS)"
bool "Btrfs with integrity check tool compiled in (DEPRECATED)"
depends on BTRFS_FS
help
This feature has been deprecated and will be removed in 6.7.

Adds code that examines all block write requests (including
writes of the super block). The goal is to verify that the
state of the filesystem on disk is always consistent, i.e.,
@ -3,6 +3,8 @@
|
||||
#ifndef BTRFS_ACCESSORS_H
|
||||
#define BTRFS_ACCESSORS_H
|
||||
|
||||
#include <linux/stddef.h>
|
||||
|
||||
struct btrfs_map_token {
|
||||
struct extent_buffer *eb;
|
||||
char *kaddr;
|
||||
@ -34,13 +36,13 @@ static inline void put_unaligned_le8(u8 val, void *p)
|
||||
read_extent_buffer(eb, (char *)(result), \
|
||||
((unsigned long)(ptr)) + \
|
||||
offsetof(type, member), \
|
||||
sizeof(((type *)0)->member)))
|
||||
sizeof_field(type, member)))
|
||||
|
||||
#define write_eb_member(eb, ptr, type, member, result) (\
|
||||
write_extent_buffer(eb, (char *)(result), \
|
||||
((unsigned long)(ptr)) + \
|
||||
offsetof(type, member), \
|
||||
sizeof(((type *)0)->member)))
|
||||
sizeof_field(type, member)))
|
||||
|
||||
#define DECLARE_BTRFS_SETGET_BITS(bits) \
|
||||
u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \
|
||||
@ -62,25 +64,25 @@ DECLARE_BTRFS_SETGET_BITS(64)
|
||||
static inline u##bits btrfs_##name(const struct extent_buffer *eb, \
|
||||
const type *s) \
|
||||
{ \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
|
||||
return btrfs_get_##bits(eb, s, offsetof(type, member)); \
|
||||
} \
|
||||
static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \
|
||||
u##bits val) \
|
||||
{ \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
|
||||
btrfs_set_##bits(eb, s, offsetof(type, member), val); \
|
||||
} \
|
||||
static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \
|
||||
const type *s) \
|
||||
{ \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
|
||||
return btrfs_get_token_##bits(token, s, offsetof(type, member));\
|
||||
} \
|
||||
static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\
|
||||
type *s, u##bits val) \
|
||||
{ \
|
||||
static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \
|
||||
static_assert(sizeof(u##bits) == sizeof_field(type, member)); \
|
||||
btrfs_set_token_##bits(token, s, offsetof(type, member), val); \
|
||||
}
|
||||
|
||||
@ -111,17 +113,14 @@ static inline void btrfs_set_##name(type *s, u##bits val) \
|
||||
static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb,
|
||||
struct btrfs_dev_item *s)
|
||||
{
|
||||
static_assert(sizeof(u64) ==
|
||||
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
|
||||
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item,
|
||||
total_bytes));
|
||||
static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
|
||||
return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes));
|
||||
}
|
||||
static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb,
|
||||
struct btrfs_dev_item *s,
|
||||
u64 val)
|
||||
{
|
||||
static_assert(sizeof(u64) ==
|
||||
sizeof(((struct btrfs_dev_item *)0))->total_bytes);
|
||||
static_assert(sizeof(u64) == sizeof_field(struct btrfs_dev_item, total_bytes));
|
||||
WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize));
|
||||
btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val);
|
||||
}
|
||||
|
@ -3373,7 +3373,6 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
|
||||
struct btrfs_key *node_key,
|
||||
struct btrfs_backref_node *cur)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
struct btrfs_backref_edge *edge;
|
||||
struct btrfs_backref_node *exist;
|
||||
int ret;
|
||||
@ -3462,25 +3461,21 @@ int btrfs_backref_add_tree_node(struct btrfs_backref_cache *cache,
|
||||
ret = handle_direct_tree_backref(cache, &key, cur);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
continue;
|
||||
} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
|
||||
ret = -EINVAL;
|
||||
btrfs_print_v0_err(fs_info);
|
||||
btrfs_handle_fs_error(fs_info, ret, NULL);
|
||||
goto out;
|
||||
} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
|
||||
continue;
|
||||
} else if (key.type == BTRFS_TREE_BLOCK_REF_KEY) {
|
||||
/*
|
||||
* key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref
|
||||
* offset means the root objectid. We need to search
|
||||
* the tree to get its parent bytenr.
|
||||
*/
|
||||
ret = handle_indirect_tree_backref(cache, path, &key, node_key,
|
||||
cur);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* key.type == BTRFS_TREE_BLOCK_REF_KEY, inline ref offset
|
||||
* means the root objectid. We need to search the tree to get
|
||||
* its parent bytenr.
|
||||
* Unrecognized tree backref items (if it can pass tree-checker)
|
||||
* would be ignored.
|
||||
*/
|
||||
ret = handle_indirect_tree_backref(cache, path, &key, node_key,
|
||||
cur);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
cur->checked = 1;
|
||||
|
@ -504,13 +504,20 @@ static void fragment_free_space(struct btrfs_block_group *block_group)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is only called by btrfs_cache_block_group, since we could have freed
|
||||
* extents we need to check the pinned_extents for any extents that can't be
|
||||
* used yet since their free space will be released as soon as the transaction
|
||||
* commits.
|
||||
* Add a free space range to the in memory free space cache of a block group.
|
||||
* This checks if the range contains super block locations and any such
|
||||
* locations are not added to the free space cache.
|
||||
*
|
||||
* @block_group: The target block group.
|
||||
* @start: Start offset of the range.
|
||||
* @end: End offset of the range (exclusive).
|
||||
* @total_added_ret: Optional pointer to return the total amount of space
|
||||
* added to the block group's free space cache.
|
||||
*
|
||||
* Returns 0 on success or < 0 on error.
|
||||
*/
|
||||
int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end,
|
||||
u64 *total_added_ret)
|
||||
int btrfs_add_new_free_space(struct btrfs_block_group *block_group, u64 start,
|
||||
u64 end, u64 *total_added_ret)
|
||||
{
|
||||
struct btrfs_fs_info *info = block_group->fs_info;
|
||||
u64 extent_start, extent_end, size;
|
||||
@ -520,11 +527,10 @@ int add_new_free_space(struct btrfs_block_group *block_group, u64 start, u64 end
|
||||
*total_added_ret = 0;
|
||||
|
||||
while (start < end) {
|
||||
ret = find_first_extent_bit(&info->excluded_extents, start,
|
||||
&extent_start, &extent_end,
|
||||
EXTENT_DIRTY | EXTENT_UPTODATE,
|
||||
NULL);
|
||||
if (ret)
|
||||
if (!find_first_extent_bit(&info->excluded_extents, start,
|
||||
&extent_start, &extent_end,
|
||||
EXTENT_DIRTY | EXTENT_UPTODATE,
|
||||
NULL))
|
||||
break;
|
||||
|
||||
if (extent_start <= start) {
|
||||
@ -799,8 +805,8 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
|
||||
key.type == BTRFS_METADATA_ITEM_KEY) {
|
||||
u64 space_added;
|
||||
|
||||
ret = add_new_free_space(block_group, last, key.objectid,
|
||||
&space_added);
|
||||
ret = btrfs_add_new_free_space(block_group, last,
|
||||
key.objectid, &space_added);
|
||||
if (ret)
|
||||
goto out;
|
||||
total_found += space_added;
|
||||
@ -821,14 +827,20 @@ static int load_extent_tree_free(struct btrfs_caching_control *caching_ctl)
|
||||
path->slots[0]++;
|
||||
}
|
||||
|
||||
ret = add_new_free_space(block_group, last,
|
||||
block_group->start + block_group->length,
|
||||
NULL);
|
||||
ret = btrfs_add_new_free_space(block_group, last,
|
||||
block_group->start + block_group->length,
|
||||
NULL);
|
||||
out:
|
||||
btrfs_free_path(path);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void btrfs_free_excluded_extents(const struct btrfs_block_group *bg)
|
||||
{
|
||||
clear_extent_bits(&bg->fs_info->excluded_extents, bg->start,
|
||||
bg->start + bg->length - 1, EXTENT_UPTODATE);
|
||||
}
|
||||
|
||||
static noinline void caching_thread(struct btrfs_work *work)
|
||||
{
|
||||
struct btrfs_block_group *block_group;
|
||||
@ -2098,8 +2110,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
|
||||
if (cache->start < BTRFS_SUPER_INFO_OFFSET) {
|
||||
stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->start;
|
||||
cache->bytes_super += stripe_len;
|
||||
ret = btrfs_add_excluded_extent(fs_info, cache->start,
|
||||
stripe_len);
|
||||
ret = set_extent_bit(&fs_info->excluded_extents, cache->start,
|
||||
cache->start + stripe_len - 1,
|
||||
EXTENT_UPTODATE, NULL);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -2125,8 +2138,9 @@ static int exclude_super_stripes(struct btrfs_block_group *cache)
|
||||
cache->start + cache->length - logical[nr]);
|
||||
|
||||
cache->bytes_super += len;
|
||||
ret = btrfs_add_excluded_extent(fs_info, logical[nr],
|
||||
len);
|
||||
ret = set_extent_bit(&fs_info->excluded_extents, logical[nr],
|
||||
logical[nr] + len - 1,
|
||||
EXTENT_UPTODATE, NULL);
|
||||
if (ret) {
|
||||
kfree(logical);
|
||||
return ret;
|
||||
@ -2319,8 +2333,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
|
||||
btrfs_free_excluded_extents(cache);
|
||||
} else if (cache->used == 0) {
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
ret = add_new_free_space(cache, cache->start,
|
||||
cache->start + cache->length, NULL);
|
||||
ret = btrfs_add_new_free_space(cache, cache->start,
|
||||
cache->start + cache->length, NULL);
|
||||
btrfs_free_excluded_extents(cache);
|
||||
if (ret)
|
||||
goto error;
|
||||
@ -2767,7 +2781,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
ret = add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
|
||||
ret = btrfs_add_new_free_space(cache, chunk_offset, chunk_offset + size, NULL);
|
||||
btrfs_free_excluded_extents(cache);
|
||||
if (ret) {
|
||||
btrfs_put_block_group(cache);
|
||||
@ -4075,7 +4089,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
|
||||
|
||||
if (IS_ERR(ret_bg)) {
|
||||
ret = PTR_ERR(ret_bg);
|
||||
} else if (from_extent_allocation) {
|
||||
} else if (from_extent_allocation && (flags & BTRFS_BLOCK_GROUP_DATA)) {
|
||||
/*
|
||||
* New block group is likely to be used soon. Try to activate
|
||||
* it now. Failure is OK for now.
|
||||
@ -4273,6 +4287,17 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
||||
struct btrfs_caching_control *caching_ctl;
|
||||
struct rb_node *n;
|
||||
|
||||
if (btrfs_is_zoned(info)) {
|
||||
if (info->active_meta_bg) {
|
||||
btrfs_put_block_group(info->active_meta_bg);
|
||||
info->active_meta_bg = NULL;
|
||||
}
|
||||
if (info->active_system_bg) {
|
||||
btrfs_put_block_group(info->active_system_bg);
|
||||
info->active_system_bg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
write_lock(&info->block_group_cache_lock);
|
||||
while (!list_empty(&info->caching_block_groups)) {
|
||||
caching_ctl = list_entry(info->caching_block_groups.next,
|
||||
|
@@ -291,8 +291,8 @@ int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
struct btrfs_block_group *cache);
int add_new_free_space(struct btrfs_block_group *block_group,
u64 start, u64 end, u64 *total_added_ret);
int btrfs_add_new_free_space(struct btrfs_block_group *block_group,
u64 start, u64 end, u64 *total_added_ret);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
struct btrfs_fs_info *fs_info,
const u64 chunk_offset);
@@ -498,12 +498,8 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
u64 start, u64 num_bytes, u64 min_size,
loff_t actual_len, u64 *alloc_hint);
int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, struct writeback_control *wbc);
u64 start, u64 end, struct writeback_control *wbc);
int btrfs_writepage_cow_fixup(struct page *page);
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate);
int btrfs_encoded_io_compression_from_extent(struct btrfs_fs_info *fs_info,
int compress_type);
int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
@@ -1736,9 +1736,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
int over = 0;
unsigned char d_type;

if (list_empty(ins_list))
return 0;

/*
* Changing the data of the delayed item is impossible. So
* we needn't lock them. And we have held i_mutex of the
@@ -792,9 +792,9 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,

lockdep_assert_held(&srcdev->fs_info->chunk_mutex);

while (!find_first_extent_bit(&srcdev->alloc_state, start,
&found_start, &found_end,
CHUNK_ALLOCATED, &cached_state)) {
while (find_first_extent_bit(&srcdev->alloc_state, start,
&found_start, &found_end,
CHUNK_ALLOCATED, &cached_state)) {
ret = set_extent_bit(&tgtdev->alloc_state, found_start,
found_end, CHUNK_ALLOCATED, NULL);
if (ret)
@ -313,21 +313,16 @@ static bool check_tree_block_fsid(struct extent_buffer *eb)
|
||||
struct btrfs_fs_info *fs_info = eb->fs_info;
|
||||
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
|
||||
u8 fsid[BTRFS_FSID_SIZE];
|
||||
u8 *metadata_uuid;
|
||||
|
||||
read_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
|
||||
BTRFS_FSID_SIZE);
|
||||
/*
|
||||
* Checking the incompat flag is only valid for the current fs. For
|
||||
* seed devices it's forbidden to have their uuid changed so reading
|
||||
* ->fsid in this case is fine
|
||||
*/
|
||||
if (btrfs_fs_incompat(fs_info, METADATA_UUID))
|
||||
metadata_uuid = fs_devices->metadata_uuid;
|
||||
else
|
||||
metadata_uuid = fs_devices->fsid;
|
||||
|
||||
if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
|
||||
/*
|
||||
* alloc_fs_devices() copies the fsid into metadata_uuid if the
|
||||
* metadata_uuid is unset in the superblock, including for a seed device.
|
||||
* So, we can use fs_devices->metadata_uuid.
|
||||
*/
|
||||
if (memcmp(fsid, fs_info->fs_devices->metadata_uuid, BTRFS_FSID_SIZE) == 0)
|
||||
return false;
|
||||
|
||||
list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
|
||||
@ -2384,21 +2379,18 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (memcmp(fs_info->fs_devices->fsid, fs_info->super_copy->fsid,
|
||||
BTRFS_FSID_SIZE)) {
|
||||
if (memcmp(fs_info->fs_devices->fsid, sb->fsid, BTRFS_FSID_SIZE) != 0) {
|
||||
btrfs_err(fs_info,
|
||||
"superblock fsid doesn't match fsid of fs_devices: %pU != %pU",
|
||||
fs_info->super_copy->fsid, fs_info->fs_devices->fsid);
|
||||
sb->fsid, fs_info->fs_devices->fsid);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
if (btrfs_fs_incompat(fs_info, METADATA_UUID) &&
|
||||
memcmp(fs_info->fs_devices->metadata_uuid,
|
||||
fs_info->super_copy->metadata_uuid, BTRFS_FSID_SIZE)) {
|
||||
if (memcmp(fs_info->fs_devices->metadata_uuid, btrfs_sb_fsid_ptr(sb),
|
||||
BTRFS_FSID_SIZE) != 0) {
|
||||
btrfs_err(fs_info,
|
||||
"superblock metadata_uuid doesn't match metadata uuid of fs_devices: %pU != %pU",
|
||||
fs_info->super_copy->metadata_uuid,
|
||||
fs_info->fs_devices->metadata_uuid);
|
||||
btrfs_sb_fsid_ptr(sb), fs_info->fs_devices->metadata_uuid);
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
@ -2869,6 +2861,56 @@ static int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
u64 root_objectid = 0;
|
||||
struct btrfs_root *gang[8];
|
||||
int i = 0;
|
||||
int err = 0;
|
||||
unsigned int ret = 0;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
(void **)gang, root_objectid,
|
||||
ARRAY_SIZE(gang));
|
||||
if (!ret) {
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
break;
|
||||
}
|
||||
root_objectid = gang[ret - 1]->root_key.objectid + 1;
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
/* Avoid to grab roots in dead_roots. */
|
||||
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
|
||||
gang[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
/* Grab all the search result for later use. */
|
||||
gang[i] = btrfs_grab_root(gang[i]);
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (!gang[i])
|
||||
continue;
|
||||
root_objectid = gang[i]->root_key.objectid;
|
||||
err = btrfs_orphan_cleanup(gang[i]);
|
||||
if (err)
|
||||
goto out;
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
root_objectid++;
|
||||
}
|
||||
out:
|
||||
/* Release the uncleaned roots due to error. */
|
||||
for (; i < ret; i++) {
|
||||
if (gang[i])
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some options only have meaning at mount time and shouldn't persist across
|
||||
* remounts, or be displayed. Clear these at the end of mount and remount
|
||||
@ -3222,7 +3264,7 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
|
||||
/* check FS state, whether FS is broken. */
|
||||
if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
|
||||
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
|
||||
WRITE_ONCE(fs_info->fs_error, -EUCLEAN);
|
||||
|
||||
/*
|
||||
* In the long term, we'll store the compression type in the super
|
||||
@ -3417,6 +3459,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
|
||||
|
||||
btrfs_free_zone_cache(fs_info);
|
||||
|
||||
btrfs_check_active_zone_reservation(fs_info);
|
||||
|
||||
if (!sb_rdonly(sb) && fs_info->fs_devices->missing_devices &&
|
||||
!btrfs_check_rw_degradable(fs_info, NULL)) {
|
||||
btrfs_warn(fs_info,
|
||||
@ -4136,56 +4180,6 @@ void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
btrfs_put_root(root);
|
||||
}
|
||||
|
||||
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
u64 root_objectid = 0;
|
||||
struct btrfs_root *gang[8];
|
||||
int i = 0;
|
||||
int err = 0;
|
||||
unsigned int ret = 0;
|
||||
|
||||
while (1) {
|
||||
spin_lock(&fs_info->fs_roots_radix_lock);
|
||||
ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
|
||||
(void **)gang, root_objectid,
|
||||
ARRAY_SIZE(gang));
|
||||
if (!ret) {
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
break;
|
||||
}
|
||||
root_objectid = gang[ret - 1]->root_key.objectid + 1;
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
/* Avoid to grab roots in dead_roots */
|
||||
if (btrfs_root_refs(&gang[i]->root_item) == 0) {
|
||||
gang[i] = NULL;
|
||||
continue;
|
||||
}
|
||||
/* grab all the search result for later use */
|
||||
gang[i] = btrfs_grab_root(gang[i]);
|
||||
}
|
||||
spin_unlock(&fs_info->fs_roots_radix_lock);
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
if (!gang[i])
|
||||
continue;
|
||||
root_objectid = gang[i]->root_key.objectid;
|
||||
err = btrfs_orphan_cleanup(gang[i]);
|
||||
if (err)
|
||||
goto out;
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
root_objectid++;
|
||||
}
|
||||
out:
|
||||
/* release the uncleaned roots due to error */
|
||||
for (; i < ret; i++) {
|
||||
if (gang[i])
|
||||
btrfs_put_root(gang[i]);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
int btrfs_commit_super(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->tree_root;
|
||||
@ -4228,7 +4222,7 @@ static void warn_about_uncommitted_trans(struct btrfs_fs_info *fs_info)
|
||||
u64 found_end;
|
||||
|
||||
found = true;
|
||||
while (!find_first_extent_bit(&trans->dirty_pages, cur,
|
||||
while (find_first_extent_bit(&trans->dirty_pages, cur,
|
||||
&found_start, &found_end, EXTENT_DIRTY, &cached)) {
|
||||
dirty_bytes += found_end + 1 - found_start;
|
||||
cur = found_end + 1;
|
||||
@ -4552,9 +4546,7 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
|
||||
static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct list_head splice;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
LIST_HEAD(splice);
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
list_splice_init(&fs_info->ordered_roots, &splice);
|
||||
@ -4660,9 +4652,7 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
|
||||
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
|
||||
{
|
||||
struct btrfs_inode *btrfs_inode;
|
||||
struct list_head splice;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
LIST_HEAD(splice);
|
||||
|
||||
spin_lock(&root->delalloc_lock);
|
||||
list_splice_init(&root->delalloc_inodes, &splice);
|
||||
@ -4695,9 +4685,7 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
|
||||
static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct list_head splice;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
LIST_HEAD(splice);
|
||||
|
||||
spin_lock(&fs_info->delalloc_root_lock);
|
||||
list_splice_init(&fs_info->delalloc_roots, &splice);
|
||||
@ -4716,21 +4704,16 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
|
||||
spin_unlock(&fs_info->delalloc_root_lock);
|
||||
}
|
||||
|
||||
static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *dirty_pages,
|
||||
int mark)
|
||||
static void btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *dirty_pages,
|
||||
int mark)
|
||||
{
|
||||
int ret;
|
||||
struct extent_buffer *eb;
|
||||
u64 start = 0;
|
||||
u64 end;
|
||||
|
||||
while (1) {
|
||||
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
mark, NULL);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
while (find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
mark, NULL)) {
|
||||
clear_extent_bits(dirty_pages, start, end, mark);
|
||||
while (start <= end) {
|
||||
eb = find_extent_buffer(fs_info, start);
|
||||
@ -4746,16 +4729,13 @@ static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
free_extent_buffer_stale(eb);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *unpin)
|
||||
static void btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
|
||||
struct extent_io_tree *unpin)
|
||||
{
|
||||
u64 start;
|
||||
u64 end;
|
||||
int ret;
|
||||
|
||||
while (1) {
|
||||
struct extent_state *cached_state = NULL;
|
||||
@ -4767,9 +4747,8 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
|
||||
* the same extent range.
|
||||
*/
|
||||
mutex_lock(&fs_info->unused_bg_unpin_mutex);
|
||||
ret = find_first_extent_bit(unpin, 0, &start, &end,
|
||||
EXTENT_DIRTY, &cached_state);
|
||||
if (ret) {
|
||||
if (!find_first_extent_bit(unpin, 0, &start, &end,
|
||||
EXTENT_DIRTY, &cached_state)) {
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
break;
|
||||
}
|
||||
@ -4780,8 +4759,6 @@ static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
cond_resched();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void btrfs_cleanup_bg_io(struct btrfs_block_group *cache)
|
||||
|
@ -77,7 +77,6 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr);
|
||||
struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info);
|
||||
|
||||
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
|
||||
|
@@ -831,15 +831,15 @@ static struct extent_state *find_first_extent_bit_state(struct extent_io_tree *t
*
* Note: If there are multiple bits set in @bits, any of them will match.
*
* Return 0 if we find something, and update @start_ret and @end_ret.
* Return 1 if we found nothing.
* Return true if we find something, and update @start_ret and @end_ret.
* Return false if we found nothing.
*/
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state)
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state)
{
struct extent_state *state;
int ret = 1;
bool ret = false;

spin_lock(&tree->lock);
if (cached_state && *cached_state) {
@@ -863,7 +863,7 @@ int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
cache_state_if_flags(state, cached_state, 0);
*start_ret = state->start;
*end_ret = state->end;
ret = 0;
ret = true;
}
out:
spin_unlock(&tree->lock);
@@ -182,9 +182,9 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u32 clear_bits,
struct extent_state **cached_state);

int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);
bool find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);
void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits);
int find_contiguous_extent_bit(struct extent_io_tree *tree, u64 start,
@ -69,27 +69,6 @@ static int block_group_bits(struct btrfs_block_group *cache, u64 bits)
|
||||
return (cache->flags & bits) == bits;
|
||||
}
|
||||
|
||||
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 num_bytes)
|
||||
{
|
||||
u64 end = start + num_bytes - 1;
|
||||
set_extent_bit(&fs_info->excluded_extents, start, end,
|
||||
EXTENT_UPTODATE, NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void btrfs_free_excluded_extents(struct btrfs_block_group *cache)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = cache->fs_info;
|
||||
u64 start, end;
|
||||
|
||||
start = cache->start;
|
||||
end = start + cache->length - 1;
|
||||
|
||||
clear_extent_bits(&fs_info->excluded_extents, start, end,
|
||||
EXTENT_UPTODATE);
|
||||
}
|
||||
|
||||
/* simple helper to search for an existing data extent at a given offset */
|
||||
int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len)
|
||||
{
|
||||
@ -187,8 +166,10 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
|
||||
num_refs = btrfs_extent_refs(leaf, ei);
|
||||
extent_flags = btrfs_extent_flags(leaf, ei);
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
btrfs_print_v0_err(fs_info);
|
||||
ret = -EUCLEAN;
|
||||
btrfs_err(fs_info,
|
||||
"unexpected extent item size, has %u expect >= %zu",
|
||||
item_size, sizeof(*ei));
|
||||
if (trans)
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
else
|
||||
@ -402,11 +383,11 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON(1);
|
||||
btrfs_print_leaf(eb);
|
||||
btrfs_err(eb->fs_info,
|
||||
"eb %llu iref 0x%lx invalid extent inline ref type %d",
|
||||
eb->start, (unsigned long)iref, type);
|
||||
WARN_ON(1);
|
||||
|
||||
return BTRFS_REF_TYPE_INVALID;
|
||||
}
|
||||
@ -624,12 +605,12 @@ static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
|
||||
ref2 = btrfs_item_ptr(leaf, path->slots[0],
|
||||
struct btrfs_shared_data_ref);
|
||||
num_refs = btrfs_shared_data_ref_count(leaf, ref2);
|
||||
} else if (unlikely(key.type == BTRFS_EXTENT_REF_V0_KEY)) {
|
||||
btrfs_print_v0_err(trans->fs_info);
|
||||
btrfs_abort_transaction(trans, -EINVAL);
|
||||
return -EINVAL;
|
||||
} else {
|
||||
BUG();
|
||||
btrfs_err(trans->fs_info,
|
||||
"unrecognized backref key (%llu %u %llu)",
|
||||
key.objectid, key.type, key.offset);
|
||||
btrfs_abort_transaction(trans, -EUCLEAN);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
|
||||
BUG_ON(num_refs < refs_to_drop);
|
||||
@ -660,7 +641,6 @@ static noinline u32 extent_data_ref_count(struct btrfs_path *path,
|
||||
leaf = path->nodes[0];
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
|
||||
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
|
||||
if (iref) {
|
||||
/*
|
||||
* If type is invalid, we should have bailed out earlier than
|
||||
@ -869,6 +849,11 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
|
||||
err = -ENOENT;
|
||||
goto out;
|
||||
} else if (WARN_ON(ret)) {
|
||||
btrfs_print_leaf(path->nodes[0]);
|
||||
btrfs_err(fs_info,
|
||||
"extent item not found for insert, bytenr %llu num_bytes %llu parent %llu root_objectid %llu owner %llu offset %llu",
|
||||
bytenr, num_bytes, parent, root_objectid, owner,
|
||||
offset);
|
||||
err = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@ -876,8 +861,10 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size(leaf, path->slots[0]);
|
||||
if (unlikely(item_size < sizeof(*ei))) {
|
||||
err = -EINVAL;
|
||||
btrfs_print_v0_err(fs_info);
|
||||
err = -EUCLEAN;
|
||||
btrfs_err(fs_info,
|
||||
"unexpected extent item size, has %llu expect >= %zu",
|
||||
item_size, sizeof(*ei));
|
||||
btrfs_abort_transaction(trans, err);
|
||||
goto out;
|
||||
}
|
||||
@ -1079,13 +1066,13 @@ static int lookup_extent_backref(struct btrfs_trans_handle *trans,
|
||||
/*
|
||||
* helper to update/remove inline back ref
|
||||
*/
|
||||
static noinline_for_stack
|
||||
void update_inline_extent_backref(struct btrfs_path *path,
|
||||
static noinline_for_stack int update_inline_extent_backref(struct btrfs_path *path,
|
||||
struct btrfs_extent_inline_ref *iref,
|
||||
int refs_to_mod,
|
||||
struct btrfs_delayed_extent_op *extent_op)
|
||||
{
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
struct btrfs_fs_info *fs_info = leaf->fs_info;
|
||||
struct btrfs_extent_item *ei;
|
||||
struct btrfs_extent_data_ref *dref = NULL;
|
||||
struct btrfs_shared_data_ref *sref = NULL;
|
||||
@ -1098,18 +1085,33 @@ void update_inline_extent_backref(struct btrfs_path *path,
|
||||
|
||||
ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
|
||||
refs = btrfs_extent_refs(leaf, ei);
|
||||
WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
|
||||
if (unlikely(refs_to_mod < 0 && refs + refs_to_mod <= 0)) {
|
||||
struct btrfs_key key;
|
||||
u32 extent_size;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.type == BTRFS_METADATA_ITEM_KEY)
|
||||
extent_size = fs_info->nodesize;
|
||||
else
|
||||
extent_size = key.offset;
|
||||
btrfs_print_leaf(leaf);
|
||||
btrfs_err(fs_info,
|
||||
"invalid refs_to_mod for extent %llu num_bytes %u, has %d expect >= -%llu",
|
||||
key.objectid, extent_size, refs_to_mod, refs);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
refs += refs_to_mod;
|
||||
btrfs_set_extent_refs(leaf, ei, refs);
|
||||
if (extent_op)
|
||||
__run_delayed_extent_op(extent_op, leaf, ei);
|
||||
|
||||
/*
|
||||
* If type is invalid, we should have bailed out after
|
||||
* lookup_inline_extent_backref().
|
||||
*/
|
||||
type = btrfs_get_extent_inline_ref_type(leaf, iref, BTRFS_REF_TYPE_ANY);
|
||||
ASSERT(type != BTRFS_REF_TYPE_INVALID);
|
||||
/*
|
||||
* Function btrfs_get_extent_inline_ref_type() has already printed
|
||||
* error messages.
|
||||
*/
|
||||
if (unlikely(type == BTRFS_REF_TYPE_INVALID))
|
||||
return -EUCLEAN;
|
||||
|
||||
if (type == BTRFS_EXTENT_DATA_REF_KEY) {
|
||||
dref = (struct btrfs_extent_data_ref *)(&iref->offset);
|
||||
@ -1119,10 +1121,43 @@ void update_inline_extent_backref(struct btrfs_path *path,
|
||||
refs = btrfs_shared_data_ref_count(leaf, sref);
|
||||
} else {
|
||||
refs = 1;
|
||||
BUG_ON(refs_to_mod != -1);
|
||||
/*
|
||||
* For tree blocks we can only drop one ref for it, and tree
|
||||
* blocks should not have refs > 1.
|
||||
*
|
||||
* Furthermore if we're inserting a new inline backref, we
|
||||
* won't reach this path either. That would be
|
||||
* setup_inline_extent_backref().
|
||||
*/
|
||||
if (unlikely(refs_to_mod != -1)) {
|
||||
struct btrfs_key key;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
|
||||
btrfs_print_leaf(leaf);
|
||||
btrfs_err(fs_info,
|
||||
"invalid refs_to_mod for tree block %llu, has %d expect -1",
|
||||
key.objectid, refs_to_mod);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
}
|
||||
|
||||
BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
|
||||
if (unlikely(refs_to_mod < 0 && refs < -refs_to_mod)) {
|
||||
struct btrfs_key key;
|
||||
u32 extent_size;
|
||||
|
||||
btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
|
||||
if (key.type == BTRFS_METADATA_ITEM_KEY)
|
||||
extent_size = fs_info->nodesize;
|
||||
else
|
||||
extent_size = key.offset;
|
||||
btrfs_print_leaf(leaf);
|
||||
btrfs_err(fs_info,
|
||||
"invalid refs_to_mod for backref entry, iref %lu extent %llu num_bytes %u, has %d expect >= -%llu",
|
||||
(unsigned long)iref, key.objectid, extent_size,
|
||||
refs_to_mod, refs);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
refs += refs_to_mod;
|
||||
|
||||
if (refs > 0) {
|
||||
@ -1142,6 +1177,7 @@ void update_inline_extent_backref(struct btrfs_path *path,
|
||||
btrfs_truncate_item(path, item_size, 1);
|
||||
}
|
||||
btrfs_mark_buffer_dirty(leaf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static noinline_for_stack
|
||||
@ -1170,7 +1206,7 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
|
||||
bytenr, num_bytes, root_objectid, path->slots[0]);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
update_inline_extent_backref(path, iref, refs_to_add, extent_op);
|
||||
ret = update_inline_extent_backref(path, iref, refs_to_add, extent_op);
|
||||
} else if (ret == -ENOENT) {
|
||||
setup_inline_extent_backref(trans->fs_info, path, iref, parent,
|
||||
root_objectid, owner, offset,
|
||||
@ -1190,7 +1226,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans,
|
||||
|
||||
BUG_ON(!is_data && refs_to_drop != 1);
|
||||
if (iref)
|
||||
update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
|
||||
ret = update_inline_extent_backref(path, iref, -refs_to_drop, NULL);
|
||||
else if (is_data)
|
||||
ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
|
||||
else
|
||||
@ -1629,8 +1665,10 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
|
||||
item_size = btrfs_item_size(leaf, path->slots[0]);
|
||||
|
||||
if (unlikely(item_size < sizeof(*ei))) {
|
||||
err = -EINVAL;
|
||||
btrfs_print_v0_err(fs_info);
|
||||
err = -EUCLEAN;
|
||||
btrfs_err(fs_info,
|
||||
"unexpected extent item size, has %u expect >= %zu",
|
||||
item_size, sizeof(*ei));
|
||||
btrfs_abort_transaction(trans, err);
|
||||
goto out;
|
||||
}
|
||||
@ -2751,9 +2789,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans)
|
||||
struct extent_state *cached_state = NULL;
|
||||
|
||||
mutex_lock(&fs_info->unused_bg_unpin_mutex);
|
||||
ret = find_first_extent_bit(unpin, 0, &start, &end,
|
||||
EXTENT_DIRTY, &cached_state);
|
||||
if (ret) {
|
||||
if (!find_first_extent_bit(unpin, 0, &start, &end,
|
||||
EXTENT_DIRTY, &cached_state)) {
|
||||
mutex_unlock(&fs_info->unused_bg_unpin_mutex);
|
||||
break;
|
||||
}
|
||||
@ -3059,8 +3096,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
|
||||
leaf = path->nodes[0];
|
||||
item_size = btrfs_item_size(leaf, extent_slot);
|
||||
if (unlikely(item_size < sizeof(*ei))) {
|
||||
ret = -EINVAL;
|
||||
btrfs_print_v0_err(info);
|
||||
ret = -EUCLEAN;
|
||||
btrfs_err(trans->fs_info,
|
||||
"unexpected extent item size, has %u expect >= %zu",
|
||||
item_size, sizeof(*ei));
|
||||
btrfs_abort_transaction(trans, ret);
|
||||
goto out;
|
||||
}
|
||||
@ -3351,11 +3390,38 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref)
|
||||
}
|
||||
|
||||
enum btrfs_loop_type {
|
||||
/*
|
||||
* Start caching block groups but do not wait for progress or for them
|
||||
* to be done.
|
||||
*/
|
||||
LOOP_CACHING_NOWAIT,
|
||||
|
||||
/*
|
||||
* Wait for the block group free_space >= the space we're waiting for if
|
||||
* the block group isn't cached.
|
||||
*/
|
||||
LOOP_CACHING_WAIT,
|
||||
|
||||
/*
|
||||
* Allow allocations to happen from block groups that do not yet have a
|
||||
* size classification.
|
||||
*/
|
||||
LOOP_UNSET_SIZE_CLASS,
|
||||
|
||||
/*
|
||||
* Allocate a chunk and then retry the allocation.
|
||||
*/
|
||||
LOOP_ALLOC_CHUNK,
|
||||
|
||||
/*
|
||||
* Ignore the size class restrictions for this allocation.
|
||||
*/
|
||||
LOOP_WRONG_SIZE_CLASS,
|
||||
|
||||
/*
|
||||
* Ignore the empty size, only try to allocate the number of bytes
|
||||
* needed for this allocation.
|
||||
*/
|
||||
LOOP_NO_EMPTY_SIZE,
|
||||
};
|
||||
|
||||
@ -3427,7 +3493,6 @@ btrfs_release_block_group(struct btrfs_block_group *cache,
|
||||
* Helper function for find_free_extent().
|
||||
*
|
||||
* Return -ENOENT to inform caller that we need fallback to unclustered mode.
|
||||
* Return -EAGAIN to inform caller that we need to re-search this block group
|
||||
* Return >0 to inform caller that we find nothing
|
||||
* Return 0 means we have found a location and set ffe_ctl->found_offset.
|
||||
*/
|
||||
@ -3508,14 +3573,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
|
||||
trace_btrfs_reserve_extent_cluster(bg, ffe_ctl);
|
||||
return 0;
|
||||
}
|
||||
} else if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
|
||||
!ffe_ctl->retry_clustered) {
|
||||
spin_unlock(&last_ptr->refill_lock);
|
||||
|
||||
ffe_ctl->retry_clustered = true;
|
||||
btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
|
||||
ffe_ctl->empty_cluster + ffe_ctl->empty_size);
|
||||
return -EAGAIN;
|
||||
}
|
||||
/*
|
||||
* At this point we either didn't find a cluster or we weren't able to
|
||||
@ -3530,7 +3587,6 @@ static int find_free_extent_clustered(struct btrfs_block_group *bg,
|
||||
/*
|
||||
* Return >0 to inform caller that we find nothing
|
||||
* Return 0 when we found an free extent and set ffe_ctrl->found_offset
|
||||
* Return -EAGAIN to inform caller that we need to re-search this block group
|
||||
*/
|
||||
static int find_free_extent_unclustered(struct btrfs_block_group *bg,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
@ -3568,25 +3624,8 @@ static int find_free_extent_unclustered(struct btrfs_block_group *bg,
|
||||
offset = btrfs_find_space_for_alloc(bg, ffe_ctl->search_start,
|
||||
ffe_ctl->num_bytes, ffe_ctl->empty_size,
|
||||
&ffe_ctl->max_extent_size);
|
||||
|
||||
/*
|
||||
* If we didn't find a chunk, and we haven't failed on this block group
|
||||
* before, and this block group is in the middle of caching and we are
|
||||
* ok with waiting, then go ahead and wait for progress to be made, and
|
||||
* set @retry_unclustered to true.
|
||||
*
|
||||
* If @retry_unclustered is true then we've already waited on this
|
||||
* block group once and should move on to the next block group.
|
||||
*/
|
||||
if (!offset && !ffe_ctl->retry_unclustered && !ffe_ctl->cached &&
|
||||
ffe_ctl->loop > LOOP_CACHING_NOWAIT) {
|
||||
btrfs_wait_block_group_cache_progress(bg, ffe_ctl->num_bytes +
|
||||
ffe_ctl->empty_size);
|
||||
ffe_ctl->retry_unclustered = true;
|
||||
return -EAGAIN;
|
||||
} else if (!offset) {
|
||||
if (!offset)
|
||||
return 1;
|
||||
}
|
||||
ffe_ctl->found_offset = offset;
|
||||
return 0;
|
||||
}
|
||||
@ -3600,7 +3639,7 @@ static int do_allocation_clustered(struct btrfs_block_group *block_group,
|
||||
/* We want to try and use the cluster allocator, so lets look there */
|
||||
if (ffe_ctl->last_ptr && ffe_ctl->use_cluster) {
|
||||
ret = find_free_extent_clustered(block_group, ffe_ctl, bg_ret);
|
||||
if (ret >= 0 || ret == -EAGAIN)
|
||||
if (ret >= 0)
|
||||
return ret;
|
||||
/* ret == -ENOENT case falls through */
|
||||
}
|
||||
@ -3685,7 +3724,9 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
if (!ret && !btrfs_zone_activate(block_group)) {
|
||||
/* Metadata block group is activated at write time. */
|
||||
if (!ret && (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
|
||||
!btrfs_zone_activate(block_group)) {
|
||||
ret = 1;
|
||||
/*
|
||||
* May need to clear fs_info->{treelog,data_reloc}_bg.
|
||||
@ -3709,7 +3750,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
fs_info->data_reloc_bg == 0);
|
||||
|
||||
if (block_group->ro ||
|
||||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
|
||||
(!ffe_ctl->for_data_reloc &&
|
||||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags))) {
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
@ -3752,8 +3794,26 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
if (ffe_ctl->for_treelog && !fs_info->treelog_bg)
|
||||
fs_info->treelog_bg = block_group->start;
|
||||
|
||||
if (ffe_ctl->for_data_reloc && !fs_info->data_reloc_bg)
|
||||
fs_info->data_reloc_bg = block_group->start;
|
||||
if (ffe_ctl->for_data_reloc) {
|
||||
if (!fs_info->data_reloc_bg)
|
||||
fs_info->data_reloc_bg = block_group->start;
|
||||
/*
|
||||
* Do not allow allocations from this block group, unless it is
|
||||
* for data relocation. Compared to increasing the ->ro, setting
|
||||
* the ->zoned_data_reloc_ongoing flag still allows nocow
|
||||
* writers to come in. See btrfs_inc_nocow_writers().
|
||||
*
|
||||
* We need to disable an allocation to avoid an allocation of
|
||||
* regular (non-relocation data) extent. With mix of relocation
|
||||
* extents and regular extents, we can dispatch WRITE commands
|
||||
* (for relocation extents) and ZONE APPEND commands (for
|
||||
* regular extents) at the same time to the same zone, which
|
||||
* easily break the write pointer.
|
||||
*
|
||||
* Also, this flag avoids this block group to be zone finished.
|
||||
*/
|
||||
set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
|
||||
}
|
||||
|
||||
ffe_ctl->found_offset = start + block_group->alloc_offset;
|
||||
block_group->alloc_offset += num_bytes;
|
||||
@ -3771,24 +3831,8 @@ static int do_allocation_zoned(struct btrfs_block_group *block_group,
|
||||
out:
|
||||
if (ret && ffe_ctl->for_treelog)
|
||||
fs_info->treelog_bg = 0;
|
||||
if (ret && ffe_ctl->for_data_reloc &&
|
||||
fs_info->data_reloc_bg == block_group->start) {
|
||||
/*
|
||||
* Do not allow further allocations from this block group.
|
||||
* Compared to increasing the ->ro, setting the
|
||||
* ->zoned_data_reloc_ongoing flag still allows nocow
|
||||
* writers to come in. See btrfs_inc_nocow_writers().
|
||||
*
|
||||
* We need to disable an allocation to avoid an allocation of
|
||||
* regular (non-relocation data) extent. With mix of relocation
|
||||
* extents and regular extents, we can dispatch WRITE commands
|
||||
* (for relocation extents) and ZONE APPEND commands (for
|
||||
* regular extents) at the same time to the same zone, which
|
||||
* easily break the write pointer.
|
||||
*/
|
||||
set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags);
|
||||
if (ret && ffe_ctl->for_data_reloc)
|
||||
fs_info->data_reloc_bg = 0;
|
||||
}
|
||||
spin_unlock(&fs_info->relocation_bg_lock);
|
||||
spin_unlock(&fs_info->treelog_bg_lock);
|
||||
spin_unlock(&block_group->lock);
|
||||
@ -3816,8 +3860,7 @@ static void release_block_group(struct btrfs_block_group *block_group,
|
||||
{
|
||||
switch (ffe_ctl->policy) {
|
||||
case BTRFS_EXTENT_ALLOC_CLUSTERED:
|
||||
ffe_ctl->retry_clustered = false;
|
||||
ffe_ctl->retry_unclustered = false;
|
||||
ffe_ctl->retry_uncached = false;
|
||||
break;
|
||||
case BTRFS_EXTENT_ALLOC_ZONED:
|
||||
/* Nothing to do */
|
||||
@ -3861,6 +3904,10 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
|
||||
static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
|
||||
struct find_free_extent_ctl *ffe_ctl)
|
||||
{
|
||||
/* Block group's activeness is not a requirement for METADATA block groups. */
|
||||
if (!(ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA))
|
||||
return 0;
|
||||
|
||||
/* If we can activate new zone, just allocate a chunk and use it */
|
||||
if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
|
||||
return 0;
|
||||
@ -3949,15 +3996,7 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
|
||||
if (ffe_ctl->index < BTRFS_NR_RAID_TYPES)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* LOOP_CACHING_NOWAIT, search partially cached block groups, kicking
|
||||
* caching kthreads as we move along
|
||||
* LOOP_CACHING_WAIT, search everything, and wait if our bg is caching
|
||||
* LOOP_UNSET_SIZE_CLASS, allow unset size class
|
||||
* LOOP_ALLOC_CHUNK, force a chunk allocation and try again
|
||||
* LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try
|
||||
* again
|
||||
*/
|
||||
/* See the comments for btrfs_loop_type for an explanation of the phases. */
|
||||
if (ffe_ctl->loop < LOOP_NO_EMPTY_SIZE) {
|
||||
ffe_ctl->index = 0;
|
||||
/*
|
||||
@ -4168,9 +4207,7 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
||||
ffe_ctl->orig_have_caching_bg = false;
|
||||
ffe_ctl->index = btrfs_bg_flags_to_raid_index(ffe_ctl->flags);
|
||||
ffe_ctl->loop = 0;
|
||||
/* For clustered allocation */
|
||||
ffe_ctl->retry_clustered = false;
|
||||
ffe_ctl->retry_unclustered = false;
|
||||
ffe_ctl->retry_uncached = false;
|
||||
ffe_ctl->cached = 0;
|
||||
ffe_ctl->max_extent_size = 0;
|
||||
ffe_ctl->total_free_space = 0;
|
||||
@ -4321,16 +4358,12 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
||||
|
||||
bg_ret = NULL;
|
||||
ret = do_allocation(block_group, ffe_ctl, &bg_ret);
|
||||
if (ret == 0) {
|
||||
if (bg_ret && bg_ret != block_group) {
|
||||
btrfs_release_block_group(block_group,
|
||||
ffe_ctl->delalloc);
|
||||
block_group = bg_ret;
|
||||
}
|
||||
} else if (ret == -EAGAIN) {
|
||||
goto have_block_group;
|
||||
} else if (ret > 0) {
|
||||
if (ret > 0)
|
||||
goto loop;
|
||||
|
||||
if (bg_ret && bg_ret != block_group) {
|
||||
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
|
||||
block_group = bg_ret;
|
||||
}
|
||||
|
||||
/* Checks */
|
||||
@ -4371,6 +4404,15 @@ static noinline int find_free_extent(struct btrfs_root *root,
|
||||
btrfs_release_block_group(block_group, ffe_ctl->delalloc);
|
||||
break;
|
||||
loop:
|
||||
if (!ffe_ctl->cached && ffe_ctl->loop > LOOP_CACHING_NOWAIT &&
|
||||
!ffe_ctl->retry_uncached) {
|
||||
ffe_ctl->retry_uncached = true;
|
||||
btrfs_wait_block_group_cache_progress(block_group,
|
||||
ffe_ctl->num_bytes +
|
||||
ffe_ctl->empty_cluster +
|
||||
ffe_ctl->empty_size);
|
||||
goto have_block_group;
|
||||
}
|
||||
release_block_group(block_group, ffe_ctl, ffe_ctl->delalloc);
|
||||
cond_resched();
|
||||
}
|
||||
|
@ -48,16 +48,11 @@ struct find_free_extent_ctl {
|
||||
int loop;
|
||||
|
||||
/*
|
||||
* Whether we're refilling a cluster, if true we need to re-search
|
||||
* current block group but don't try to refill the cluster again.
|
||||
* Set to true if we're retrying the allocation on this block group
|
||||
* after waiting for caching progress, this is so that we retry only
|
||||
* once before moving on to another block group.
|
||||
*/
|
||||
bool retry_clustered;
|
||||
|
||||
/*
|
||||
* Whether we're updating free space cache, if true we need to re-search
|
||||
* current block group but don't try updating free space cache again.
|
||||
*/
|
||||
bool retry_unclustered;
|
||||
bool retry_uncached;
|
||||
|
||||
/* If current block group is cached */
|
||||
int cached;
|
||||
@ -96,9 +91,6 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
|
||||
enum btrfs_inline_ref_type is_data);
|
||||
u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset);
|
||||
|
||||
int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
|
||||
u64 start, u64 num_bytes);
|
||||
void btrfs_free_excluded_extents(struct btrfs_block_group *cache);
|
||||
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count);
|
||||
void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_delayed_ref_root *delayed_refs,
|
||||
|
File diff suppressed because it is too large
@@ -40,7 +40,6 @@ enum {
ENUM_BIT(PAGE_START_WRITEBACK),
ENUM_BIT(PAGE_END_WRITEBACK),
ENUM_BIT(PAGE_SET_ORDERED),
ENUM_BIT(PAGE_LOCK),
};

/*
@@ -94,6 +93,13 @@ struct extent_buffer {
#endif
};

struct btrfs_eb_write_context {
struct writeback_control *wbc;
struct extent_buffer *eb;
/* Block group @eb resides in. Only used for zoned mode. */
struct btrfs_block_group *zoned_bg;
};

/*
* Get the correct offset inside the page of extent buffer.
*
@@ -178,8 +184,9 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);

int btrfs_read_folio(struct file *file, struct folio *folio);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
struct writeback_control *wbc);
void extent_write_locked_range(struct inode *inode, struct page *locked_page,
u64 start, u64 end, struct writeback_control *wbc,
bool pages_dirty);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
@@ -236,11 +243,24 @@ void read_extent_buffer(const struct extent_buffer *eb, void *dst,
int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
void __user *dst, unsigned long start,
unsigned long len);
void write_extent_buffer_fsid(const struct extent_buffer *eb, const void *src);
void write_extent_buffer_chunk_tree_uuid(const struct extent_buffer *eb,
const void *src);
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
unsigned long start, unsigned long len);

static inline void write_extent_buffer_chunk_tree_uuid(
const struct extent_buffer *eb, const void *chunk_tree_uuid)
{
write_extent_buffer(eb, chunk_tree_uuid,
offsetof(struct btrfs_header, chunk_tree_uuid),
BTRFS_FSID_SIZE);
}

static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
const void *fsid)
{
write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
BTRFS_FSID_SIZE);
}

void copy_extent_buffer_full(const struct extent_buffer *dst,
const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
@@ -266,7 +286,6 @@ void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
struct page *locked_page,
u32 bits_to_clear, unsigned long page_ops);
@@ -277,8 +296,6 @@ void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,

int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array);

void end_extent_writepage(struct page *page, int err, u64 start, u64 end);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
struct page *locked_page, u64 *start,
@@ -597,29 +597,37 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
* Each bit represents a sector. Thus caller should ensure @csum_buf passed
* in is large enough to contain all csums.
*/
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
u8 *csum_buf, unsigned long *csum_bitmap,
bool search_commit)
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
u64 start, u64 end, u8 *csum_buf,
unsigned long *csum_bitmap)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
struct btrfs_path *path;
struct extent_buffer *leaf;
struct btrfs_csum_item *item;
const u64 orig_start = start;
bool free_path = false;
int ret;

ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(end + 1, fs_info->sectorsize));

path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
if (!path) {
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
free_path = true;
}

if (search_commit) {
path->skip_locking = 1;
path->reada = READA_FORWARD;
path->search_commit_root = 1;
/* Check if we can reuse the previous path. */
if (path->nodes[0]) {
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
key.type == BTRFS_EXTENT_CSUM_KEY &&
key.offset <= start)
goto search_forward;
btrfs_release_path(path);
}

key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
@@ -656,6 +664,7 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
}
}

search_forward:
while (start <= end) {
u64 csum_end;

@@ -712,7 +721,8 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
}
ret = 0;
fail:
btrfs_free_path(path);
if (free_path)
btrfs_free_path(path);
return ret;
}
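The comment above states that each bit of @csum_bitmap stands for one sector and that @csum_buf must be large enough for all csums in the range. A minimal sketch of the sizing a caller would do, not taken from the commit; the helper name and locals are illustrative only:

#include <linux/bitmap.h>
#include <linux/slab.h>

/* Hypothetical helper: size the buffers passed to btrfs_lookup_csums_bitmap(). */
static int example_alloc_csum_buffers(struct btrfs_fs_info *fs_info,
				       u64 start, u64 end,
				       u8 **csum_buf, unsigned long **csum_bitmap)
{
	/* One csum slot and one bit per sector in [start, end]. */
	const u32 nr_sectors = (end + 1 - start) >> fs_info->sectorsize_bits;

	*csum_buf = kcalloc(nr_sectors, fs_info->csum_size, GFP_KERNEL);
	*csum_bitmap = bitmap_zalloc(nr_sectors, GFP_KERNEL);
	if (!*csum_buf || !*csum_bitmap)
		return -ENOMEM;
	return 0;
}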
@@ -57,9 +57,9 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
u8 *csum_buf, unsigned long *csum_bitmap,
bool search_commit);
int btrfs_lookup_csums_bitmap(struct btrfs_root *root, struct btrfs_path *path,
u64 start, u64 end, u8 *csum_buf,
unsigned long *csum_bitmap);
void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode,
const struct btrfs_path *path,
struct btrfs_file_extent_item *fi,
@@ -2999,7 +2999,7 @@ static long btrfs_fallocate(struct file *file, int mode,
struct extent_changeset *data_reserved = NULL;
struct falloc_range *range;
struct falloc_range *tmp;
struct list_head reserve_list;
LIST_HEAD(reserve_list);
u64 cur_offset;
u64 last_byte;
u64 alloc_start;
@@ -3091,7 +3091,6 @@ static long btrfs_fallocate(struct file *file, int mode,
btrfs_assert_inode_range_clean(BTRFS_I(inode), alloc_start, locked_end);

/* First, check if we exceed the qgroup limit */
INIT_LIST_HEAD(&reserve_list);
while (cur_offset < alloc_end) {
em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
alloc_end - cur_offset);
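This hunk, like the later ordered-data.c and send.c hunks, replaces a separate INIT_LIST_HEAD() call with the LIST_HEAD() declaration macro. A tiny illustrative sketch of the idiom, not from the commit:

#include <linux/list.h>

static void example_on_stack_list(void)
{
	/* Declares and initializes the list head in one statement. */
	LIST_HEAD(reserve_list);

	/* ...queue entries with list_add_tail(), process them, then drain... */
	WARN_ON(!list_empty(&reserve_list));
}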
@@ -1219,10 +1219,9 @@ static noinline_for_stack int write_pinned_extent_entries(
start = block_group->start;

while (start < block_group->start + block_group->length) {
ret = find_first_extent_bit(unpin, start,
&extent_start, &extent_end,
EXTENT_DIRTY, NULL);
if (ret)
if (!find_first_extent_bit(unpin, start,
&extent_start, &extent_end,
EXTENT_DIRTY, NULL))
return 0;

/* This pinned extent is out of our range */
@@ -2705,13 +2704,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold);

spin_lock(&ctl->tree_lock);
/* Count initial region as zone_unusable until it gets activated. */
if (!used)
to_free = size;
else if (initial &&
test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &block_group->fs_info->flags) &&
(block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
to_free = 0;
else if (initial)
to_free = block_group->zone_capacity;
else if (offset >= block_group->alloc_offset)
@@ -2739,8 +2733,7 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
reclaimable_unusable = block_group->zone_unusable -
(block_group->length - block_group->zone_capacity);
/* All the region is now unusable. Mark it as unused and reclaim */
if (block_group->zone_unusable == block_group->length &&
block_group->alloc_offset) {
if (block_group->zone_unusable == block_group->length) {
btrfs_mark_bg_unused(block_group);
} else if (bg_reclaim_threshold &&
reclaimable_unusable >=
@@ -2944,7 +2937,8 @@ void btrfs_dump_free_space(struct btrfs_block_group *block_group,
btrfs_info(fs_info, "block group has cluster?: %s",
list_empty(&block_group->cluster_list) ? "no" : "yes");
btrfs_info(fs_info,
"%d blocks of free space at or bigger than bytes is", count);
"%d free space entries at or bigger than %llu bytes",
count, bytes);
}

void btrfs_init_free_space_ctl(struct btrfs_block_group *block_group,
@@ -1517,8 +1517,10 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
} else if (prev_bit == 1 && bit == 0) {
u64 space_added;

ret = add_new_free_space(block_group, extent_start,
offset, &space_added);
ret = btrfs_add_new_free_space(block_group,
extent_start,
offset,
&space_added);
if (ret)
goto out;
total_found += space_added;
@@ -1533,7 +1535,7 @@ static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
}
}
if (prev_bit == 1) {
ret = add_new_free_space(block_group, extent_start, end, NULL);
ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
if (ret)
goto out;
extent_count++;
@@ -1590,8 +1592,9 @@ static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
ASSERT(key.objectid < end && key.objectid + key.offset <= end);

ret = add_new_free_space(block_group, key.objectid,
key.objectid + key.offset, &space_added);
ret = btrfs_add_new_free_space(block_group, key.objectid,
key.objectid + key.offset,
&space_added);
if (ret)
goto out;
total_found += space_added;
@@ -46,8 +46,6 @@ static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);
* Runtime (in-memory) states of filesystem
*/
enum {
/* Global indicator of serious filesystem errors */
BTRFS_FS_STATE_ERROR,
/*
* Filesystem is being remounted, allow to skip some operations, like
* defrag
@@ -686,6 +684,12 @@ struct btrfs_fs_info {
bool qgroup_rescan_running;
u8 qgroup_drop_subtree_thres;

/*
* If this is not 0, then it indicates a serious filesystem error has
* happened and it contains that error (negative errno value).
*/
int fs_error;

/* Filesystem state */
unsigned long fs_state;

@@ -766,6 +770,9 @@ struct btrfs_fs_info {
u64 data_reloc_bg;
struct mutex zoned_data_reloc_io_lock;

struct btrfs_block_group *active_meta_bg;
struct btrfs_block_group *active_system_bg;

u64 nr_global_roots;

spinlock_t zone_active_bgs_lock;
@@ -962,8 +969,8 @@ static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info)
clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags);
}

#define BTRFS_FS_ERROR(fs_info) (unlikely(test_bit(BTRFS_FS_STATE_ERROR, \
&(fs_info)->fs_state)))
#define BTRFS_FS_ERROR(fs_info) (READ_ONCE((fs_info)->fs_error))

#define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \
(unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \
&(fs_info)->fs_state)))
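With the new fs_error field, BTRFS_FS_ERROR() now evaluates to the recorded negative errno (0 if no error has been saved) instead of testing a state bit. A hedged sketch of how a caller consumes it; the function name is made up:

static int example_bail_if_fs_errored(struct btrfs_fs_info *fs_info)
{
	int err = BTRFS_FS_ERROR(fs_info);	/* READ_ONCE(fs_info->fs_error) */

	if (err) {
		btrfs_warn(fs_info, "filesystem in error state: %d", err);
		return err;
	}
	return 0;
}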
788	fs/btrfs/inode.c
File diff suppressed because it is too large
@@ -10,14 +10,13 @@
#ifdef CONFIG_PRINTK

#define STATE_STRING_PREFACE ": state "
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT)
#define STATE_STRING_BUF_LEN (sizeof(STATE_STRING_PREFACE) + BTRFS_FS_STATE_COUNT + 1)

/*
* Characters to print to indicate error conditions or uncommon filesystem state.
* RO is not an error.
*/
static const char fs_state_chars[] = {
[BTRFS_FS_STATE_ERROR] = 'E',
[BTRFS_FS_STATE_REMOUNTING] = 'M',
[BTRFS_FS_STATE_RO] = 0,
[BTRFS_FS_STATE_TRANS_ABORTED] = 'A',
@@ -37,6 +36,11 @@ static void btrfs_state_to_string(const struct btrfs_fs_info *info, char *buf)
memcpy(curr, STATE_STRING_PREFACE, sizeof(STATE_STRING_PREFACE));
curr += sizeof(STATE_STRING_PREFACE) - 1;

if (BTRFS_FS_ERROR(info)) {
*curr++ = 'E';
states_printed = true;
}

for_each_set_bit(bit, &fs_state, sizeof(fs_state)) {
WARN_ON_ONCE(bit >= BTRFS_FS_STATE_COUNT);
if ((bit < BTRFS_FS_STATE_COUNT) && fs_state_chars[bit]) {
@@ -155,7 +159,7 @@ void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function
* Today we only save the error info to memory. Long term we'll also
* send it down to the disk.
*/
set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
WRITE_ONCE(fs_info->fs_error, errno);

/* Don't go through full error handling during mount. */
if (!(sb->s_flags & SB_BORN))
@@ -252,12 +256,6 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
}
#endif

void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
{
btrfs_err(fs_info,
"Unsupported V0 extent filesystem detected. Aborting. Please re-create your filesystem with a newer kernel");
}

#if BITS_PER_LONG == 32
void __cold btrfs_warn_32bit_limit(struct btrfs_fs_info *fs_info)
{
@@ -181,8 +181,6 @@ do { \
#define ASSERT(expr) (void)(expr)
#endif

void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info);

__printf(5, 6)
__cold
void __btrfs_handle_fs_error(struct btrfs_fs_info *fs_info, const char *function,
@@ -410,6 +410,10 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
unsigned long flags;
u64 cur = file_offset;

trace_btrfs_writepage_end_io_hook(inode, file_offset,
file_offset + num_bytes - 1,
uptodate);

spin_lock_irqsave(&tree->lock, flags);
while (cur < file_offset + num_bytes) {
u64 entry_end;
@@ -736,11 +740,9 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, u64 nr,
const u64 range_start, const u64 range_len)
{
struct btrfs_root *root;
struct list_head splice;
LIST_HEAD(splice);
u64 done;

INIT_LIST_HEAD(&splice);

mutex_lock(&fs_info->ordered_operations_mutex);
spin_lock(&fs_info->ordered_root_lock);
list_splice_init(&fs_info->ordered_roots, &splice);
@@ -95,8 +95,10 @@ static void print_extent_item(const struct extent_buffer *eb, int slot, int type
int ref_index = 0;

if (unlikely(item_size < sizeof(*ei))) {
btrfs_print_v0_err(eb->fs_info);
btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
btrfs_err(eb->fs_info,
"unexpected extent item size, has %u expect >= %zu",
item_size, sizeof(*ei));
btrfs_handle_fs_error(eb->fs_info, -EUCLEAN, NULL);
}

ei = btrfs_item_ptr(eb, slot, struct btrfs_extent_item);
@@ -291,10 +293,6 @@ void btrfs_print_leaf(const struct extent_buffer *l)
btrfs_file_extent_num_bytes(l, fi),
btrfs_file_extent_ram_bytes(l, fi));
break;
case BTRFS_EXTENT_REF_V0_KEY:
btrfs_print_v0_err(fs_info);
btrfs_handle_fs_error(fs_info, -EINVAL, NULL);
break;
case BTRFS_BLOCK_GROUP_ITEM_KEY:
bi = btrfs_item_ptr(l, i,
struct btrfs_block_group_item);
@@ -3590,15 +3590,16 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
* going to clear all tracking information for a clean start.
*/

trans = btrfs_join_transaction(fs_info->fs_root);
if (IS_ERR(trans)) {
trans = btrfs_attach_transaction_barrier(fs_info->fs_root);
if (IS_ERR(trans) && trans != ERR_PTR(-ENOENT)) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
return PTR_ERR(trans);
}
ret = btrfs_commit_transaction(trans);
if (ret) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
return ret;
} else if (trans != ERR_PTR(-ENOENT)) {
ret = btrfs_commit_transaction(trans);
if (ret) {
fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_RESCAN;
return ret;
}
}

qgroup_rescan_zero_tracking(fs_info);
@@ -3757,9 +3758,11 @@ static int try_flush_qgroup(struct btrfs_root *root)
goto out;
btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);

trans = btrfs_join_transaction(root);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
if (ret == -ENOENT)
ret = 0;
goto out;
}

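Both qgroup hunks switch from btrfs_join_transaction() to btrfs_attach_transaction_barrier(), which does not start a new transaction and returns ERR_PTR(-ENOENT) when none is running. A sketch of the resulting calling convention, illustrative only and not from the commit:

static int example_commit_current_transaction(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;

	trans = btrfs_attach_transaction_barrier(root);
	if (IS_ERR(trans)) {
		/* No running transaction simply means there is nothing to commit. */
		return PTR_ERR(trans) == -ENOENT ? 0 : PTR_ERR(trans);
	}
	return btrfs_commit_transaction(trans);
}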
@@ -584,8 +584,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last,
if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
return 0;

if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
last->operation == BTRFS_RBIO_READ_REBUILD)
if (last->operation == BTRFS_RBIO_READ_REBUILD)
return 0;

return 1;
@@ -784,10 +783,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
spin_unlock(&rbio->bio_list_lock);
spin_unlock(&h->lock);

if (next->operation == BTRFS_RBIO_READ_REBUILD)
start_async_work(next, recover_rbio_work_locked);
else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
steal_rbio(rbio, next);
if (next->operation == BTRFS_RBIO_READ_REBUILD) {
start_async_work(next, recover_rbio_work_locked);
} else if (next->operation == BTRFS_RBIO_WRITE) {
steal_rbio(rbio, next);
@@ -1517,11 +1513,11 @@ static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_read_end_io;

if (trace_raid56_scrub_read_recover_enabled()) {
if (trace_raid56_read_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };

bio_get_trace_info(rbio, bio, &trace_info);
trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
trace_raid56_read(rbio, bio, &trace_info);
}
submit_bio(bio);
}
@@ -1698,8 +1694,7 @@ static int verify_one_sector(struct btrfs_raid_bio *rbio,
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
} else {
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1763,8 +1758,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
* If we're rebuilding a read, we have to use pages from the
* bio list if possible.
*/
if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
} else {
sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
@@ -1897,8 +1891,7 @@ static int recover_sectors(struct btrfs_raid_bio *rbio)
goto out;
}

if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
if (rbio->operation == BTRFS_RBIO_READ_REBUILD) {
spin_lock(&rbio->bio_list_lock);
set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
spin_unlock(&rbio->bio_list_lock);
@@ -2112,8 +2105,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio)
goto error;
}

ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
rbio->csum_buf, rbio->csum_bitmap, false);
ret = btrfs_lookup_csums_bitmap(csum_root, NULL, start, start + len - 1,
rbio->csum_buf, rbio->csum_bitmap);
if (ret < 0)
goto error;
if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits))
@@ -2198,11 +2191,11 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
while ((bio = bio_list_pop(bio_list))) {
bio->bi_end_io = raid_wait_write_end_io;

if (trace_raid56_write_stripe_enabled()) {
if (trace_raid56_write_enabled()) {
struct raid56_bio_trace_info trace_info = { 0 };

bio_get_trace_info(rbio, bio, &trace_info);
trace_raid56_write_stripe(rbio, bio, &trace_info);
trace_raid56_write(rbio, bio, &trace_info);
}
submit_bio(bio);
}
@@ -14,7 +14,6 @@ enum btrfs_rbio_ops {
BTRFS_RBIO_WRITE,
BTRFS_RBIO_READ_REBUILD,
BTRFS_RBIO_PARITY_SCRUB,
BTRFS_RBIO_REBUILD_MISSING,
};

struct btrfs_raid_bio {
@@ -3006,9 +3006,6 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
if (!page)
return -ENOMEM;
}
ret = set_page_extent_mapped(page);
if (ret < 0)
goto release_page;

if (PageReadahead(page))
page_cache_async_readahead(inode->i_mapping, ra, NULL,
@@ -3024,6 +3021,15 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
}
}

/*
* We could have lost page private when we dropped the lock to read the
* page above, make sure we set_page_extent_mapped here so we have any
* of the subpage blocksize stuff we need in place.
*/
ret = set_page_extent_mapped(page);
if (ret < 0)
goto release_page;

page_start = page_offset(page);
page_end = page_start + PAGE_SIZE - 1;

@@ -3250,12 +3256,13 @@ static int add_tree_block(struct reloc_control *rc,
if (type == BTRFS_TREE_BLOCK_REF_KEY)
owner = btrfs_extent_inline_ref_offset(eb, iref);
}
} else if (unlikely(item_size == sizeof(struct btrfs_extent_item_v0))) {
btrfs_print_v0_err(eb->fs_info);
btrfs_handle_fs_error(eb->fs_info, -EINVAL, NULL);
return -EINVAL;
} else {
BUG();
btrfs_print_leaf(eb);
btrfs_err(rc->block_group->fs_info,
"unrecognized tree backref at tree block %llu slot %u",
eb->start, path->slots[0]);
btrfs_release_path(path);
return -EUCLEAN;
}

btrfs_release_path(path);
@@ -3498,6 +3505,8 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,

last = rc->block_group->start + rc->block_group->length;
while (1) {
bool block_found;

cond_resched();
if (rc->search_start >= last) {
ret = 1;
@@ -3548,11 +3557,11 @@ int find_next_extent(struct reloc_control *rc, struct btrfs_path *path,
goto next;
}

ret = find_first_extent_bit(&rc->processed_blocks,
key.objectid, &start, &end,
EXTENT_DIRTY, NULL);
block_found = find_first_extent_bit(&rc->processed_blocks,
key.objectid, &start, &end,
EXTENT_DIRTY, NULL);

if (ret == 0 && start <= key.objectid) {
if (block_found && start <= key.objectid) {
btrfs_release_path(path);
rc->search_start = end + 1;
} else {
240	fs/btrfs/scrub.c
@@ -43,9 +43,20 @@ struct scrub_ctx;
/*
* The following value only influences the performance.
*
* This determines the batch size for stripe submitted in one go.
* This determines how many stripes would be submitted in one go,
* which is 512KiB (BTRFS_STRIPE_LEN * SCRUB_STRIPES_PER_GROUP).
*/
#define SCRUB_STRIPES_PER_SCTX 8 /* That would be 8 64K stripe per-device. */
#define SCRUB_STRIPES_PER_GROUP 8

/*
* How many groups we have for each sctx.
*
* This would be 8M per device, the same value as the old scrub in-flight bios
* size limit.
*/
#define SCRUB_GROUPS_PER_SCTX 16

#define SCRUB_TOTAL_STRIPES (SCRUB_GROUPS_PER_SCTX * SCRUB_STRIPES_PER_GROUP)

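The arithmetic implied by the two comments above, spelled out as a sketch (BTRFS_STRIPE_LEN is 64KiB):

/*
 *   one group: SCRUB_STRIPES_PER_GROUP * BTRFS_STRIPE_LEN =   8 * 64KiB = 512KiB submitted in one go
 *   one sctx:  SCRUB_TOTAL_STRIPES     * BTRFS_STRIPE_LEN = 128 * 64KiB = 8MiB in flight per device
 */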
/*
* The following value times PAGE_SIZE needs to be large enough to match the
@@ -172,9 +183,11 @@ struct scrub_stripe {
};

struct scrub_ctx {
struct scrub_stripe stripes[SCRUB_STRIPES_PER_SCTX];
struct scrub_stripe stripes[SCRUB_TOTAL_STRIPES];
struct scrub_stripe *raid56_data_stripes;
struct btrfs_fs_info *fs_info;
struct btrfs_path extent_path;
struct btrfs_path csum_path;
int first_free;
int cur_stripe;
atomic_t cancel_req;
@@ -315,10 +328,10 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
if (!sctx)
return;

for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
for (i = 0; i < SCRUB_TOTAL_STRIPES; i++)
release_scrub_stripe(&sctx->stripes[i]);

kfree(sctx);
kvfree(sctx);
}

static void scrub_put_ctx(struct scrub_ctx *sctx)
@@ -333,13 +346,20 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
struct scrub_ctx *sctx;
int i;

sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
/* Since sctx has inline 128 stripes, it can go beyond 64K easily. Use
* kvzalloc().
*/
sctx = kvzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
goto nomem;
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
sctx->extent_path.search_commit_root = 1;
sctx->extent_path.skip_locking = 1;
sctx->csum_path.search_commit_root = 1;
sctx->csum_path.skip_locking = 1;
for (i = 0; i < SCRUB_TOTAL_STRIPES; i++) {
int ret;

ret = init_scrub_stripe(fs_info, &sctx->stripes[i]);
@@ -970,6 +990,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
spin_unlock(&sctx->stat_lock);
}

static void scrub_write_sectors(struct scrub_ctx *sctx, struct scrub_stripe *stripe,
unsigned long write_bitmap, bool dev_replace);

/*
* The main entrance for all read related scrub work, including:
*
@@ -978,13 +1001,16 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
* - Go through the remaining mirrors and try to read as large blocksize as
* possible
* - Go through all mirrors (including the failed mirror) sector-by-sector
* - Submit writeback for repaired sectors
*
* Writeback does not happen here, it needs extra synchronization.
* Writeback for dev-replace does not happen here, it needs extra
* synchronization for zoned devices.
*/
static void scrub_stripe_read_repair_worker(struct work_struct *work)
{
struct scrub_stripe *stripe = container_of(work, struct scrub_stripe, work);
struct btrfs_fs_info *fs_info = stripe->bg->fs_info;
struct scrub_ctx *sctx = stripe->sctx;
struct btrfs_fs_info *fs_info = sctx->fs_info;
int num_copies = btrfs_num_copies(fs_info, stripe->bg->start,
stripe->bg->length);
int mirror;
@@ -1049,7 +1075,23 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
goto out;
}
out:
scrub_stripe_report_errors(stripe->sctx, stripe);
/*
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
if (btrfs_is_zoned(fs_info)) {
if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors))
btrfs_repair_one_zone(fs_info, sctx->stripes[0].bg->start);
} else if (!sctx->readonly) {
unsigned long repaired;

bitmap_andnot(&repaired, &stripe->init_error_bitmap,
&stripe->error_bitmap, stripe->nr_sectors);
scrub_write_sectors(sctx, stripe, repaired, false);
wait_scrub_stripe_io(stripe);
}

scrub_stripe_report_errors(sctx, stripe);
set_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state);
wake_up(&stripe->repair_wait);
}
@@ -1262,7 +1304,6 @@ static int get_raid56_logic_offset(u64 physical, int num,

/* Work out the disk rotation on this stripe-set */
rot = stripe_nr % map->num_stripes;
stripe_nr /= map->num_stripes;
/* calculate which stripe this data locates */
rot += i;
stripe_index = rot % map->num_stripes;
@@ -1468,6 +1509,8 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
* Return <0 for error.
*/
static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
struct btrfs_path *extent_path,
struct btrfs_path *csum_path,
struct btrfs_device *dev, u64 physical,
int mirror_num, u64 logical_start,
u32 logical_len,
@@ -1477,7 +1520,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start);
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start);
const u64 logical_end = logical_start + logical_len;
struct btrfs_path path = { 0 };
u64 cur_logical = logical_start;
u64 stripe_end;
u64 extent_start;
@@ -1493,14 +1535,13 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
/* The range must be inside the bg. */
ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);

path.search_commit_root = 1;
path.skip_locking = 1;

ret = find_first_extent_item(extent_root, &path, logical_start, logical_len);
ret = find_first_extent_item(extent_root, extent_path, logical_start,
logical_len);
/* Either error or not found. */
if (ret)
goto out;
get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen);
get_extent_info(extent_path, &extent_start, &extent_len, &extent_flags,
&extent_gen);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
stripe->nr_meta_extents++;
if (extent_flags & BTRFS_EXTENT_FLAG_DATA)
@@ -1528,7 +1569,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,

/* Fill the extent info for the remaining sectors. */
while (cur_logical <= stripe_end) {
ret = find_first_extent_item(extent_root, &path, cur_logical,
ret = find_first_extent_item(extent_root, extent_path, cur_logical,
stripe_end - cur_logical + 1);
if (ret < 0)
goto out;
@@ -1536,7 +1577,7 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
ret = 0;
break;
}
get_extent_info(&path, &extent_start, &extent_len,
get_extent_info(extent_path, &extent_start, &extent_len,
&extent_flags, &extent_gen);
if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)
stripe->nr_meta_extents++;
@@ -1561,9 +1602,9 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
*/
ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits);

ret = btrfs_lookup_csums_bitmap(csum_root, stripe->logical,
stripe_end, stripe->csums,
&csum_bitmap, true);
ret = btrfs_lookup_csums_bitmap(csum_root, csum_path,
stripe->logical, stripe_end,
stripe->csums, &csum_bitmap);
if (ret < 0)
goto out;
if (ret > 0)
@@ -1576,7 +1617,6 @@ static int scrub_find_fill_first_stripe(struct btrfs_block_group *bg,
}
set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state);
out:
btrfs_release_path(&path);
return ret;
}

@@ -1654,6 +1694,28 @@ static bool stripe_has_metadata_error(struct scrub_stripe *stripe)
return false;
}

static void submit_initial_group_read(struct scrub_ctx *sctx,
unsigned int first_slot,
unsigned int nr_stripes)
{
struct blk_plug plug;

ASSERT(first_slot < SCRUB_TOTAL_STRIPES);
ASSERT(first_slot + nr_stripes <= SCRUB_TOTAL_STRIPES);

scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
btrfs_stripe_nr_to_offset(nr_stripes));
blk_start_plug(&plug);
for (int i = 0; i < nr_stripes; i++) {
struct scrub_stripe *stripe = &sctx->stripes[first_slot + i];

/* Those stripes should be initialized. */
ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state));
scrub_submit_initial_read(sctx, stripe);
}
blk_finish_plug(&plug);
}

static int flush_scrub_stripes(struct scrub_ctx *sctx)
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
@@ -1666,11 +1728,11 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)

ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &sctx->stripes[0].state));

scrub_throttle_dev_io(sctx, sctx->stripes[0].dev,
btrfs_stripe_nr_to_offset(nr_stripes));
for (int i = 0; i < nr_stripes; i++) {
stripe = &sctx->stripes[i];
scrub_submit_initial_read(sctx, stripe);
/* Submit the stripes which are populated but not submitted. */
if (nr_stripes % SCRUB_STRIPES_PER_GROUP) {
const int first_slot = round_down(nr_stripes, SCRUB_STRIPES_PER_GROUP);

submit_initial_group_read(sctx, first_slot, nr_stripes - first_slot);
}

for (int i = 0; i < nr_stripes; i++) {
@@ -1680,32 +1742,6 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx)
test_bit(SCRUB_STRIPE_FLAG_REPAIR_DONE, &stripe->state));
}

/*
* Submit the repaired sectors. For zoned case, we cannot do repair
* in-place, but queue the bg to be relocated.
*/
if (btrfs_is_zoned(fs_info)) {
for (int i = 0; i < nr_stripes; i++) {
stripe = &sctx->stripes[i];

if (!bitmap_empty(&stripe->error_bitmap, stripe->nr_sectors)) {
btrfs_repair_one_zone(fs_info,
sctx->stripes[0].bg->start);
break;
}
}
} else if (!sctx->readonly) {
for (int i = 0; i < nr_stripes; i++) {
unsigned long repaired;

stripe = &sctx->stripes[i];

bitmap_andnot(&repaired, &stripe->init_error_bitmap,
&stripe->error_bitmap, stripe->nr_sectors);
scrub_write_sectors(sctx, stripe, repaired, false);
}
}

/* Submit for dev-replace. */
if (sctx->is_dev_replace) {
/*
@@ -1750,28 +1786,40 @@ static void raid56_scrub_wait_endio(struct bio *bio)

static int queue_scrub_stripe(struct scrub_ctx *sctx, struct btrfs_block_group *bg,
struct btrfs_device *dev, int mirror_num,
u64 logical, u32 length, u64 physical)
u64 logical, u32 length, u64 physical,
u64 *found_logical_ret)
{
struct scrub_stripe *stripe;
int ret;

/* No available slot, submit all stripes and wait for them. */
if (sctx->cur_stripe >= SCRUB_STRIPES_PER_SCTX) {
ret = flush_scrub_stripes(sctx);
if (ret < 0)
return ret;
}
/*
* There should always be one slot left, as caller filling the last
* slot should flush them all.
*/
ASSERT(sctx->cur_stripe < SCRUB_TOTAL_STRIPES);

stripe = &sctx->stripes[sctx->cur_stripe];

/* We can queue one stripe using the remaining slot. */
scrub_reset_stripe(stripe);
ret = scrub_find_fill_first_stripe(bg, dev, physical, mirror_num,
logical, length, stripe);
ret = scrub_find_fill_first_stripe(bg, &sctx->extent_path,
&sctx->csum_path, dev, physical,
mirror_num, logical, length, stripe);
/* Either >0 as no more extents or <0 for error. */
if (ret)
return ret;
if (found_logical_ret)
*found_logical_ret = stripe->logical;
sctx->cur_stripe++;

/* We filled one group, submit it. */
if (sctx->cur_stripe % SCRUB_STRIPES_PER_GROUP == 0) {
const int first_slot = sctx->cur_stripe - SCRUB_STRIPES_PER_GROUP;

submit_initial_group_read(sctx, first_slot, SCRUB_STRIPES_PER_GROUP);
}

/* Last slot used, flush them all. */
if (sctx->cur_stripe == SCRUB_TOTAL_STRIPES)
return flush_scrub_stripes(sctx);
return 0;
}

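submit_initial_group_read() above wraps a group's reads in a block plug so the block layer can merge them, and queue_scrub_stripe() then submits a full group every SCRUB_STRIPES_PER_GROUP filled slots. A generic sketch of the plugging idiom, not taken from the commit:

#include <linux/bio.h>
#include <linux/blkdev.h>

static void example_submit_batch(struct bio **bios, int nr)
{
	struct blk_plug plug;

	blk_start_plug(&plug);		/* hold bios back so they can be merged */
	for (int i = 0; i < nr; i++)
		submit_bio(bios[i]);
	blk_finish_plug(&plug);		/* unplug and issue the batched IO */
}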
@@ -1785,6 +1833,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
struct btrfs_fs_info *fs_info = sctx->fs_info;
struct btrfs_raid_bio *rbio;
struct btrfs_io_context *bioc = NULL;
struct btrfs_path extent_path = { 0 };
struct btrfs_path csum_path = { 0 };
struct bio *bio;
struct scrub_stripe *stripe;
bool all_empty = true;
@@ -1795,6 +1845,16 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,

ASSERT(sctx->raid56_data_stripes);

/*
* For data stripe search, we cannot re-use the same extent/csum paths,
* as the data stripe bytenr may be smaller than previous extent. Thus
* we have to use our own extent/csum paths.
*/
extent_path.search_commit_root = 1;
extent_path.skip_locking = 1;
csum_path.search_commit_root = 1;
csum_path.skip_locking = 1;

for (int i = 0; i < data_stripes; i++) {
int stripe_index;
int rot;
@@ -1809,7 +1869,7 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,

scrub_reset_stripe(stripe);
set_bit(SCRUB_STRIPE_FLAG_NO_REPORT, &stripe->state);
ret = scrub_find_fill_first_stripe(bg,
ret = scrub_find_fill_first_stripe(bg, &extent_path, &csum_path,
map->stripes[stripe_index].dev, physical, 1,
full_stripe_start + btrfs_stripe_nr_to_offset(i),
BTRFS_STRIPE_LEN, stripe);
@@ -1854,24 +1914,6 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
/* For now, no zoned support for RAID56. */
ASSERT(!btrfs_is_zoned(sctx->fs_info));

/* Writeback for the repaired sectors. */
for (int i = 0; i < data_stripes; i++) {
unsigned long repaired;

stripe = &sctx->raid56_data_stripes[i];

bitmap_andnot(&repaired, &stripe->init_error_bitmap,
&stripe->error_bitmap, stripe->nr_sectors);
scrub_write_sectors(sctx, stripe, repaired, false);
}

/* Wait for the above writebacks to finish. */
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];

wait_scrub_stripe_io(stripe);
}

/*
* Now all data stripes are properly verified. Check if we have any
* unrepaired, if so abort immediately or we could further corrupt the
@@ -1937,6 +1979,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bio_put(bio);
btrfs_bio_counter_dec(fs_info);

btrfs_release_path(&extent_path);
btrfs_release_path(&csum_path);
out:
return ret;
}
@@ -1958,18 +2002,15 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
{
struct btrfs_fs_info *fs_info = sctx->fs_info;
const u64 logical_end = logical_start + logical_length;
/* An artificial limit, inherit from old scrub behavior */
struct btrfs_path path = { 0 };
u64 cur_logical = logical_start;
int ret;

/* The range must be inside the bg */
ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length);

path.search_commit_root = 1;
path.skip_locking = 1;
/* Go through each extent items inside the logical range */
while (cur_logical < logical_end) {
u64 found_logical;
u64 cur_physical = physical + cur_logical - logical_start;

/* Canceled? */
@@ -1994,7 +2035,7 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,

ret = queue_scrub_stripe(sctx, bg, device, mirror_num,
cur_logical, logical_end - cur_logical,
cur_physical);
cur_physical, &found_logical);
if (ret > 0) {
/* No more extent, just update the accounting */
sctx->stat.last_physical = physical + logical_length;
@@ -2004,14 +2045,11 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx,
if (ret < 0)
break;

ASSERT(sctx->cur_stripe > 0);
cur_logical = sctx->stripes[sctx->cur_stripe - 1].logical
+ BTRFS_STRIPE_LEN;
cur_logical = found_logical + BTRFS_STRIPE_LEN;

/* Don't hold CPU for too long time */
cond_resched();
}
btrfs_release_path(&path);
return ret;
}

@@ -2109,6 +2147,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
u64 stripe_logical;
int stop_loop = 0;

/* Extent_path should be released by now. */
ASSERT(sctx->extent_path.nodes[0] == NULL);

scrub_blocked_if_needed(fs_info);

if (sctx->is_dev_replace &&
@@ -2227,6 +2268,9 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
ret2 = flush_scrub_stripes(sctx);
if (!ret)
ret = ret2;
btrfs_release_path(&sctx->extent_path);
btrfs_release_path(&sctx->csum_path);

if (sctx->raid56_data_stripes) {
for (int i = 0; i < nr_data_stripes(map); i++)
release_scrub_stripe(&sctx->raid56_data_stripes[i]);
@@ -2711,8 +2755,7 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
/*
* get a reference count on fs_info->scrub_workers. start worker if necessary
*/
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info)
{
struct workqueue_struct *scrub_workers = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
@@ -2722,10 +2765,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
return 0;

if (is_dev_replace)
scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
else
scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
if (!scrub_workers)
return -ENOMEM;

@@ -2777,7 +2817,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
if (IS_ERR(sctx))
return PTR_ERR(sctx);

ret = scrub_workers_get(fs_info, is_dev_replace);
ret = scrub_workers_get(fs_info);
if (ret)
goto out_free_ctx;

@@ -3685,7 +3685,7 @@ static void tail_append_pending_moves(struct send_ctx *sctx,
static int apply_children_dir_moves(struct send_ctx *sctx)
{
struct pending_dir_move *pm;
struct list_head stack;
LIST_HEAD(stack);
u64 parent_ino = sctx->cur_ino;
int ret = 0;

@@ -3693,7 +3693,6 @@ static int apply_children_dir_moves(struct send_ctx *sctx)
if (!pm)
return 0;

INIT_LIST_HEAD(&stack);
tail_append_pending_moves(sctx, pm, &stack);

while (!list_empty(&stack)) {
@@ -4165,7 +4164,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
int ret = 0;
struct recorded_ref *cur;
struct recorded_ref *cur2;
struct list_head check_dirs;
LIST_HEAD(check_dirs);
struct fs_path *valid_path = NULL;
u64 ow_inode = 0;
u64 ow_gen;
@@ -4184,7 +4183,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* which is always '..'
*/
BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
INIT_LIST_HEAD(&check_dirs);

valid_path = fs_path_alloc();
if (!valid_path) {
@@ -389,11 +389,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
return 0;

used = btrfs_space_info_used(space_info, true);
if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags) &&
(space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
avail = 0;
else
avail = calc_available_free_space(fs_info, space_info, flush);
avail = calc_available_free_space(fs_info, space_info, flush);

if (used + bytes < space_info->total_bytes + avail)
return 1;
@@ -510,6 +506,7 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
int dump_block_groups)
{
struct btrfs_block_group *cache;
u64 total_avail = 0;
int index = 0;

spin_lock(&info->lock);
@@ -523,18 +520,27 @@ void btrfs_dump_space_info(struct btrfs_fs_info *fs_info,
down_read(&info->groups_sem);
again:
list_for_each_entry(cache, &info->block_groups[index], list) {
u64 avail;

spin_lock(&cache->lock);
avail = cache->length - cache->used - cache->pinned -
cache->reserved - cache->delalloc_bytes -
cache->bytes_super - cache->zone_unusable;
btrfs_info(fs_info,
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu zone_unusable %s",
cache->start, cache->length, cache->used, cache->pinned,
cache->reserved, cache->zone_unusable,
cache->ro ? "[readonly]" : "");
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
cache->start, cache->length, cache->used, cache->pinned,
cache->reserved, cache->delalloc_bytes,
cache->bytes_super, cache->zone_unusable,
avail, cache->ro ? "[readonly]" : "");
spin_unlock(&cache->lock);
btrfs_dump_free_space(cache, bytes);
total_avail += avail;
}
if (++index < BTRFS_NR_RAID_TYPES)
goto again;
up_read(&info->groups_sem);

btrfs_info(fs_info, "%llu bytes available across all block groups", total_avail);
}

static inline u64 calc_reclaim_items_nr(const struct btrfs_fs_info *fs_info,
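The per-block-group figure added to the dump above, written out as a standalone sketch (field names are the ones used in the hunk; the helper itself is illustrative):

static u64 example_block_group_avail(const struct btrfs_block_group *cache)
{
	return cache->length - cache->used - cache->pinned -
	       cache->reserved - cache->delalloc_bytes -
	       cache->bytes_super - cache->zone_unusable;
}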
@@ -715,9 +721,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
else
nr = -1;

trans = btrfs_join_transaction(root);
trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
if (ret == -ENOENT)
ret = 0;
break;
}
ret = btrfs_run_delayed_items_nr(trans, nr);
@@ -733,9 +741,11 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case FLUSH_DELAYED_REFS_NR:
case FLUSH_DELAYED_REFS:
trans = btrfs_join_transaction(root);
trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
if (ret == -ENOENT)
ret = 0;
break;
}
if (state == FLUSH_DELAYED_REFS_NR)
@@ -747,18 +757,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case ALLOC_CHUNK:
case ALLOC_CHUNK_FORCE:
/*
* For metadata space on zoned filesystem, reaching here means we
* don't have enough space left in active_total_bytes. Try to
* activate a block group first, because we may have inactive
* block group already allocated.
*/
ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
if (ret < 0)
break;
else if (ret == 1)
break;

trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
@@ -770,22 +768,6 @@ static void flush_space(struct btrfs_fs_info *fs_info,
CHUNK_ALLOC_FORCE);
btrfs_end_transaction(trans);

/*
* For metadata space on zoned filesystem, allocating a new chunk
* is not enough. We still need to activate the block group.
* Activate the newly allocated block group by (maybe) finishing
* a block group.
*/
if (ret == 1) {
ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
/*
* Revert to the original ret regardless we could finish
* one block group or not.
*/
if (ret >= 0)
ret = 1;
}

if (ret > 0 || ret == -ENOSPC)
ret = 0;
break;
@@ -800,9 +782,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
break;
case COMMIT_TRANS:
ASSERT(current->journal_info == NULL);
trans = btrfs_join_transaction(root);
/*
* We don't want to start a new transaction, just attach to the
* current one or wait it fully commits in case its commit is
* happening at the moment. Note: we don't use a nostart join
* because that does not wait for a transaction to fully commit
* (only for it to be unblocked, state TRANS_STATE_UNBLOCKED).
*/
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
if (ret == -ENOENT)
ret = 0;
break;
}
ret = btrfs_commit_transaction(trans);
@@ -1408,8 +1399,18 @@ static void priority_reclaim_metadata_space(struct btrfs_fs_info *fs_info,
}
}

/* Attempt to steal from the global rsv if we can. */
if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
/*
* Attempt to steal from the global rsv if we can, except if the fs was
* turned into error mode due to a transaction abort when flushing space
* above, in that case fail with the abort error instead of returning
* success to the caller if we can steal from the global rsv - this is
* just to have caller fail immediately instead of later when trying to
* modify the fs, making it easier to debug -ENOSPC problems.
*/
if (BTRFS_FS_ERROR(fs_info)) {
ticket->error = BTRFS_FS_ERROR(fs_info);
remove_ticket(space_info, ticket);
} else if (!steal_from_global_rsv(fs_info, space_info, ticket)) {
ticket->error = -ENOSPC;
remove_ticket(space_info, ticket);
}
@@ -709,12 +709,16 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
break;
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
case Opt_check_integrity_including_extent_data:
btrfs_warn(info,
"integrity checker is deprecated and will be removed in 6.7");
btrfs_info(info,
"enabling check integrity including extent data");
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA);
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
case Opt_check_integrity:
btrfs_warn(info,
"integrity checker is deprecated and will be removed in 6.7");
btrfs_info(info, "enabling check integrity");
btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
break;
@@ -727,6 +731,8 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
goto out;
}
info->check_integrity_print_mask = intarg;
btrfs_warn(info,
"integrity checker is deprecated and will be removed in 6.7");
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
break;
@@ -414,6 +414,12 @@ static ssize_t supported_sectorsizes_show(struct kobject *kobj,
BTRFS_ATTR(static_feature, supported_sectorsizes,
supported_sectorsizes_show);

static ssize_t acl_show(struct kobject *kobj, struct kobj_attribute *a, char *buf)
{
return sysfs_emit(buf, "%d\n", !!IS_ENABLED(CONFIG_BTRFS_FS_POSIX_ACL));
}
BTRFS_ATTR(static_feature, acl, acl_show);

/*
* Features which only depend on kernel version.
*
@@ -421,6 +427,7 @@ BTRFS_ATTR(static_feature, supported_sectorsizes,
* btrfs_supported_feature_attrs.
*/
static struct attribute *btrfs_supported_static_feature_attrs[] = {
BTRFS_ATTR_PTR(static_feature, acl),
BTRFS_ATTR_PTR(static_feature, rmdir_subvol),
BTRFS_ATTR_PTR(static_feature, supported_checksums),
BTRFS_ATTR_PTR(static_feature, send_stream_version),
@@ -319,86 +319,139 @@ static int test_find_delalloc(u32 sectorsize)
return ret;
}

static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb,
unsigned long len)
static int check_eb_bitmap(unsigned long *bitmap, struct extent_buffer *eb)
{
unsigned long i;

for (i = 0; i < len * BITS_PER_BYTE; i++) {
for (i = 0; i < eb->len * BITS_PER_BYTE; i++) {
int bit, bit1;

bit = !!test_bit(i, bitmap);
bit1 = !!extent_buffer_test_bit(eb, 0, i);
if (bit1 != bit) {
test_err("bits do not match");
u8 has;
u8 expect;

read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));

test_err(
"bits do not match, start byte 0 bit %lu, byte %lu has 0x%02x expect 0x%02x",
i, i / BITS_PER_BYTE, has, expect);
return -EINVAL;
}

bit1 = !!extent_buffer_test_bit(eb, i / BITS_PER_BYTE,
i % BITS_PER_BYTE);
if (bit1 != bit) {
test_err("offset bits do not match");
u8 has;
u8 expect;

read_extent_buffer(eb, &has, i / BITS_PER_BYTE, 1);
expect = bitmap_get_value8(bitmap, ALIGN(i, BITS_PER_BYTE));

test_err(
"bits do not match, start byte %lu bit %lu, byte %lu has 0x%02x expect 0x%02x",
i / BITS_PER_BYTE, i % BITS_PER_BYTE,
i / BITS_PER_BYTE, has, expect);
return -EINVAL;
}
}
return 0;
}

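The reworked check above compares whole bytes and reports the mismatching byte. A hedged sketch of that comparison in isolation; the helper name is made up:

static bool example_byte_matches(const unsigned long *bitmap,
				 const struct extent_buffer *eb,
				 unsigned long byte)
{
	u8 has;
	u8 expect = bitmap_get_value8(bitmap, byte * BITS_PER_BYTE);

	read_extent_buffer(eb, &has, byte, 1);
	return has == expect;
}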
static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
|
||||
unsigned long len)
|
||||
static int test_bitmap_set(const char *name, unsigned long *bitmap,
|
||||
struct extent_buffer *eb,
|
||||
unsigned long byte_start, unsigned long bit_start,
|
||||
unsigned long bit_len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
bitmap_set(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
|
||||
extent_buffer_bitmap_set(eb, byte_start, bit_start, bit_len);
|
||||
ret = check_eb_bitmap(bitmap, eb);
|
||||
if (ret < 0)
|
||||
test_err("%s test failed", name);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int test_bitmap_clear(const char *name, unsigned long *bitmap,
|
||||
struct extent_buffer *eb,
|
||||
unsigned long byte_start, unsigned long bit_start,
|
||||
unsigned long bit_len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
bitmap_clear(bitmap, byte_start * BITS_PER_BYTE + bit_start, bit_len);
|
||||
extent_buffer_bitmap_clear(eb, byte_start, bit_start, bit_len);
|
||||
ret = check_eb_bitmap(bitmap, eb);
|
||||
if (ret < 0)
|
||||
test_err("%s test failed", name);
|
||||
return ret;
|
||||
}
|
||||
static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb)
|
||||
{
|
||||
unsigned long i, j;
|
||||
unsigned long byte_len = eb->len;
|
||||
u32 x;
|
||||
int ret;
|
||||
|
||||
memset(bitmap, 0, len);
|
||||
memzero_extent_buffer(eb, 0, len);
|
||||
if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
|
||||
test_err("bitmap was not zeroed");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
|
||||
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
|
||||
ret = check_eb_bitmap(bitmap, eb, len);
|
||||
if (ret) {
|
||||
test_err("setting all bits failed");
|
||||
ret = test_bitmap_clear("clear all run 1", bitmap, eb, 0, 0,
|
||||
byte_len * BITS_PER_BYTE);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
|
||||
extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
|
||||
ret = check_eb_bitmap(bitmap, eb, len);
|
||||
if (ret) {
|
||||
test_err("clearing all bits failed");
|
||||
ret = test_bitmap_set("set all", bitmap, eb, 0, 0, byte_len * BITS_PER_BYTE);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_clear("clear all run 2", bitmap, eb, 0, 0,
|
||||
byte_len * BITS_PER_BYTE);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_set("same byte set", bitmap, eb, 0, 2, 4);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_clear("same byte partial clear", bitmap, eb, 0, 4, 1);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_set("cross byte set", bitmap, eb, 2, 4, 8);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_set("cross multi byte set", bitmap, eb, 4, 4, 24);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_clear("cross byte clear", bitmap, eb, 2, 6, 4);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_clear("cross multi byte clear", bitmap, eb, 4, 6, 20);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Straddling pages test */
|
||||
if (len > PAGE_SIZE) {
|
||||
bitmap_set(bitmap,
|
||||
(PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
|
||||
sizeof(long) * BITS_PER_BYTE);
|
||||
extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
|
||||
sizeof(long) * BITS_PER_BYTE);
|
||||
ret = check_eb_bitmap(bitmap, eb, len);
|
||||
if (ret) {
|
||||
test_err("setting straddling pages failed");
|
||||
if (byte_len > PAGE_SIZE) {
|
||||
ret = test_bitmap_set("cross page set", bitmap, eb,
|
||||
PAGE_SIZE - sizeof(long) / 2, 0,
|
||||
sizeof(long) * BITS_PER_BYTE);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
|
||||
bitmap_clear(bitmap,
|
||||
(PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
|
||||
sizeof(long) * BITS_PER_BYTE);
|
||||
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
|
||||
extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
|
||||
sizeof(long) * BITS_PER_BYTE);
|
||||
ret = check_eb_bitmap(bitmap, eb, len);
|
||||
if (ret) {
|
||||
test_err("clearing straddling pages failed");
|
||||
ret = test_bitmap_set("cross page set all", bitmap, eb, 0, 0,
|
||||
byte_len * BITS_PER_BYTE);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = test_bitmap_clear("cross page clear", bitmap, eb,
|
||||
PAGE_SIZE - sizeof(long) / 2, 0,
|
||||
sizeof(long) * BITS_PER_BYTE);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
        /*
@@ -406,9 +459,12 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
         * something repetitive that could miss some hypothetical off-by-n bug.
         */
        x = 0;
        bitmap_clear(bitmap, 0, len * BITS_PER_BYTE);
        extent_buffer_bitmap_clear(eb, 0, 0, len * BITS_PER_BYTE);
        for (i = 0; i < len * BITS_PER_BYTE / 32; i++) {
        ret = test_bitmap_clear("clear all run 3", bitmap, eb, 0, 0,
                                byte_len * BITS_PER_BYTE);
        if (ret < 0)
                return ret;

        for (i = 0; i < byte_len * BITS_PER_BYTE / 32; i++) {
                x = (0x19660dULL * (u64)x + 0x3c6ef35fULL) & 0xffffffffU;
                for (j = 0; j < 32; j++) {
                        if (x & (1U << j)) {
@@ -418,7 +474,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
                }
        }

        ret = check_eb_bitmap(bitmap, eb, len);
        ret = check_eb_bitmap(bitmap, eb);
        if (ret) {
                test_err("random bit pattern failed");
                return ret;
@@ -456,7 +512,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
                goto out;
        }

        ret = __test_eb_bitmaps(bitmap, eb, nodesize);
        ret = __test_eb_bitmaps(bitmap, eb);
        if (ret)
                goto out;

@@ -473,7 +529,7 @@ static int test_eb_bitmaps(u32 sectorsize, u32 nodesize)
                goto out;
        }

        ret = __test_eb_bitmaps(bitmap, eb, nodesize);
        ret = __test_eb_bitmaps(bitmap, eb);
out:
        free_extent_buffer(eb);
        kfree(bitmap);
@@ -592,6 +648,146 @@ static int test_find_first_clear_extent_bit(void)
        return ret;
}

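The random-pattern pass in __test_eb_bitmaps above feeds one small LCG stream (x = 0x19660d * x + 0x3c6ef35f) into two different bitmap implementations and then cross-checks them. A minimal user-space sketch of that cross-checking idea, assuming nothing from the kernel (all names below are illustrative, not btrfs helpers):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define MAP_BYTES 4096  /* stand-in for a nodesize-sized buffer */

/* Path A: set one bit at a time, LSB-first within each byte. */
static void set_bit_u8(uint8_t *map, unsigned long nr)
{
        map[nr / 8] |= (uint8_t)(1U << (nr % 8));
}

int main(void)
{
        static uint8_t per_bit[MAP_BYTES];
        static uint8_t per_word[MAP_BYTES];
        uint32_t x = 0;

        for (unsigned long i = 0; i < MAP_BYTES * 8 / 32; i++) {
                x = (uint32_t)((0x19660dULL * x + 0x3c6ef35fULL) & 0xffffffffULL);

                /* Path A: bit-by-bit, like the bitmap_set() side of the test. */
                for (int j = 0; j < 32; j++)
                        if (x & (1U << j))
                                set_bit_u8(per_bit, i * 32 + j);

                /* Path B: write the same 32 bits as four LSB-first bytes. */
                for (int k = 0; k < 4; k++)
                        per_word[i * 4 + k] = (uint8_t)(x >> (8 * k));
        }

        /* Two independent write paths must agree on every byte. */
        if (memcmp(per_bit, per_word, MAP_BYTES) != 0) {
                puts("random bit pattern failed");
                return 1;
        }
        puts("random bit pattern ok");
        return 0;
}
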
static void dump_eb_and_memory_contents(struct extent_buffer *eb, void *memory,
|
||||
const char *test_name)
|
||||
{
|
||||
for (int i = 0; i < eb->len; i++) {
|
||||
struct page *page = eb->pages[i >> PAGE_SHIFT];
|
||||
void *addr = page_address(page) + offset_in_page(i);
|
||||
|
||||
if (memcmp(addr, memory + i, 1) != 0) {
|
||||
test_err("%s failed", test_name);
|
||||
test_err("eb and memory diffs at byte %u, eb has 0x%02x memory has 0x%02x",
|
||||
i, *(u8 *)addr, *(u8 *)(memory + i));
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int verify_eb_and_memory(struct extent_buffer *eb, void *memory,
|
||||
const char *test_name)
|
||||
{
|
||||
for (int i = 0; i < (eb->len >> PAGE_SHIFT); i++) {
|
||||
void *eb_addr = page_address(eb->pages[i]);
|
||||
|
||||
if (memcmp(memory + (i << PAGE_SHIFT), eb_addr, PAGE_SIZE) != 0) {
|
||||
dump_eb_and_memory_contents(eb, memory, test_name);
|
||||
return -EUCLEAN;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Init both memory and extent buffer contents to the same randomly generated
|
||||
* contents.
|
||||
*/
|
||||
static void init_eb_and_memory(struct extent_buffer *eb, void *memory)
|
||||
{
|
||||
get_random_bytes(memory, eb->len);
|
||||
write_extent_buffer(eb, memory, 0, eb->len);
|
||||
}
|
||||
|
||||
static int test_eb_mem_ops(u32 sectorsize, u32 nodesize)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info;
|
||||
struct extent_buffer *eb = NULL;
|
||||
void *memory = NULL;
|
||||
int ret;
|
||||
|
||||
test_msg("running extent buffer memory operation tests");
|
||||
|
||||
fs_info = btrfs_alloc_dummy_fs_info(nodesize, sectorsize);
|
||||
if (!fs_info) {
|
||||
test_std_err(TEST_ALLOC_FS_INFO);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
memory = kvzalloc(nodesize, GFP_KERNEL);
|
||||
if (!memory) {
|
||||
test_err("failed to allocate memory");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
eb = __alloc_dummy_extent_buffer(fs_info, SZ_1M, nodesize);
|
||||
if (!eb) {
|
||||
test_std_err(TEST_ALLOC_EXTENT_BUFFER);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
init_eb_and_memory(eb, memory);
|
||||
ret = verify_eb_and_memory(eb, memory, "full eb write");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memcpy(memory, memory + 16, 16);
|
||||
memcpy_extent_buffer(eb, 0, 16, 16);
|
||||
ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 1");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memcpy(memory, memory + 2048, 16);
|
||||
memcpy_extent_buffer(eb, 0, 2048, 16);
|
||||
ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 2");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
memcpy(memory, memory + 2048, 2048);
|
||||
memcpy_extent_buffer(eb, 0, 2048, 2048);
|
||||
ret = verify_eb_and_memory(eb, memory, "same page non-overlapping memcpy 3");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memmove(memory + 512, memory + 256, 512);
|
||||
memmove_extent_buffer(eb, 512, 256, 512);
|
||||
ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 1");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memmove(memory + 2048, memory + 512, 2048);
|
||||
memmove_extent_buffer(eb, 2048, 512, 2048);
|
||||
ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 2");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
memmove(memory + 512, memory + 2048, 2048);
|
||||
memmove_extent_buffer(eb, 512, 2048, 2048);
|
||||
ret = verify_eb_and_memory(eb, memory, "same page overlapping memcpy 3");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (nodesize > PAGE_SIZE) {
|
||||
memcpy(memory, memory + 4096 - 128, 256);
|
||||
memcpy_extent_buffer(eb, 0, 4096 - 128, 256);
|
||||
ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 1");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memcpy(memory + 4096 - 128, memory + 4096 + 128, 256);
|
||||
memcpy_extent_buffer(eb, 4096 - 128, 4096 + 128, 256);
|
||||
ret = verify_eb_and_memory(eb, memory, "cross page non-overlapping memcpy 2");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memmove(memory + 4096 - 128, memory + 4096 - 64, 256);
|
||||
memmove_extent_buffer(eb, 4096 - 128, 4096 - 64, 256);
|
||||
ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 1");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
memmove(memory + 4096 - 64, memory + 4096 - 128, 256);
|
||||
memmove_extent_buffer(eb, 4096 - 64, 4096 - 128, 256);
|
||||
ret = verify_eb_and_memory(eb, memory, "cross page overlapping memcpy 2");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
free_extent_buffer(eb);
|
||||
kvfree(memory);
|
||||
btrfs_free_dummy_fs_info(fs_info);
|
||||
return ret;
|
||||
}
|
||||
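test_eb_mem_ops above keeps a plain "memory" shadow of the extent buffer, applies every copy or move to both, and compares them afterwards. A user-space sketch of that shadow-buffer technique for a buffer split across fake pages follows; all names, and the bounce-buffer move, are illustrative simplifications rather than the kernel helpers:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CHUNK   4096    /* pretend "page" size */
#define NCHUNKS 4
#define TOTAL   (CHUNK * NCHUNKS)

/* A buffer split across non-contiguous "pages", like an extent buffer. */
static uint8_t *chunks[NCHUNKS];

static void chunked_write(size_t dst, const uint8_t *src, size_t len)
{
        while (len) {
                size_t in_chunk = CHUNK - (dst % CHUNK);
                size_t n = len < in_chunk ? len : in_chunk;

                memcpy(chunks[dst / CHUNK] + dst % CHUNK, src, n);
                dst += n; src += n; len -= n;
        }
}

static void chunked_read(uint8_t *dst, size_t src, size_t len)
{
        while (len) {
                size_t in_chunk = CHUNK - (src % CHUNK);
                size_t n = len < in_chunk ? len : in_chunk;

                memcpy(dst, chunks[src / CHUNK] + src % CHUNK, n);
                dst += n; src += n; len -= n;
        }
}

/* Overlap-safe move: bounce through a temporary, unlike the kernel helper. */
static void chunked_move(size_t dst, size_t src, size_t len)
{
        uint8_t *tmp = malloc(len);

        chunked_read(tmp, src, len);
        chunked_write(dst, tmp, len);
        free(tmp);
}

int main(void)
{
        static uint8_t model[TOTAL];
        static uint8_t readback[TOTAL];

        for (int i = 0; i < NCHUNKS; i++)
                chunks[i] = malloc(CHUNK);
        for (size_t i = 0; i < TOTAL; i++)
                model[i] = (uint8_t)rand();
        chunked_write(0, model, TOTAL);

        /* Same overlapping, page-crossing move on both representations. */
        memmove(model + CHUNK - 128, model + CHUNK - 64, 256);
        chunked_move(CHUNK - 128, CHUNK - 64, 256);

        chunked_read(readback, 0, TOTAL);
        puts(memcmp(model, readback, TOTAL) == 0 ? "match" : "MISMATCH");

        for (int i = 0; i < NCHUNKS; i++)
                free(chunks[i]);
        return 0;
}
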
|
||||
int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
|
||||
{
|
||||
int ret;
|
||||
@ -607,6 +803,10 @@ int btrfs_test_extent_io(u32 sectorsize, u32 nodesize)
|
||||
goto out;
|
||||
|
||||
ret = test_eb_bitmaps(sectorsize, nodesize);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = test_eb_mem_ops(sectorsize, nodesize);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
@ -6,6 +6,7 @@
|
||||
#include <linux/types.h>
|
||||
#include "btrfs-tests.h"
|
||||
#include "../ctree.h"
|
||||
#include "../btrfs_inode.h"
|
||||
#include "../volumes.h"
|
||||
#include "../disk-io.h"
|
||||
#include "../block-group.h"
|
||||
@ -442,6 +443,406 @@ static int test_case_4(struct btrfs_fs_info *fs_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int add_compressed_extent(struct extent_map_tree *em_tree,
|
||||
u64 start, u64 len, u64 block_start)
|
||||
{
|
||||
struct extent_map *em;
|
||||
int ret;
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
test_std_err(TEST_ALLOC_EXTENT_MAP);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
em->start = start;
|
||||
em->len = len;
|
||||
em->block_start = block_start;
|
||||
em->block_len = SZ_4K;
|
||||
set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);
|
||||
write_lock(&em_tree->lock);
|
||||
ret = add_extent_mapping(em_tree, em, 0);
|
||||
write_unlock(&em_tree->lock);
|
||||
free_extent_map(em);
|
||||
if (ret < 0) {
|
||||
test_err("cannot add extent map [%llu, %llu)", start, start + len);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct extent_range {
|
||||
u64 start;
|
||||
u64 len;
|
||||
};
|
||||
|
||||
/* The valid states of the tree after every drop, as described below. */
|
||||
struct extent_range valid_ranges[][7] = {
|
||||
{
|
||||
{ .start = 0, .len = SZ_8K }, /* [0, 8K) */
|
||||
{ .start = SZ_4K * 3, .len = SZ_4K * 3}, /* [12k, 24k) */
|
||||
{ .start = SZ_4K * 6, .len = SZ_4K * 3}, /* [24k, 36k) */
|
||||
{ .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
|
||||
{ .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
|
||||
},
|
||||
{
|
||||
{ .start = 0, .len = SZ_8K }, /* [0, 8K) */
|
||||
{ .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
|
||||
{ .start = SZ_4K * 6, .len = SZ_4K * 3}, /* [24k, 36k) */
|
||||
{ .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
|
||||
{ .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
|
||||
},
|
||||
{
|
||||
{ .start = 0, .len = SZ_8K }, /* [0, 8K) */
|
||||
{ .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
|
||||
{ .start = SZ_4K * 6, .len = SZ_4K}, /* [24k, 28k) */
|
||||
{ .start = SZ_32K, .len = SZ_4K}, /* [32k, 36k) */
|
||||
{ .start = SZ_32K + SZ_4K, .len = SZ_4K}, /* [36k, 40k) */
|
||||
{ .start = SZ_4K * 10, .len = SZ_4K * 6}, /* [40k, 64k) */
|
||||
},
|
||||
{
|
||||
{ .start = 0, .len = SZ_8K}, /* [0, 8K) */
|
||||
{ .start = SZ_4K * 5, .len = SZ_4K}, /* [20k, 24k) */
|
||||
{ .start = SZ_4K * 6, .len = SZ_4K}, /* [24k, 28k) */
|
||||
}
|
||||
};
|
||||
|
||||
static int validate_range(struct extent_map_tree *em_tree, int index)
|
||||
{
|
||||
struct rb_node *n;
|
||||
int i;
|
||||
|
||||
for (i = 0, n = rb_first_cached(&em_tree->map);
|
||||
valid_ranges[index][i].len && n;
|
||||
i++, n = rb_next(n)) {
|
||||
struct extent_map *entry = rb_entry(n, struct extent_map, rb_node);
|
||||
|
||||
if (entry->start != valid_ranges[index][i].start) {
|
||||
test_err("mapping has start %llu expected %llu",
|
||||
entry->start, valid_ranges[index][i].start);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (entry->len != valid_ranges[index][i].len) {
|
||||
test_err("mapping has len %llu expected %llu",
|
||||
entry->len, valid_ranges[index][i].len);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We exited because we don't have any more entries in the extent_map
|
||||
* but we still expect more valid entries.
|
||||
*/
|
||||
if (valid_ranges[index][i].len) {
|
||||
test_err("missing an entry");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* We exited the loop but still have entries in the extent map. */
|
||||
if (n) {
|
||||
test_err("we have a left over entry in the extent map we didn't expect");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Test scenario:
 *
 * Test the various edge cases of btrfs_drop_extent_map_range, create the
 * following ranges
 *
 * [0, 12k)[12k, 24k)[24k, 36k)[36k, 40k)[40k, 64k)
 *
 * And then we'll drop:
 *
 * [8k, 12k)  - test the single front split
 * [12k, 20k) - test the single back split
 * [28k, 32k) - test the double split
 * [32k, 64k) - test whole em dropping
 *
 * They'll have the EXTENT_FLAG_COMPRESSED flag set to keep the em tree from
 * merging the em's.
 */
static int test_case_5(void)
|
||||
{
|
||||
struct extent_map_tree *em_tree;
|
||||
struct inode *inode;
|
||||
u64 start, end;
|
||||
int ret;
|
||||
|
||||
test_msg("Running btrfs_drop_extent_map_range tests");
|
||||
|
||||
inode = btrfs_new_test_inode();
|
||||
if (!inode) {
|
||||
test_std_err(TEST_ALLOC_INODE);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
|
||||
/* [0, 12k) */
|
||||
ret = add_compressed_extent(em_tree, 0, SZ_4K * 3, 0);
|
||||
if (ret) {
|
||||
test_err("cannot add extent range [0, 12K)");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* [12k, 24k) */
|
||||
ret = add_compressed_extent(em_tree, SZ_4K * 3, SZ_4K * 3, SZ_4K);
|
||||
if (ret) {
|
||||
test_err("cannot add extent range [12k, 24k)");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* [24k, 36k) */
|
||||
ret = add_compressed_extent(em_tree, SZ_4K * 6, SZ_4K * 3, SZ_8K);
|
||||
if (ret) {
|
||||
test_err("cannot add extent range [12k, 24k)");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* [36k, 40k) */
|
||||
ret = add_compressed_extent(em_tree, SZ_32K + SZ_4K, SZ_4K, SZ_4K * 3);
|
||||
if (ret) {
|
||||
test_err("cannot add extent range [12k, 24k)");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* [40k, 64k) */
|
||||
ret = add_compressed_extent(em_tree, SZ_4K * 10, SZ_4K * 6, SZ_16K);
|
||||
if (ret) {
|
||||
test_err("cannot add extent range [12k, 24k)");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Drop [8k, 12k) */
|
||||
start = SZ_8K;
|
||||
end = (3 * SZ_4K) - 1;
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
|
||||
ret = validate_range(&BTRFS_I(inode)->extent_tree, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Drop [12k, 20k) */
|
||||
start = SZ_4K * 3;
|
||||
end = SZ_16K + SZ_4K - 1;
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
|
||||
ret = validate_range(&BTRFS_I(inode)->extent_tree, 1);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Drop [28k, 32k) */
|
||||
start = SZ_32K - SZ_4K;
|
||||
end = SZ_32K - 1;
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
|
||||
ret = validate_range(&BTRFS_I(inode)->extent_tree, 2);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* Drop [32k, 64k) */
|
||||
start = SZ_32K;
|
||||
end = SZ_64K - 1;
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), start, end, false);
|
||||
ret = validate_range(&BTRFS_I(inode)->extent_tree, 3);
|
||||
if (ret)
|
||||
goto out;
|
||||
out:
|
||||
iput(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
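The four drops in test_case_5 exercise the front-split, back-split, double-split and whole-drop cases of range removal. Below is a small user-space sketch of the interval arithmetic those cases have to get right for a single extent; the names are hypothetical and this is not the btrfs_drop_extent_map_range implementation:

#include <stdint.h>
#include <stdio.h>

/* One extent map, covering [start, start + len). */
struct em {
        uint64_t start;
        uint64_t len;
};

/*
 * Drop [dstart, dend) from a single extent and print what survives:
 * nothing, a front piece, a back piece, or both (the "double split").
 */
static void drop_from_extent(struct em e, uint64_t dstart, uint64_t dend)
{
        uint64_t end = e.start + e.len;

        printf("drop [%llu, %llu) from [%llu, %llu): ",
               (unsigned long long)dstart, (unsigned long long)dend,
               (unsigned long long)e.start, (unsigned long long)end);

        if (dend <= e.start || dstart >= end) {
                printf("untouched\n");
                return;
        }
        if (dstart <= e.start && dend >= end) {
                printf("dropped entirely\n");
                return;
        }
        if (dstart > e.start)
                printf("front piece [%llu, %llu) ",
                       (unsigned long long)e.start, (unsigned long long)dstart);
        if (dend < end)
                printf("back piece [%llu, %llu)",
                       (unsigned long long)dend, (unsigned long long)end);
        printf("\n");
}

int main(void)
{
        const uint64_t K = 1024;
        struct em e = { .start = 24 * K, .len = 12 * K };       /* [24k, 36k) */

        drop_from_extent(e, 28 * K, 32 * K);    /* double split */
        drop_from_extent(e, 24 * K, 28 * K);    /* back piece only */
        drop_from_extent(e, 32 * K, 36 * K);    /* front piece only */
        drop_from_extent(e, 24 * K, 36 * K);    /* dropped entirely */
        return 0;
}
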
/*
|
||||
* Test the btrfs_add_extent_mapping helper which will attempt to create an em
|
||||
* for areas between two existing ems. Validate it doesn't do this when there
|
||||
* are two unmerged em's side by side.
|
||||
*/
|
||||
static int test_case_6(struct btrfs_fs_info *fs_info, struct extent_map_tree *em_tree)
|
||||
{
|
||||
struct extent_map *em = NULL;
|
||||
int ret;
|
||||
|
||||
ret = add_compressed_extent(em_tree, 0, SZ_4K, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = add_compressed_extent(em_tree, SZ_4K, SZ_4K, 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
test_std_err(TEST_ALLOC_EXTENT_MAP);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
em->start = SZ_4K;
|
||||
em->len = SZ_4K;
|
||||
em->block_start = SZ_16K;
|
||||
em->block_len = SZ_16K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = btrfs_add_extent_mapping(fs_info, em_tree, &em, 0, SZ_8K);
|
||||
write_unlock(&em_tree->lock);
|
||||
|
||||
if (ret != 0) {
|
||||
test_err("got an error when adding our em: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = -EINVAL;
|
||||
if (em->start != 0) {
|
||||
test_err("unexpected em->start at %llu, wanted 0", em->start);
|
||||
goto out;
|
||||
}
|
||||
if (em->len != SZ_4K) {
|
||||
test_err("unexpected em->len %llu, expected 4K", em->len);
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
out:
|
||||
free_extent_map(em);
|
||||
free_extent_map_tree(em_tree);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Regression test for btrfs_drop_extent_map_range. Calling with skip_pinned ==
|
||||
* true would mess up the start/end calculations and subsequent splits would be
|
||||
* incorrect.
|
||||
*/
|
||||
static int test_case_7(void)
|
||||
{
|
||||
struct extent_map_tree *em_tree;
|
||||
struct extent_map *em;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
|
||||
test_msg("Running btrfs_drop_extent_cache with pinned");
|
||||
|
||||
inode = btrfs_new_test_inode();
|
||||
if (!inode) {
|
||||
test_std_err(TEST_ALLOC_INODE);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
em_tree = &BTRFS_I(inode)->extent_tree;
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
test_std_err(TEST_ALLOC_EXTENT_MAP);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* [0, 16K), pinned */
|
||||
em->start = 0;
|
||||
em->len = SZ_16K;
|
||||
em->block_start = 0;
|
||||
em->block_len = SZ_4K;
|
||||
set_bit(EXTENT_FLAG_PINNED, &em->flags);
|
||||
write_lock(&em_tree->lock);
|
||||
ret = add_extent_mapping(em_tree, em, 0);
|
||||
write_unlock(&em_tree->lock);
|
||||
if (ret < 0) {
|
||||
test_err("couldn't add extent map");
|
||||
goto out;
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
em = alloc_extent_map();
|
||||
if (!em) {
|
||||
test_std_err(TEST_ALLOC_EXTENT_MAP);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* [32K, 48K), not pinned */
|
||||
em->start = SZ_32K;
|
||||
em->len = SZ_16K;
|
||||
em->block_start = SZ_32K;
|
||||
em->block_len = SZ_16K;
|
||||
write_lock(&em_tree->lock);
|
||||
ret = add_extent_mapping(em_tree, em, 0);
|
||||
write_unlock(&em_tree->lock);
|
||||
if (ret < 0) {
|
||||
test_err("couldn't add extent map");
|
||||
goto out;
|
||||
}
|
||||
free_extent_map(em);
|
||||
|
||||
/*
|
||||
* Drop [0, 36K) This should skip the [0, 4K) extent and then split the
|
||||
* [32K, 48K) extent.
|
||||
*/
|
||||
btrfs_drop_extent_map_range(BTRFS_I(inode), 0, (36 * SZ_1K) - 1, true);
|
||||
|
||||
/* Make sure our extent maps look sane. */
|
||||
ret = -EINVAL;
|
||||
|
||||
em = lookup_extent_mapping(em_tree, 0, SZ_16K);
|
||||
if (!em) {
|
||||
test_err("didn't find an em at 0 as expected");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (em->start != 0) {
|
||||
test_err("em->start is %llu, expected 0", em->start);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (em->len != SZ_16K) {
|
||||
test_err("em->len is %llu, expected 16K", em->len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
free_extent_map(em);
|
||||
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, SZ_16K, SZ_16K);
|
||||
read_unlock(&em_tree->lock);
|
||||
if (em) {
|
||||
test_err("found an em when we weren't expecting one");
|
||||
goto out;
|
||||
}
|
||||
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, SZ_32K, SZ_16K);
|
||||
read_unlock(&em_tree->lock);
|
||||
if (!em) {
|
||||
test_err("didn't find an em at 32K as expected");
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (em->start != (36 * SZ_1K)) {
|
||||
test_err("em->start is %llu, expected 36K", em->start);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (em->len != (12 * SZ_1K)) {
|
||||
test_err("em->len is %llu, expected 12K", em->len);
|
||||
goto out;
|
||||
}
|
||||
|
||||
free_extent_map(em);
|
||||
|
||||
read_lock(&em_tree->lock);
|
||||
em = lookup_extent_mapping(em_tree, 48 * SZ_1K, (u64)-1);
|
||||
read_unlock(&em_tree->lock);
|
||||
if (em) {
|
||||
test_err("found an unexpected em above 48K");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
free_extent_map(em);
|
||||
iput(inode);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct rmap_test_vector {
|
||||
u64 raid_type;
|
||||
u64 physical_start;
|
||||
@ -619,6 +1020,17 @@ int btrfs_test_extent_map(void)
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = test_case_4(fs_info, em_tree);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = test_case_5();
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = test_case_6(fs_info, em_tree);
|
||||
if (ret)
|
||||
goto out;
|
||||
ret = test_case_7();
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
test_msg("running rmap tests");
|
||||
for (i = 0; i < ARRAY_SIZE(rmap_tests); i++) {
|
||||
|
@ -292,10 +292,11 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
|
||||
spin_unlock(&fs_info->trans_lock);
|
||||
|
||||
/*
|
||||
* If we are ATTACH, we just want to catch the current transaction,
|
||||
* and commit it. If there is no transaction, just return ENOENT.
|
||||
* If we are ATTACH or TRANS_JOIN_NOSTART, we just want to catch the
|
||||
* current transaction, and commit it. If there is no transaction, just
|
||||
* return ENOENT.
|
||||
*/
|
||||
if (type == TRANS_ATTACH)
|
||||
if (type == TRANS_ATTACH || type == TRANS_JOIN_NOSTART)
|
||||
return -ENOENT;
|
||||
|
||||
/*
|
||||
@ -591,8 +592,13 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
u64 delayed_refs_bytes = 0;
|
||||
|
||||
qgroup_reserved = num_items * fs_info->nodesize;
|
||||
ret = btrfs_qgroup_reserve_meta_pertrans(root, qgroup_reserved,
|
||||
enforce_qgroups);
|
||||
/*
|
||||
* Use prealloc for now, as there might be a currently running
|
||||
* transaction that could free this reserved space prematurely
|
||||
* by committing.
|
||||
*/
|
||||
ret = btrfs_qgroup_reserve_meta_prealloc(root, qgroup_reserved,
|
||||
enforce_qgroups, false);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
@ -705,6 +711,14 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
h->reloc_reserved = reloc_reserved;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now that we have found a transaction to be a part of, convert the
|
||||
* qgroup reservation from prealloc to pertrans. A different transaction
|
||||
* can't race in and free our pertrans out from under us.
|
||||
*/
|
||||
if (qgroup_reserved)
|
||||
btrfs_qgroup_convert_reserved_meta(root, qgroup_reserved);
|
||||
|
||||
got_it:
|
||||
if (!current->journal_info)
|
||||
current->journal_info = h;
|
||||
@ -752,7 +766,7 @@ start_transaction(struct btrfs_root *root, unsigned int num_items,
|
||||
btrfs_block_rsv_release(fs_info, &fs_info->trans_block_rsv,
|
||||
num_bytes, NULL);
|
||||
reserve_fail:
|
||||
btrfs_qgroup_free_meta_pertrans(root, qgroup_reserved);
|
||||
btrfs_qgroup_free_meta_prealloc(root, qgroup_reserved);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
@ -785,7 +799,10 @@ struct btrfs_trans_handle *btrfs_join_transaction_spacecache(struct btrfs_root *
|
||||
|
||||
/*
|
||||
* Similar to regular join but it never starts a transaction when none is
|
||||
* running or after waiting for the current one to finish.
|
||||
* running or when there's a running one at a state >= TRANS_STATE_UNBLOCKED.
|
||||
* This is similar to btrfs_attach_transaction() but it allows the join to
|
||||
* happen if the transaction commit already started but it's not yet in the
|
||||
* "doing" phase (the state is < TRANS_STATE_COMMIT_DOING).
|
||||
*/
|
||||
struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
|
||||
{
|
||||
@ -1060,8 +1077,8 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
u64 start = 0;
|
||||
u64 end;
|
||||
|
||||
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
mark, &cached_state)) {
|
||||
while (find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
mark, &cached_state)) {
|
||||
bool wait_writeback = false;
|
||||
|
||||
err = convert_extent_bit(dirty_pages, start, end,
|
||||
@ -1114,8 +1131,8 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
|
||||
u64 start = 0;
|
||||
u64 end;
|
||||
|
||||
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
EXTENT_NEED_WAIT, &cached_state)) {
|
||||
while (find_first_extent_bit(dirty_pages, start, &start, &end,
|
||||
EXTENT_NEED_WAIT, &cached_state)) {
|
||||
/*
|
||||
* Ignore -ENOMEM errors returned by clear_extent_bit().
|
||||
* When committing the transaction, we'll remove any entries
|
||||
|
@ -4841,13 +4841,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_ordered_extent *ordered;
|
||||
struct btrfs_ordered_extent *tmp;
|
||||
struct extent_map *em, *n;
|
||||
struct list_head extents;
|
||||
LIST_HEAD(extents);
|
||||
struct extent_map_tree *tree = &inode->extent_tree;
|
||||
int ret = 0;
|
||||
int num = 0;
|
||||
|
||||
INIT_LIST_HEAD(&extents);
|
||||
|
||||
write_lock(&tree->lock);
|
||||
|
||||
list_for_each_entry_safe(em, n, &tree->modified_extents, list) {
|
||||
@ -6794,8 +6792,8 @@ static int log_new_ancestors(struct btrfs_trans_handle *trans,
|
||||
|
||||
while (true) {
|
||||
struct btrfs_fs_info *fs_info = root->fs_info;
|
||||
struct extent_buffer *leaf = path->nodes[0];
|
||||
int slot = path->slots[0];
|
||||
struct extent_buffer *leaf;
|
||||
int slot;
|
||||
struct btrfs_key search_key;
|
||||
struct inode *inode;
|
||||
u64 ino;
|
||||
|
@ -681,6 +681,14 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb)
|
||||
{
|
||||
bool has_metadata_uuid = (btrfs_super_incompat_flags(sb) &
|
||||
BTRFS_FEATURE_INCOMPAT_METADATA_UUID);
|
||||
|
||||
return has_metadata_uuid ? sb->metadata_uuid : sb->fsid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Handle scanned device having its CHANGING_FSID_V2 flag set and the fs_devices
|
||||
* being created with a disk that has already completed its fsid change. Such
|
||||
@ -833,15 +841,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
found_transid > fs_devices->latest_generation) {
|
||||
memcpy(fs_devices->fsid, disk_super->fsid,
|
||||
BTRFS_FSID_SIZE);
|
||||
|
||||
if (has_metadata_uuid)
|
||||
memcpy(fs_devices->metadata_uuid,
|
||||
disk_super->metadata_uuid,
|
||||
BTRFS_FSID_SIZE);
|
||||
else
|
||||
memcpy(fs_devices->metadata_uuid,
|
||||
disk_super->fsid, BTRFS_FSID_SIZE);
|
||||
|
||||
memcpy(fs_devices->metadata_uuid,
|
||||
btrfs_sb_fsid_ptr(disk_super), BTRFS_FSID_SIZE);
|
||||
fs_devices->fsid_change = false;
|
||||
}
|
||||
}
|
||||
@ -851,8 +852,9 @@ static noinline struct btrfs_device *device_list_add(const char *path,
|
||||
|
||||
if (fs_devices->opened) {
|
||||
btrfs_err(NULL,
|
||||
"device %s belongs to fsid %pU, and the fs is already mounted",
|
||||
path, fs_devices->fsid);
|
||||
"device %s belongs to fsid %pU, and the fs is already mounted, scanned by %s (%d)",
|
||||
path, fs_devices->fsid, current->comm,
|
||||
task_pid_nr(current));
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
return ERR_PTR(-EBUSY);
|
||||
}
|
||||
@ -1424,9 +1426,9 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
|
||||
|
||||
lockdep_assert_held(&device->fs_info->chunk_mutex);
|
||||
|
||||
if (!find_first_extent_bit(&device->alloc_state, *start,
|
||||
&physical_start, &physical_end,
|
||||
CHUNK_ALLOCATED, NULL)) {
|
||||
if (find_first_extent_bit(&device->alloc_state, *start,
|
||||
&physical_start, &physical_end,
|
||||
CHUNK_ALLOCATED, NULL)) {
|
||||
|
||||
if (in_range(physical_start, *start, len) ||
|
||||
in_range(*start, physical_start,
|
||||
@ -1438,18 +1440,18 @@ static bool contains_pending_extent(struct btrfs_device *device, u64 *start,
|
||||
return false;
|
||||
}
|
||||
|
||||
static u64 dev_extent_search_start(struct btrfs_device *device, u64 start)
|
||||
static u64 dev_extent_search_start(struct btrfs_device *device)
|
||||
{
|
||||
switch (device->fs_devices->chunk_alloc_policy) {
|
||||
case BTRFS_CHUNK_ALLOC_REGULAR:
|
||||
return max_t(u64, start, BTRFS_DEVICE_RANGE_RESERVED);
|
||||
return BTRFS_DEVICE_RANGE_RESERVED;
|
||||
case BTRFS_CHUNK_ALLOC_ZONED:
|
||||
/*
|
||||
* We don't care about the starting region like regular
|
||||
* allocator, because we anyway use/reserve the first two zones
|
||||
* for superblock logging.
|
||||
*/
|
||||
return ALIGN(start, device->zone_info->zone_size);
|
||||
return 0;
|
||||
default:
|
||||
BUG();
|
||||
}
|
||||
@ -1581,15 +1583,15 @@ static bool dev_extent_hole_check(struct btrfs_device *device, u64 *hole_start,
|
||||
* correct usable device space, as device extent freed in current transaction
|
||||
* is not reported as available.
|
||||
*/
|
||||
static int find_free_dev_extent_start(struct btrfs_device *device,
|
||||
u64 num_bytes, u64 search_start, u64 *start,
|
||||
u64 *len)
|
||||
static int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = device->fs_info;
|
||||
struct btrfs_root *root = fs_info->dev_root;
|
||||
struct btrfs_key key;
|
||||
struct btrfs_dev_extent *dev_extent;
|
||||
struct btrfs_path *path;
|
||||
u64 search_start;
|
||||
u64 hole_size;
|
||||
u64 max_hole_start;
|
||||
u64 max_hole_size;
|
||||
@ -1599,7 +1601,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
|
||||
int slot;
|
||||
struct extent_buffer *l;
|
||||
|
||||
search_start = dev_extent_search_start(device, search_start);
|
||||
search_start = dev_extent_search_start(device);
|
||||
|
||||
WARN_ON(device->zone_info &&
|
||||
!IS_ALIGNED(num_bytes, device->zone_info->zone_size));
|
||||
@ -1725,13 +1727,6 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *len)
|
||||
{
|
||||
/* FIXME use last free of some kind */
|
||||
return find_free_dev_extent_start(device, num_bytes, 0, start, len);
|
||||
}
|
||||
|
||||
static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
|
||||
struct btrfs_device *device,
|
||||
u64 start, u64 *dev_extent_len)
|
||||
@ -6217,6 +6212,45 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
|
||||
stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
|
||||
}
|
||||
|
||||
/*
 * Map one logical range to one or more physical ranges.
 *
 * @length:         (Mandatory) mapped length of this run.
 *                  One logical range can be split into different segments
 *                  due to factors like zones and RAID0/5/6/10 stripe
 *                  boundaries.
 *
 * @bioc_ret:       (Mandatory) returned btrfs_io_context structure,
 *                  which has one or more physical ranges (btrfs_io_stripe)
 *                  recorded inside.
 *                  Caller should call btrfs_put_bioc() to free it after use.
 *
 * @smap:           (Optional) single physical range optimization.
 *                  If the map request can be fulfilled by one single
 *                  physical range, and this parameter is not NULL,
 *                  then @bioc_ret would be NULL, and @smap would be
 *                  updated.
 *
 * @mirror_num_ret: (Mandatory) returned mirror number if the original
 *                  value is 0.
 *
 *                  Mirror number 0 means to choose any live mirrors.
 *
 *                  For non-RAID56 profiles, non-zero mirror_num means
 *                  the Nth mirror. (e.g. mirror_num 1 means the first
 *                  copy).
 *
 *                  For RAID56 profile, mirror 1 means rebuild from P and
 *                  the remaining data stripes.
 *
 *                  For RAID6 profile, mirror > 2 means mark another
 *                  data/P stripe error and rebuild from the remaining
 *                  stripes.
 *
 * @need_raid_map:  (Used only for integrity checker) whether the map wants
 *                  a full stripe map (including all data and P/Q stripes)
 *                  for RAID56. Should always be 1 except integrity checker.
 */
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
u64 logical, u64 *length,
|
||||
struct btrfs_io_context **bioc_ret,
|
||||
@ -6391,9 +6425,7 @@ int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
|
||||
* I/O context structure.
|
||||
*/
|
||||
if (smap && num_alloc_stripes == 1 &&
|
||||
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
|
||||
(op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
|
||||
!dev_replace->tgtdev)) {
|
||||
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1)) {
|
||||
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
|
||||
if (mirror_num_ret)
|
||||
*mirror_num_ret = mirror_num;
|
||||
|
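The @length contract documented above means a caller may need several mapping calls for one logical range, each call reporting how much of the range one physical run covers. A minimal user-space sketch of that caller pattern against a toy RAID0-style mapper; the stripe size, device count and every name here are illustrative assumptions, not the btrfs implementation:

#include <stdint.h>
#include <stdio.h>

#define STRIPE_LEN   (64 * 1024ULL)   /* assumed stripe size */
#define NUM_STRIPES  4                /* assumed number of devices */

/*
 * Toy mapper: clamp *length so the run never crosses a stripe boundary,
 * and report which device/offset the run lands on.
 */
static void toy_map_block(uint64_t logical, uint64_t *length,
                          int *dev, uint64_t *physical)
{
        uint64_t stripe_nr = logical / STRIPE_LEN;
        uint64_t offset_in_stripe = logical % STRIPE_LEN;
        uint64_t left_in_stripe = STRIPE_LEN - offset_in_stripe;

        if (*length > left_in_stripe)
                *length = left_in_stripe;  /* the "mapped length of this run" */

        *dev = (int)(stripe_nr % NUM_STRIPES);
        *physical = (stripe_nr / NUM_STRIPES) * STRIPE_LEN + offset_in_stripe;
}

int main(void)
{
        uint64_t logical = 100 * 1024;       /* arbitrary start */
        uint64_t remaining = 200 * 1024;     /* arbitrary I/O size */

        while (remaining) {
                uint64_t len = remaining;
                uint64_t physical;
                int dev;

                toy_map_block(logical, &len, &dev, &physical);
                printf("logical %llu +%llu -> dev %d physical %llu\n",
                       (unsigned long long)logical, (unsigned long long)len,
                       dev, (unsigned long long)physical);

                logical += len;
                remaining -= len;
        }
        return 0;
}
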
@ -650,8 +650,6 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_uuid_scan_kthread(void *data);
|
||||
bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||
int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
|
||||
u64 *start, u64 *max_avail);
|
||||
void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
|
||||
int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_ioctl_get_dev_stats *stats);
|
||||
@ -749,5 +747,6 @@ int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
|
||||
bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
|
||||
|
||||
bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
|
||||
u8 *btrfs_sb_fsid_ptr(struct btrfs_super_block *sb);
|
||||
|
||||
#endif
|
||||
fs/btrfs/zoned.c (292 changed lines)
@ -65,6 +65,9 @@
|
||||
|
||||
#define SUPER_INFO_SECTORS ((u64)BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT)
|
||||
|
||||
static void wait_eb_writebacks(struct btrfs_block_group *block_group);
|
||||
static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written);
|
||||
|
||||
static inline bool sb_zone_is_full(const struct blk_zone *zone)
|
||||
{
|
||||
return (zone->cond == BLK_ZONE_COND_FULL) ||
|
||||
@ -465,8 +468,8 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
|
||||
* use the cache.
|
||||
*/
|
||||
if (populate_cache && bdev_is_zoned(device->bdev)) {
|
||||
zone_info->zone_cache = vzalloc(sizeof(struct blk_zone) *
|
||||
zone_info->nr_zones);
|
||||
zone_info->zone_cache = vcalloc(zone_info->nr_zones,
|
||||
sizeof(struct blk_zone));
|
||||
if (!zone_info->zone_cache) {
|
||||
btrfs_err_in_rcu(device->fs_info,
|
||||
"zoned: failed to allocate zone cache for %s",
|
||||
@ -1583,19 +1586,9 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
|
||||
return;
|
||||
|
||||
WARN_ON(cache->bytes_super != 0);
|
||||
|
||||
/* Check for block groups never get activated */
|
||||
if (test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &cache->fs_info->flags) &&
|
||||
cache->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM) &&
|
||||
!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags) &&
|
||||
cache->alloc_offset == 0) {
|
||||
unusable = cache->length;
|
||||
free = 0;
|
||||
} else {
|
||||
unusable = (cache->alloc_offset - cache->used) +
|
||||
(cache->length - cache->zone_capacity);
|
||||
free = cache->zone_capacity - cache->alloc_offset;
|
||||
}
|
||||
unusable = (cache->alloc_offset - cache->used) +
|
||||
(cache->length - cache->zone_capacity);
|
||||
free = cache->zone_capacity - cache->alloc_offset;
|
||||
|
||||
/* We only need ->free_space in ALLOC_SEQ block groups */
|
||||
cache->cached = BTRFS_CACHE_FINISHED;
|
||||
@ -1707,10 +1700,21 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
|
||||
{
|
||||
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
|
||||
struct btrfs_fs_info *fs_info = inode->root->fs_info;
|
||||
struct btrfs_ordered_sum *sum =
|
||||
list_first_entry(&ordered->list, typeof(*sum), list);
|
||||
u64 logical = sum->logical;
|
||||
u64 len = sum->len;
|
||||
struct btrfs_ordered_sum *sum;
|
||||
u64 logical, len;
|
||||
|
||||
/*
|
||||
* Write to pre-allocated region is for the data relocation, and so
|
||||
* it should use WRITE operation. No split/rewrite are necessary.
|
||||
*/
|
||||
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags))
|
||||
return;
|
||||
|
||||
ASSERT(!list_empty(&ordered->list));
|
||||
/* The ordered->list can be empty in the above pre-alloc case. */
|
||||
sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list);
|
||||
logical = sum->logical;
|
||||
len = sum->len;
|
||||
|
||||
while (len < ordered->disk_num_bytes) {
|
||||
sum = list_next_entry(sum, list);
|
||||
@ -1747,41 +1751,121 @@ void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
|
||||
}
|
||||
}
|
||||
|
||||
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *eb,
|
||||
struct btrfs_block_group **cache_ret)
|
||||
static bool check_bg_is_active(struct btrfs_eb_write_context *ctx,
|
||||
struct btrfs_block_group **active_bg)
|
||||
{
|
||||
struct btrfs_block_group *cache;
|
||||
bool ret = true;
|
||||
const struct writeback_control *wbc = ctx->wbc;
|
||||
struct btrfs_block_group *block_group = ctx->zoned_bg;
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags))
|
||||
return true;
|
||||
|
||||
cache = btrfs_lookup_block_group(fs_info, eb->start);
|
||||
if (!cache)
|
||||
return true;
|
||||
if (fs_info->treelog_bg == block_group->start) {
|
||||
if (!btrfs_zone_activate(block_group)) {
|
||||
int ret_fin = btrfs_zone_finish_one_bg(fs_info);
|
||||
|
||||
if (cache->meta_write_pointer != eb->start) {
|
||||
btrfs_put_block_group(cache);
|
||||
cache = NULL;
|
||||
ret = false;
|
||||
} else {
|
||||
cache->meta_write_pointer = eb->start + eb->len;
|
||||
if (ret_fin != 1 || !btrfs_zone_activate(block_group))
|
||||
return false;
|
||||
}
|
||||
} else if (*active_bg != block_group) {
|
||||
struct btrfs_block_group *tgt = *active_bg;
|
||||
|
||||
/* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */
|
||||
lockdep_assert_held(&fs_info->zoned_meta_io_lock);
|
||||
|
||||
if (tgt) {
|
||||
/*
|
||||
* If there is an unsent IO left in the allocated area,
|
||||
* we cannot wait for them as it may cause a deadlock.
|
||||
*/
|
||||
if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) {
|
||||
if (wbc->sync_mode == WB_SYNC_NONE ||
|
||||
(wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync))
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Pivot active metadata/system block group. */
|
||||
btrfs_zoned_meta_io_unlock(fs_info);
|
||||
wait_eb_writebacks(tgt);
|
||||
do_zone_finish(tgt, true);
|
||||
btrfs_zoned_meta_io_lock(fs_info);
|
||||
if (*active_bg == tgt) {
|
||||
btrfs_put_block_group(tgt);
|
||||
*active_bg = NULL;
|
||||
}
|
||||
}
|
||||
if (!btrfs_zone_activate(block_group))
|
||||
return false;
|
||||
if (*active_bg != block_group) {
|
||||
ASSERT(*active_bg == NULL);
|
||||
*active_bg = block_group;
|
||||
btrfs_get_block_group(block_group);
|
||||
}
|
||||
}
|
||||
|
||||
*cache_ret = cache;
|
||||
|
||||
return ret;
|
||||
return true;
|
||||
}
|
||||
|
||||
void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
|
||||
struct extent_buffer *eb)
|
||||
/*
|
||||
* Check if @ctx->eb is aligned to the write pointer.
|
||||
*
|
||||
* Return:
|
||||
* 0: @ctx->eb is at the write pointer. You can write it.
|
||||
* -EAGAIN: There is a hole. The caller should handle the case.
|
||||
* -EBUSY: There is a hole, but the caller can just bail out.
|
||||
*/
|
||||
int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_eb_write_context *ctx)
|
||||
{
|
||||
if (!btrfs_is_zoned(eb->fs_info) || !cache)
|
||||
return;
|
||||
const struct writeback_control *wbc = ctx->wbc;
|
||||
const struct extent_buffer *eb = ctx->eb;
|
||||
struct btrfs_block_group *block_group = ctx->zoned_bg;
|
||||
|
||||
ASSERT(cache->meta_write_pointer == eb->start + eb->len);
|
||||
cache->meta_write_pointer = eb->start;
|
||||
if (!btrfs_is_zoned(fs_info))
|
||||
return 0;
|
||||
|
||||
if (block_group) {
|
||||
if (block_group->start > eb->start ||
|
||||
block_group->start + block_group->length <= eb->start) {
|
||||
btrfs_put_block_group(block_group);
|
||||
block_group = NULL;
|
||||
ctx->zoned_bg = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!block_group) {
|
||||
block_group = btrfs_lookup_block_group(fs_info, eb->start);
|
||||
if (!block_group)
|
||||
return 0;
|
||||
ctx->zoned_bg = block_group;
|
||||
}
|
||||
|
||||
if (block_group->meta_write_pointer == eb->start) {
|
||||
struct btrfs_block_group **tgt;
|
||||
|
||||
if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
|
||||
return 0;
|
||||
|
||||
if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)
|
||||
tgt = &fs_info->active_system_bg;
|
||||
else
|
||||
tgt = &fs_info->active_meta_bg;
|
||||
if (check_bg_is_active(ctx, tgt))
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Since we may release fs_info->zoned_meta_io_lock, someone can already
|
||||
* start writing this eb. In that case, we can just bail out.
|
||||
*/
|
||||
if (block_group->meta_write_pointer > eb->start)
|
||||
return -EBUSY;
|
||||
|
||||
/* If for_sync, this hole will be filled with transaction commit. */
|
||||
if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)
|
||||
return -EAGAIN;
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length)
|
||||
@ -1879,10 +1963,10 @@ int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
|
||||
bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
struct btrfs_space_info *space_info = block_group->space_info;
|
||||
struct map_lookup *map;
|
||||
struct btrfs_device *device;
|
||||
u64 physical;
|
||||
const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA);
|
||||
bool ret;
|
||||
int i;
|
||||
|
||||
@ -1891,7 +1975,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
|
||||
map = block_group->physical_map;
|
||||
|
||||
spin_lock(&space_info->lock);
|
||||
spin_lock(&block_group->lock);
|
||||
if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) {
|
||||
ret = true;
|
||||
@ -1904,30 +1987,44 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_zoned_device_info *zinfo;
|
||||
int reserved = 0;
|
||||
|
||||
device = map->stripes[i].dev;
|
||||
physical = map->stripes[i].physical;
|
||||
zinfo = device->zone_info;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
if (zinfo->max_active_zones == 0)
|
||||
continue;
|
||||
|
||||
if (is_data)
|
||||
reserved = zinfo->reserved_active_zones;
|
||||
/*
|
||||
* For the data block group, leave active zones for one
|
||||
* metadata block group and one system block group.
|
||||
*/
|
||||
if (atomic_read(&zinfo->active_zones_left) <= reserved) {
|
||||
ret = false;
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (!btrfs_dev_set_active_zone(device, physical)) {
|
||||
/* Cannot activate the zone */
|
||||
ret = false;
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
goto out_unlock;
|
||||
}
|
||||
if (!is_data)
|
||||
zinfo->reserved_active_zones--;
|
||||
}
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
|
||||
/* Successfully activated all the zones */
|
||||
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
|
||||
WARN_ON(block_group->alloc_offset != 0);
|
||||
if (block_group->zone_unusable == block_group->length) {
|
||||
block_group->zone_unusable = block_group->length - block_group->zone_capacity;
|
||||
space_info->bytes_zone_unusable -= block_group->zone_capacity;
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
btrfs_try_granting_tickets(fs_info, space_info);
|
||||
spin_unlock(&space_info->lock);
|
||||
|
||||
/* For the active block group list */
|
||||
btrfs_get_block_group(block_group);
|
||||
@ -1940,7 +2037,6 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
|
||||
|
||||
out_unlock:
|
||||
spin_unlock(&block_group->lock);
|
||||
spin_unlock(&space_info->lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@ -2006,6 +2102,10 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
* and block_group->meta_write_pointer for metadata.
|
||||
*/
|
||||
if (!fully_written) {
|
||||
if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
|
||||
spin_unlock(&block_group->lock);
|
||||
return -EAGAIN;
|
||||
}
|
||||
spin_unlock(&block_group->lock);
|
||||
|
||||
ret = btrfs_inc_block_group_ro(block_group, false);
|
||||
@ -2034,7 +2134,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (block_group->reserved) {
|
||||
if (block_group->reserved ||
|
||||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
|
||||
&block_group->runtime_flags)) {
|
||||
spin_unlock(&block_group->lock);
|
||||
btrfs_dec_block_group_ro(block_group);
|
||||
return -EAGAIN;
|
||||
@ -2043,6 +2145,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
|
||||
clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags);
|
||||
block_group->alloc_offset = block_group->zone_capacity;
|
||||
if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM))
|
||||
block_group->meta_write_pointer = block_group->start +
|
||||
block_group->zone_capacity;
|
||||
block_group->free_space_ctl->free_space = 0;
|
||||
btrfs_clear_treelog_bg(block_group);
|
||||
btrfs_clear_data_reloc_bg(block_group);
|
||||
@ -2052,18 +2157,21 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_device *device = map->stripes[i].dev;
|
||||
const u64 physical = map->stripes[i].physical;
|
||||
struct btrfs_zoned_device_info *zinfo = device->zone_info;
|
||||
|
||||
if (device->zone_info->max_active_zones == 0)
|
||||
if (zinfo->max_active_zones == 0)
|
||||
continue;
|
||||
|
||||
ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
|
||||
physical >> SECTOR_SHIFT,
|
||||
device->zone_info->zone_size >> SECTOR_SHIFT,
|
||||
zinfo->zone_size >> SECTOR_SHIFT,
|
||||
GFP_NOFS);
|
||||
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA))
|
||||
zinfo->reserved_active_zones++;
|
||||
btrfs_dev_clear_active_zone(device, physical);
|
||||
}
|
||||
|
||||
@ -2102,8 +2210,10 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
|
||||
|
||||
/* Check if there is a device with active zones left */
|
||||
mutex_lock(&fs_info->chunk_mutex);
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
|
||||
struct btrfs_zoned_device_info *zinfo = device->zone_info;
|
||||
int reserved = 0;
|
||||
|
||||
if (!device->bdev)
|
||||
continue;
|
||||
@ -2113,17 +2223,21 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
|
||||
break;
|
||||
}
|
||||
|
||||
if (flags & BTRFS_BLOCK_GROUP_DATA)
|
||||
reserved = zinfo->reserved_active_zones;
|
||||
|
||||
switch (flags & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
|
||||
case 0: /* single */
|
||||
ret = (atomic_read(&zinfo->active_zones_left) >= 1);
|
||||
ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved));
|
||||
break;
|
||||
case BTRFS_BLOCK_GROUP_DUP:
|
||||
ret = (atomic_read(&zinfo->active_zones_left) >= 2);
|
||||
ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved));
|
||||
break;
|
||||
}
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
mutex_unlock(&fs_info->chunk_mutex);
|
||||
|
||||
if (!ret)
|
||||
@ -2265,7 +2379,10 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
|
||||
|
||||
/* All relocation extents are written. */
|
||||
if (block_group->start + block_group->alloc_offset == logical + length) {
|
||||
/* Now, release this block group for further allocations. */
|
||||
/*
|
||||
* Now, release this block group for further allocations and
|
||||
* zone finish.
|
||||
*/
|
||||
clear_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
|
||||
&block_group->runtime_flags);
|
||||
}
|
||||
@ -2289,7 +2406,8 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
|
||||
|
||||
spin_lock(&block_group->lock);
|
||||
if (block_group->reserved || block_group->alloc_offset == 0 ||
|
||||
(block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
|
||||
(block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) ||
|
||||
test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) {
|
||||
spin_unlock(&block_group->lock);
|
||||
continue;
|
||||
}
|
||||
@ -2365,3 +2483,55 @@ int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve zones for one metadata block group, one tree-log block group, and one
|
||||
* system block group.
|
||||
*/
|
||||
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
|
||||
struct btrfs_block_group *block_group;
|
||||
struct btrfs_device *device;
|
||||
/* Reserve zones for normal SINGLE metadata and tree-log block group. */
|
||||
unsigned int metadata_reserve = 2;
|
||||
/* Reserve a zone for SINGLE system block group. */
|
||||
unsigned int system_reserve = 1;
|
||||
|
||||
if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags))
|
||||
return;
|
||||
|
||||
/*
|
||||
* This function is called from the mount context. So, there is no
|
||||
* parallel process touching the bits. No need for read_seqretry().
|
||||
*/
|
||||
if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
|
||||
metadata_reserve = 4;
|
||||
if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP)
|
||||
system_reserve = 2;
|
||||
|
||||
/* Apply the reservation on all the devices. */
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||
if (!device->bdev)
|
||||
continue;
|
||||
|
||||
device->zone_info->reserved_active_zones =
|
||||
metadata_reserve + system_reserve;
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
/* Release reservation for currently active block groups. */
|
||||
spin_lock(&fs_info->zone_active_bgs_lock);
|
||||
list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) {
|
||||
struct map_lookup *map = block_group->physical_map;
|
||||
|
||||
if (!(block_group->flags &
|
||||
(BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)))
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < map->num_stripes; i++)
|
||||
map->stripes[i].dev->zone_info->reserved_active_zones--;
|
||||
}
|
||||
spin_unlock(&fs_info->zone_active_bgs_lock);
|
||||
}
|
||||
|
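The zoned changes above hold back a few active zones on each device for metadata and system block groups, and data block groups may not eat into that reservation when they activate a zone. A toy model of that gate; the names and the exact bookkeeping are simplified assumptions, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative per-device state, not the kernel structures. */
struct toy_zone_info {
        int active_zones_left;          /* zones we may still activate */
        int reserved_active_zones;      /* kept back for metadata/system */
};

/*
 * Data block groups may only activate a zone if doing so still leaves the
 * reserved zones available; metadata/system consume the reservation itself.
 */
static bool toy_activate_zone(struct toy_zone_info *zi, bool is_data)
{
        int reserved = is_data ? zi->reserved_active_zones : 0;

        if (zi->active_zones_left <= reserved)
                return false;
        zi->active_zones_left--;
        if (!is_data && zi->reserved_active_zones > 0)
                zi->reserved_active_zones--;
        return true;
}

int main(void)
{
        /* Non-DUP case: 2 metadata + 1 system zones held back per device. */
        struct toy_zone_info zi = { .active_zones_left = 4,
                                    .reserved_active_zones = 3 };

        printf("data     activate: %s\n", toy_activate_zone(&zi, true) ? "ok" : "refused");
        printf("metadata activate: %s\n", toy_activate_zone(&zi, false) ? "ok" : "refused");
        printf("data     activate: %s\n", toy_activate_zone(&zi, true) ? "ok" : "refused");
        return 0;
}
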
@ -22,6 +22,11 @@ struct btrfs_zoned_device_info {
|
||||
u8 zone_size_shift;
|
||||
u32 nr_zones;
|
||||
unsigned int max_active_zones;
|
||||
/*
|
||||
* Reserved active zones for one metadata and one system block group.
|
||||
* It can vary per-device depending on the allocation status.
|
||||
*/
|
||||
int reserved_active_zones;
|
||||
atomic_t active_zones_left;
|
||||
unsigned long *seq_zones;
|
||||
unsigned long *empty_zones;
|
||||
@ -58,11 +63,8 @@ void btrfs_redirty_list_add(struct btrfs_transaction *trans,
|
||||
struct extent_buffer *eb);
|
||||
bool btrfs_use_zone_append(struct btrfs_bio *bbio);
|
||||
void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
|
||||
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *eb,
|
||||
struct btrfs_block_group **cache_ret);
|
||||
void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
|
||||
struct extent_buffer *eb);
|
||||
int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_eb_write_context *ctx);
|
||||
int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length);
|
||||
int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
|
||||
u64 physical_start, u64 physical_pos);
|
||||
@ -81,6 +83,7 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
|
||||
int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_space_info *space_info, bool do_finish);
|
||||
void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info);
|
||||
#else /* CONFIG_BLK_DEV_ZONED */
|
||||
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
|
||||
struct blk_zone *zone)
|
||||
@ -189,17 +192,10 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
|
||||
{
|
||||
}
|
||||
|
||||
static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
|
||||
struct extent_buffer *eb,
|
||||
struct btrfs_block_group **cache_ret)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline void btrfs_revert_meta_write_pointer(
|
||||
struct btrfs_block_group *cache,
|
||||
struct extent_buffer *eb)
|
||||
static inline int btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
|
||||
struct btrfs_eb_write_context *ctx)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int btrfs_zoned_issue_zeroout(struct btrfs_device *device,
|
||||
@ -262,6 +258,8 @@ static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline void btrfs_check_active_zone_reservation(struct btrfs_fs_info *fs_info) { }
|
||||
|
||||
#endif
|
||||
|
||||
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
|
||||
|
@ -375,11 +375,6 @@ void tag_pages_for_writeback(struct address_space *mapping,
|
||||
pgoff_t start, pgoff_t end);
|
||||
|
||||
bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio);
|
||||
void folio_account_redirty(struct folio *folio);
|
||||
static inline void account_page_redirty(struct page *page)
|
||||
{
|
||||
folio_account_redirty(page_folio(page));
|
||||
}
|
||||
bool folio_redirty_for_writepage(struct writeback_control *, struct folio *);
|
||||
bool redirty_page_for_writepage(struct writeback_control *, struct page *);
|
||||
|
||||
|
@ -38,7 +38,6 @@ struct find_free_extent_ctl;
|
||||
__print_symbolic(type, \
|
||||
{ BTRFS_TREE_BLOCK_REF_KEY, "TREE_BLOCK_REF" }, \
|
||||
{ BTRFS_EXTENT_DATA_REF_KEY, "EXTENT_DATA_REF" }, \
|
||||
{ BTRFS_EXTENT_REF_V0_KEY, "EXTENT_REF_V0" }, \
|
||||
{ BTRFS_SHARED_BLOCK_REF_KEY, "SHARED_BLOCK_REF" }, \
|
||||
{ BTRFS_SHARED_DATA_REF_KEY, "SHARED_DATA_REF" })
|
||||
|
||||
@ -2482,7 +2481,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio,
|
||||
__entry->offset, __entry->opf, __entry->physical, __entry->len)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_read,
|
||||
TP_PROTO(const struct btrfs_raid_bio *rbio,
|
||||
const struct bio *bio,
|
||||
const struct raid56_bio_trace_info *trace_info),
|
||||
@ -2490,32 +2489,7 @@ DEFINE_EVENT(btrfs_raid56_bio, raid56_read_partial,
|
||||
TP_ARGS(rbio, bio, trace_info)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_write_stripe,
|
||||
TP_PROTO(const struct btrfs_raid_bio *rbio,
|
||||
const struct bio *bio,
|
||||
const struct raid56_bio_trace_info *trace_info),
|
||||
|
||||
TP_ARGS(rbio, bio, trace_info)
|
||||
);
|
||||
|
||||
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_write_stripe,
|
||||
TP_PROTO(const struct btrfs_raid_bio *rbio,
|
||||
const struct bio *bio,
|
||||
const struct raid56_bio_trace_info *trace_info),
|
||||
|
||||
TP_ARGS(rbio, bio, trace_info)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read,
|
||||
TP_PROTO(const struct btrfs_raid_bio *rbio,
|
||||
const struct bio *bio,
|
||||
const struct raid56_bio_trace_info *trace_info),
|
||||
|
||||
TP_ARGS(rbio, bio, trace_info)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_scrub_read_recover,
|
||||
DEFINE_EVENT(btrfs_raid56_bio, raid56_write,
|
||||
TP_PROTO(const struct btrfs_raid_bio *rbio,
|
||||
const struct bio *bio,
|
||||
const struct raid56_bio_trace_info *trace_info),
|
||||
|
@ -220,7 +220,11 @@
|
||||
|
||||
#define BTRFS_EXTENT_DATA_REF_KEY 178
|
||||
|
||||
#define BTRFS_EXTENT_REF_V0_KEY 180
|
||||
/*
|
||||
* Obsolete key. Definition removed in 6.6, value may be reused in the future.
|
||||
*
|
||||
* #define BTRFS_EXTENT_REF_V0_KEY 180
|
||||
*/
|
||||
|
||||
#define BTRFS_SHARED_BLOCK_REF_KEY 182
|
||||
|
||||
|
@ -1193,7 +1193,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb,
|
||||
* write_bandwidth = ---------------------------------------------------
|
||||
* period
|
||||
*
|
||||
* @written may have decreased due to folio_account_redirty().
|
||||
* @written may have decreased due to folio_redirty_for_writepage().
|
||||
* Avoid underflowing @bw calculation.
|
||||
*/
|
||||
bw = written - min(written, wb->written_stamp);
|
||||
@ -2711,37 +2711,6 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio)
|
||||
}
|
||||
EXPORT_SYMBOL(filemap_dirty_folio);
|
||||
|
||||
/**
|
||||
* folio_account_redirty - Manually account for redirtying a page.
|
||||
* @folio: The folio which is being redirtied.
|
||||
*
|
||||
* Most filesystems should call folio_redirty_for_writepage() instead
|
||||
* of this function. If your filesystem is doing writeback outside the
|
||||
* context of a writeback_control(), it can call this when redirtying
|
||||
* a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED,
|
||||
* tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN,
|
||||
* WB_WRITTEN) in long term. The mismatches will lead to systematic errors
|
||||
* in balanced_dirty_ratelimit and the dirty pages position control.
|
||||
*/
|
||||
void folio_account_redirty(struct folio *folio)
|
||||
{
|
||||
struct address_space *mapping = folio->mapping;
|
||||
|
||||
if (mapping && mapping_can_writeback(mapping)) {
|
||||
struct inode *inode = mapping->host;
|
||||
struct bdi_writeback *wb;
|
||||
struct wb_lock_cookie cookie = {};
|
||||
long nr = folio_nr_pages(folio);
|
||||
|
||||
wb = unlocked_inode_to_wb_begin(inode, &cookie);
|
||||
current->nr_dirtied -= nr;
|
||||
node_stat_mod_folio(folio, NR_DIRTIED, -nr);
|
||||
wb_stat_mod(wb, WB_DIRTIED, -nr);
|
||||
unlocked_inode_to_wb_end(inode, &cookie);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(folio_account_redirty);
|
||||
|
||||
/**
|
||||
* folio_redirty_for_writepage - Decline to write a dirty folio.
|
||||
* @wbc: The writeback control.
|
||||
@ -2757,13 +2726,23 @@ EXPORT_SYMBOL(folio_account_redirty);
|
||||
bool folio_redirty_for_writepage(struct writeback_control *wbc,
|
||||
struct folio *folio)
|
||||
{
|
||||
bool ret;
|
||||
struct address_space *mapping = folio->mapping;
|
||||
long nr = folio_nr_pages(folio);
|
||||
bool ret;
|
||||
|
||||
wbc->pages_skipped += nr;
|
||||
ret = filemap_dirty_folio(folio->mapping, folio);
|
||||
folio_account_redirty(folio);
|
||||
ret = filemap_dirty_folio(mapping, folio);
|
||||
if (mapping && mapping_can_writeback(mapping)) {
|
||||
struct inode *inode = mapping->host;
|
||||
struct bdi_writeback *wb;
|
||||
struct wb_lock_cookie cookie = {};
|
||||
|
||||
wb = unlocked_inode_to_wb_begin(inode, &cookie);
|
||||
current->nr_dirtied -= nr;
|
||||
node_stat_mod_folio(folio, NR_DIRTIED, -nr);
|
||||
wb_stat_mod(wb, WB_DIRTIED, -nr);
|
||||
unlocked_inode_to_wb_end(inode, &cookie);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL(folio_redirty_for_writepage);