mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-09 14:50:19 +00:00
btrfs: zoned: automatically reclaim zones
When a file gets deleted on a zoned file system, the space freed is not returned back into the block group's free space, but is migrated to zone_unusable. As this zone_unusable space is behind the current write pointer it is not possible to use it for new allocations. In the current implementation a zone is reset once all of the block group's space is accounted as zone unusable. This behaviour can lead to premature ENOSPC errors on a busy file system. Instead of only reclaiming the zone once it is completely unusable, kick off a reclaim job once the amount of unusable bytes exceeds a user configurable threshold between 51% and 100%. It can be set per mounted filesystem via the sysfs tunable bg_reclaim_threshold which is set to 75% by default. Similar to reclaiming unused block groups, these dirty block groups are added to a to_reclaim list and then on a transaction commit, the reclaim process is triggered but after we deleted unused block groups, which will free space for the relocation process. Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Reviewed-by: David Sterba <dsterba@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
This commit is contained in:
parent
f33720657d
commit
18bb8bbf13
@ -1485,6 +1485,97 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
|
||||
void btrfs_reclaim_bgs_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info =
|
||||
container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
|
||||
struct btrfs_block_group *bg;
|
||||
struct btrfs_space_info *space_info;
|
||||
int ret;
|
||||
|
||||
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
|
||||
return;
|
||||
|
||||
if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_BALANCE))
|
||||
return;
|
||||
|
||||
mutex_lock(&fs_info->reclaim_bgs_lock);
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
while (!list_empty(&fs_info->reclaim_bgs)) {
|
||||
bg = list_first_entry(&fs_info->reclaim_bgs,
|
||||
struct btrfs_block_group,
|
||||
bg_list);
|
||||
list_del_init(&bg->bg_list);
|
||||
|
||||
space_info = bg->space_info;
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
|
||||
/* Don't race with allocators so take the groups_sem */
|
||||
down_write(&space_info->groups_sem);
|
||||
|
||||
spin_lock(&bg->lock);
|
||||
if (bg->reserved || bg->pinned || bg->ro) {
|
||||
/*
|
||||
* We want to bail if we made new allocations or have
|
||||
* outstanding allocations in this block group. We do
|
||||
* the ro check in case balance is currently acting on
|
||||
* this block group.
|
||||
*/
|
||||
spin_unlock(&bg->lock);
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
}
|
||||
spin_unlock(&bg->lock);
|
||||
|
||||
/* Get out fast, in case we're unmounting the filesystem */
|
||||
if (btrfs_fs_closing(fs_info)) {
|
||||
up_write(&space_info->groups_sem);
|
||||
goto next;
|
||||
}
|
||||
|
||||
ret = inc_block_group_ro(bg, 0);
|
||||
up_write(&space_info->groups_sem);
|
||||
if (ret < 0)
|
||||
goto next;
|
||||
|
||||
btrfs_info(fs_info, "reclaiming chunk %llu with %llu%% used",
|
||||
bg->start, div_u64(bg->used * 100, bg->length));
|
||||
trace_btrfs_reclaim_block_group(bg);
|
||||
ret = btrfs_relocate_chunk(fs_info, bg->start);
|
||||
if (ret)
|
||||
btrfs_err(fs_info, "error relocating chunk %llu",
|
||||
bg->start);
|
||||
|
||||
next:
|
||||
btrfs_put_block_group(bg);
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
mutex_unlock(&fs_info->reclaim_bgs_lock);
|
||||
btrfs_exclop_finish(fs_info);
|
||||
}
|
||||
|
||||
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info)
|
||||
{
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
if (!list_empty(&fs_info->reclaim_bgs))
|
||||
queue_work(system_unbound_wq, &fs_info->reclaim_bgs_work);
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
|
||||
void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = bg->fs_info;
|
||||
|
||||
spin_lock(&fs_info->unused_bgs_lock);
|
||||
if (list_empty(&bg->bg_list)) {
|
||||
btrfs_get_block_group(bg);
|
||||
trace_btrfs_add_reclaim_block_group(bg);
|
||||
list_add_tail(&bg->bg_list, &fs_info->reclaim_bgs);
|
||||
}
|
||||
spin_unlock(&fs_info->unused_bgs_lock);
|
||||
}
|
||||
|
||||
static int read_bg_from_eb(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
|
||||
struct btrfs_path *path)
|
||||
{
|
||||
@ -3446,6 +3537,16 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
|
||||
}
|
||||
spin_unlock(&info->unused_bgs_lock);
|
||||
|
||||
spin_lock(&info->unused_bgs_lock);
|
||||
while (!list_empty(&info->reclaim_bgs)) {
|
||||
block_group = list_first_entry(&info->reclaim_bgs,
|
||||
struct btrfs_block_group,
|
||||
bg_list);
|
||||
list_del_init(&block_group->bg_list);
|
||||
btrfs_put_block_group(block_group);
|
||||
}
|
||||
spin_unlock(&info->unused_bgs_lock);
|
||||
|
||||
spin_lock(&info->block_group_cache_lock);
|
||||
while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
|
||||
block_group = rb_entry(n, struct btrfs_block_group,
|
||||
|
@ -264,6 +264,9 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
|
||||
u64 group_start, struct extent_map *em);
|
||||
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
|
||||
void btrfs_reclaim_bgs_work(struct work_struct *work);
|
||||
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info);
|
||||
void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg);
|
||||
int btrfs_read_block_groups(struct btrfs_fs_info *info);
|
||||
int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 bytes_used,
|
||||
u64 type, u64 chunk_offset, u64 size);
|
||||
|
@ -960,6 +960,11 @@ struct btrfs_fs_info {
|
||||
struct work_struct async_data_reclaim_work;
|
||||
struct work_struct preempt_reclaim_work;
|
||||
|
||||
/* Reclaim partially filled block groups in the background */
|
||||
struct work_struct reclaim_bgs_work;
|
||||
struct list_head reclaim_bgs;
|
||||
int bg_reclaim_threshold;
|
||||
|
||||
spinlock_t unused_bgs_lock;
|
||||
struct list_head unused_bgs;
|
||||
struct mutex unused_bg_unpin_mutex;
|
||||
|
@ -1898,6 +1898,13 @@ static int cleaner_kthread(void *arg)
|
||||
* unused block groups.
|
||||
*/
|
||||
btrfs_delete_unused_bgs(fs_info);
|
||||
|
||||
/*
|
||||
* Reclaim block groups in the reclaim_bgs list after we deleted
|
||||
* all unused block_groups. This possibly gives us some more free
|
||||
* space.
|
||||
*/
|
||||
btrfs_reclaim_bgs(fs_info);
|
||||
sleep:
|
||||
clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
|
||||
if (kthread_should_park())
|
||||
@ -2886,6 +2893,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
INIT_LIST_HEAD(&fs_info->space_info);
|
||||
INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
|
||||
INIT_LIST_HEAD(&fs_info->unused_bgs);
|
||||
INIT_LIST_HEAD(&fs_info->reclaim_bgs);
|
||||
#ifdef CONFIG_BTRFS_DEBUG
|
||||
INIT_LIST_HEAD(&fs_info->allocated_roots);
|
||||
INIT_LIST_HEAD(&fs_info->allocated_ebs);
|
||||
@ -2974,6 +2982,9 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
|
||||
fs_info->swapfile_pins = RB_ROOT;
|
||||
|
||||
fs_info->send_in_progress = 0;
|
||||
|
||||
fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
|
||||
INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
|
||||
}
|
||||
|
||||
static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block *sb)
|
||||
@ -4332,6 +4343,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
|
||||
cancel_work_sync(&fs_info->async_data_reclaim_work);
|
||||
cancel_work_sync(&fs_info->preempt_reclaim_work);
|
||||
|
||||
cancel_work_sync(&fs_info->reclaim_bgs_work);
|
||||
|
||||
/* Cancel or finish ongoing discard work */
|
||||
btrfs_discard_cleanup(fs_info);
|
||||
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include <linux/ratelimit.h>
|
||||
#include <linux/error-injection.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include "misc.h"
|
||||
#include "ctree.h"
|
||||
#include "free-space-cache.h"
|
||||
#include "transaction.h"
|
||||
@ -2539,6 +2540,7 @@ out:
|
||||
static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
|
||||
u64 bytenr, u64 size, bool used)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = block_group->fs_info;
|
||||
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
|
||||
u64 offset = bytenr - block_group->start;
|
||||
u64 to_free, to_unusable;
|
||||
@ -2569,8 +2571,13 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
|
||||
}
|
||||
|
||||
/* All the region is now unusable. Mark it as unused and reclaim */
|
||||
if (block_group->zone_unusable == block_group->length)
|
||||
if (block_group->zone_unusable == block_group->length) {
|
||||
btrfs_mark_bg_unused(block_group);
|
||||
} else if (block_group->zone_unusable >=
|
||||
div_factor_fine(block_group->length,
|
||||
fs_info->bg_reclaim_threshold)) {
|
||||
btrfs_mark_bg_to_reclaim(block_group);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -980,6 +980,40 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj,
|
||||
}
|
||||
BTRFS_ATTR_RW(, read_policy, btrfs_read_policy_show, btrfs_read_policy_store);
|
||||
|
||||
static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
char *buf)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
ssize_t ret;
|
||||
|
||||
ret = scnprintf(buf, PAGE_SIZE, "%d\n", fs_info->bg_reclaim_threshold);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj,
|
||||
struct kobj_attribute *a,
|
||||
const char *buf, size_t len)
|
||||
{
|
||||
struct btrfs_fs_info *fs_info = to_fs_info(kobj);
|
||||
int thresh;
|
||||
int ret;
|
||||
|
||||
ret = kstrtoint(buf, 10, &thresh);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (thresh <= 50 || thresh > 100)
|
||||
return -EINVAL;
|
||||
|
||||
fs_info->bg_reclaim_threshold = thresh;
|
||||
|
||||
return len;
|
||||
}
|
||||
BTRFS_ATTR_RW(, bg_reclaim_threshold, btrfs_bg_reclaim_threshold_show,
|
||||
btrfs_bg_reclaim_threshold_store);
|
||||
|
||||
static const struct attribute *btrfs_attrs[] = {
|
||||
BTRFS_ATTR_PTR(, label),
|
||||
BTRFS_ATTR_PTR(, nodesize),
|
||||
@ -991,6 +1025,7 @@ static const struct attribute *btrfs_attrs[] = {
|
||||
BTRFS_ATTR_PTR(, exclusive_operation),
|
||||
BTRFS_ATTR_PTR(, generation),
|
||||
BTRFS_ATTR_PTR(, read_policy),
|
||||
BTRFS_ATTR_PTR(, bg_reclaim_threshold),
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@ -3098,7 +3098,7 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
|
||||
int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
|
||||
{
|
||||
struct btrfs_root *root = fs_info->chunk_root;
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
@ -484,6 +484,7 @@ void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
|
||||
int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset);
|
||||
int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
|
||||
int btrfs_uuid_scan_kthread(void *data);
|
||||
|
@ -9,6 +9,12 @@
|
||||
#include "disk-io.h"
|
||||
#include "block-group.h"
|
||||
|
||||
/*
|
||||
* Block groups with more than this value (percents) of unusable space will be
|
||||
* scheduled for background reclaim.
|
||||
*/
|
||||
#define BTRFS_DEFAULT_RECLAIM_THRESH 75
|
||||
|
||||
struct btrfs_zoned_device_info {
|
||||
/*
|
||||
* Number of zones, zone size and types of zones if bdev is a
|
||||
|
@ -1903,6 +1903,18 @@ DEFINE_EVENT(btrfs__block_group, btrfs_add_unused_block_group,
|
||||
TP_ARGS(bg_cache)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__block_group, btrfs_add_reclaim_block_group,
|
||||
TP_PROTO(const struct btrfs_block_group *bg_cache),
|
||||
|
||||
TP_ARGS(bg_cache)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__block_group, btrfs_reclaim_block_group,
|
||||
TP_PROTO(const struct btrfs_block_group *bg_cache),
|
||||
|
||||
TP_ARGS(bg_cache)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(btrfs__block_group, btrfs_skip_unused_block_group,
|
||||
TP_PROTO(const struct btrfs_block_group *bg_cache),
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user