From 9acaa64187f9b4cbb75622883c96ea1a893d5431 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 14 Sep 2023 09:07:01 -0700 Subject: [PATCH] btrfs: scrub: implement raid stripe tree support A filesystem that uses the raid stripe tree for logical to physical address translation can't use the regular scrub path, that reads all stripes and then checks if a sector is unused afterwards. When using the raid stripe tree, this will result in lookup errors, as the stripe tree doesn't know the requested logical addresses. In case we're scrubbing a filesystem which uses the RAID stripe tree for multi-device logical to physical address translation, perform an extra block mapping step to get the real on-disk stripe length from the stripe tree when scrubbing the sectors. This prevents a double completion of the btrfs_bio caused by splitting the underlying bio and ultimately a use-after-free. Signed-off-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/bio.c | 2 ++ fs/btrfs/raid-stripe-tree.c | 7 +++- fs/btrfs/scrub.c | 71 +++++++++++++++++++++++++++++++++++++ fs/btrfs/volumes.h | 1 + 4 files changed, 80 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index ac6bc4f43ffb..ab7ee090b606 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -663,6 +663,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) blk_status_t ret; int error; + smap.is_scrub = !bbio->inode; + btrfs_bio_counter_inc_blocked(fs_info); error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length, &bioc, &smap, &mirror_num); diff --git a/fs/btrfs/raid-stripe-tree.c b/fs/btrfs/raid-stripe-tree.c index c7e18a85f723..a4451452ab68 100644 --- a/fs/btrfs/raid-stripe-tree.c +++ b/fs/btrfs/raid-stripe-tree.c @@ -171,6 +171,11 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, if (!path) return -ENOMEM; + if (stripe->is_scrub) { + path->skip_locking = 1; + path->search_commit_root = 1; + } + ret = btrfs_search_slot(NULL, stripe_root, &stripe_key, path, 0, 0); if (ret < 0) goto free_path; @@ -246,7 +251,7 @@ int btrfs_get_raid_extent_offset(struct btrfs_fs_info *fs_info, out: if (ret > 0) ret = -ENOENT; - if (ret && ret != -EIO) { + if (ret && ret != -EIO && !stripe->is_scrub) { if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) btrfs_print_tree(leaf, 1); btrfs_err(fs_info, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 7bcab3899ae6..2ac574bde350 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -23,6 +23,7 @@ #include "accessors.h" #include "file-item.h" #include "scrub.h" +#include "raid-stripe-tree.h" /* * This is only the first step towards a full-features scrub. It reads all @@ -1634,6 +1635,71 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, + struct scrub_stripe *stripe) +{ + struct btrfs_fs_info *fs_info = stripe->bg->fs_info; + struct btrfs_bio *bbio = NULL; + u64 stripe_len = BTRFS_STRIPE_LEN; + int mirror = stripe->mirror_num; + int i; + + atomic_inc(&stripe->pending_io); + + for_each_set_bit(i, &stripe->extent_sector_bitmap, stripe->nr_sectors) { + struct page *page = scrub_stripe_get_page(stripe, i); + unsigned int pgoff = scrub_stripe_get_page_offset(stripe, i); + + /* The current sector cannot be merged, submit the bio. */ + if (bbio && + ((i > 0 && + !test_bit(i - 1, &stripe->extent_sector_bitmap)) || + bbio->bio.bi_iter.bi_size >= stripe_len)) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_bio(bbio, mirror); + bbio = NULL; + } + + if (!bbio) { + struct btrfs_io_stripe io_stripe = {}; + struct btrfs_io_context *bioc = NULL; + const u64 logical = stripe->logical + + (i << fs_info->sectorsize_bits); + int err; + + bbio = btrfs_bio_alloc(stripe->nr_sectors, REQ_OP_READ, + fs_info, scrub_read_endio, stripe); + bbio->bio.bi_iter.bi_sector = logical >> SECTOR_SHIFT; + + io_stripe.is_scrub = true; + err = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical, + &stripe_len, &bioc, &io_stripe, + &mirror); + btrfs_put_bioc(bioc); + if (err) { + btrfs_bio_end_io(bbio, + errno_to_blk_status(err)); + return; + } + } + + __bio_add_page(&bbio->bio, page, fs_info->sectorsize, pgoff); + } + + if (bbio) { + ASSERT(bbio->bio.bi_iter.bi_size); + atomic_inc(&stripe->pending_io); + btrfs_submit_bio(bbio, mirror); + } + + if (atomic_dec_and_test(&stripe->pending_io)) { + wake_up(&stripe->io_wait); + INIT_WORK(&stripe->work, scrub_stripe_read_repair_worker); + queue_work(stripe->bg->fs_info->scrub_workers, &stripe->work); + } +} + static void scrub_submit_initial_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { @@ -1645,6 +1711,11 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, ASSERT(stripe->mirror_num > 0); ASSERT(test_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state)); + if (btrfs_need_stripe_tree_update(fs_info, stripe->bg->flags)) { + scrub_submit_extent_sector_read(sctx, stripe); + return; + } + bbio = btrfs_bio_alloc(SCRUB_STRIPE_PAGES, REQ_OP_READ, fs_info, scrub_read_endio, stripe); diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 000c56a3d2a2..a0e76bb20140 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -393,6 +393,7 @@ struct btrfs_io_stripe { /* Block mapping. */ u64 physical; u64 length; + bool is_scrub; /* For the endio handler. */ struct btrfs_io_context *bioc; };