for-6.5-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmSZ0YUACgkQxWXV+ddt
 WDuF9g/+OsEZflGYIZj11trG0l5HWApnINKqhZ524J0TNy9KxY0KOTqPCOg0O41E
 Vt7uJCMG06+ifvVEby8srjzUZxUutIuMeGIP91VyXUbt+CleAWunxnL8aKcTPS3T
 QxyGx0VmukO2UHYCXhXQLRXo/+zPcBxnk6UgVzcBCIOecOMTB1KCeblBmqd3q86f
 NVFse+BTkmtm86u/1rzEDqgY6lMHQ+jNoHhpRVGpzmSnSX8GELyOW3QnixS0LCo2
 no0vvW0QXkRJ+S68V5zBWlqa3xr21jOYcmON2Ra2G8Etsjx7W5XKS3I9k/4uonxb
 LbITmBwEZWt/aTzmLFT16S5M9BlRqH5Ffmsw7Ls+NDmdvH/f1zM8XeNAb7kpFTrn
 T3aALjkcd65/JFmgyVmzdt4BSmrUkYm0EEmLirQec86HJ4NlQJpJ2B7cfMWKPyal
 +VgaT4S+fLTc/HJD3nObMXTCxZrMf0sBUhU4/QXqL7TTjqqosSn26mlGNUocw7Ty
 HaESk7j2L9TMPt640r1G98j9ND7sWmyBmiYsah8F3MKZCIS892qhtFs0m5g2tA1F
 sjPv9u6M5Pi6ie5Eo8xs+SqKa7TPPVsbZ9XcMRBuzDc5AtUPAm6ii9QVwref8wTq
 qO379jDepgPj4HZkXMzQKxd6rw6wrF854304XhjHZefk+ChhIc4=
 =nN4X
 -----END PGP SIGNATURE-----

Merge tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "Mainly core changes, refactoring and optimizations.

  Performance is improved in some areas, overall there may be a
  cumulative improvement due to refactoring that removed lookups in the
  IO path or simplified IO submission tracking.

  Core:

   - submit IO synchronously for fast checksums (crc32c and xxhash),
     remove high priority worker kthread

   - read extent buffer in one go, simplify IO tracking, bio submission
     and locking

   - remove additional tracking of redirtied extent buffers, originally
     added for zoned mode but actually not needed

   - track ordered extent pointer in bio to avoid rbtree lookups during
     IO

   - scrub, use recovered data stripes as cache to avoid unnecessary
     read

   - in zoned mode, optimize logical to physical mappings of extents

   - remove PageError handling, not set by VFS nor writeback

   - cleanups, refactoring, better structure packing

   - lots of error handling improvements

   - more assertions, lockdep annotations

   - print assertion failure with the exact line where it happens

   - tracepoint updates

   - more debugging prints

  Performance:

   - speedup in fsync(), better tracking of inode logged status can
     avoid transaction commit

   - IO path structures track logical offsets in data structures and
     does not need to look it up

  User visible changes:

   - don't commit transaction for every created subvolume, this can
     reduce time when many subvolumes are created in a batch

   - print affected files when relocation fails

   - trigger orphan file cleanup during START_SYNC ioctl

  Notable fixes:

   - fix crash when disabling quota and relocation

   - fix crashes when removing roots from drity list

   - fix transacion abort during relocation when converting from newer
     profiles not covered by fallback

   - in zoned mode, stop reclaiming block groups if filesystem becomes
     read-only

   - fix rare race condition in tree mod log rewind that can miss some
     btree node slots

   - with enabled fsverity, drop up-to-date page bit in case the
     verification fails"

* tag 'for-6.5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (194 commits)
  btrfs: fix race between quota disable and relocation
  btrfs: add comment to struct btrfs_fs_info::dirty_cowonly_roots
  btrfs: fix race when deleting free space root from the dirty cow roots list
  btrfs: fix race when deleting quota root from the dirty cow roots list
  btrfs: tracepoints: also show actual number of the outstanding extents
  btrfs: update i_version in update_dev_time
  btrfs: make btrfs_compressed_bioset static
  btrfs: add handling for RAID1C23/DUP to btrfs_reduce_alloc_profile
  btrfs: scrub: remove btrfs_fs_info::scrub_wr_completion_workers
  btrfs: scrub: remove scrub_ctx::csum_list member
  btrfs: do not BUG_ON after failure to migrate space during truncation
  btrfs: do not BUG_ON on failure to get dir index for new snapshot
  btrfs: send: do not BUG_ON() on unexpected symlink data extent
  btrfs: do not BUG_ON() when dropping inode items from log root
  btrfs: replace BUG_ON() at split_item() with proper error handling
  btrfs: do not BUG_ON() on tree mod log failures at btrfs_del_ptr()
  btrfs: do not BUG_ON() on tree mod log failures at insert_ptr()
  btrfs: do not BUG_ON() on tree mod log failure at insert_new_root()
  btrfs: do not BUG_ON() on tree mod log failures at push_nodes_for_insert()
  btrfs: abort transaction at update_ref_for_cow() when ref count is zero
  ...
This commit is contained in:
Linus Torvalds 2023-06-26 11:41:38 -07:00
commit cc423f6337
75 changed files with 2780 additions and 2710 deletions

View File

@ -71,6 +71,16 @@ bool btrfs_workqueue_normal_congested(const struct btrfs_workqueue *wq)
return atomic_read(&wq->pending) > wq->thresh * 2;
}
static void btrfs_init_workqueue(struct btrfs_workqueue *wq,
struct btrfs_fs_info *fs_info)
{
wq->fs_info = fs_info;
atomic_set(&wq->pending, 0);
INIT_LIST_HEAD(&wq->ordered_list);
spin_lock_init(&wq->list_lock);
spin_lock_init(&wq->thres_lock);
}
struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
const char *name, unsigned int flags,
int limit_active, int thresh)
@ -80,9 +90,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
if (!ret)
return NULL;
ret->fs_info = fs_info;
btrfs_init_workqueue(ret, fs_info);
ret->limit_active = limit_active;
atomic_set(&ret->pending, 0);
if (thresh == 0)
thresh = DFT_THRESHOLD;
/* For low threshold, disabling threshold is a better choice */
@ -106,9 +116,33 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
return NULL;
}
INIT_LIST_HEAD(&ret->ordered_list);
spin_lock_init(&ret->list_lock);
spin_lock_init(&ret->thres_lock);
trace_btrfs_workqueue_alloc(ret, name);
return ret;
}
struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags)
{
struct btrfs_workqueue *ret;
ret = kzalloc(sizeof(*ret), GFP_KERNEL);
if (!ret)
return NULL;
btrfs_init_workqueue(ret, fs_info);
/* Ordered workqueues don't allow @max_active adjustments. */
ret->limit_active = 1;
ret->current_active = 1;
ret->thresh = NO_THRESHOLD;
ret->normal_wq = alloc_ordered_workqueue("btrfs-%s", flags, name);
if (!ret->normal_wq) {
kfree(ret);
return NULL;
}
trace_btrfs_workqueue_alloc(ret, name);
return ret;
}

View File

@ -31,6 +31,9 @@ struct btrfs_workqueue *btrfs_alloc_workqueue(struct btrfs_fs_info *fs_info,
unsigned int flags,
int limit_active,
int thresh);
struct btrfs_workqueue *btrfs_alloc_ordered_workqueue(
struct btrfs_fs_info *fs_info, const char *name,
unsigned int flags);
void btrfs_init_work(struct btrfs_work *work, btrfs_func_t func,
btrfs_func_t ordered_func, btrfs_func_t ordered_free);
void btrfs_queue_work(struct btrfs_workqueue *wq,

View File

@ -27,6 +27,17 @@ struct btrfs_failed_bio {
atomic_t repair_count;
};
/* Is this a data path I/O that needs storage layer checksum and repair? */
static inline bool is_data_bbio(struct btrfs_bio *bbio)
{
return bbio->inode && is_data_inode(&bbio->inode->vfs_inode);
}
static bool bbio_has_ordered_extent(struct btrfs_bio *bbio)
{
return is_data_bbio(bbio) && btrfs_op(&bbio->bio) == BTRFS_MAP_WRITE;
}
/*
* Initialize a btrfs_bio structure. This skips the embedded bio itself as it
* is already initialized by the block layer.
@ -61,20 +72,6 @@ struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
return bbio;
}
static blk_status_t btrfs_bio_extract_ordered_extent(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered;
int ret;
ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON_ONCE(!ordered))
return BLK_STS_IOERR;
ret = btrfs_extract_ordered_extent(bbio, ordered);
btrfs_put_ordered_extent(ordered);
return errno_to_blk_status(ret);
}
static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
struct btrfs_bio *orig_bbio,
u64 map_length, bool use_append)
@ -95,13 +92,41 @@ static struct btrfs_bio *btrfs_split_bio(struct btrfs_fs_info *fs_info,
btrfs_bio_init(bbio, fs_info, NULL, orig_bbio);
bbio->inode = orig_bbio->inode;
bbio->file_offset = orig_bbio->file_offset;
if (!(orig_bbio->bio.bi_opf & REQ_BTRFS_ONE_ORDERED))
orig_bbio->file_offset += map_length;
orig_bbio->file_offset += map_length;
if (bbio_has_ordered_extent(bbio)) {
refcount_inc(&orig_bbio->ordered->refs);
bbio->ordered = orig_bbio->ordered;
}
atomic_inc(&orig_bbio->pending_ios);
return bbio;
}
/* Free a bio that was never submitted to the underlying device. */
static void btrfs_cleanup_bio(struct btrfs_bio *bbio)
{
if (bbio_has_ordered_extent(bbio))
btrfs_put_ordered_extent(bbio->ordered);
bio_put(&bbio->bio);
}
static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
{
if (bbio_has_ordered_extent(bbio)) {
struct btrfs_ordered_extent *ordered = bbio->ordered;
bbio->end_io(bbio);
btrfs_put_ordered_extent(ordered);
} else {
bbio->end_io(bbio);
}
}
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
__btrfs_bio_end_io(bbio);
}
static void btrfs_orig_write_end_io(struct bio *bio);
static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
@ -130,12 +155,12 @@ static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
if (bbio->bio.bi_status)
btrfs_bbio_propagate_error(bbio, orig_bbio);
bio_put(&bbio->bio);
btrfs_cleanup_bio(bbio);
bbio = orig_bbio;
}
if (atomic_dec_and_test(&bbio->pending_ios))
bbio->end_io(bbio);
__btrfs_bio_end_io(bbio);
}
static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
@ -327,7 +352,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
struct btrfs_bio *bbio = container_of(work, struct btrfs_bio, end_io_work);
/* Metadata reads are checked and repaired by the submitter. */
if (bbio->inode && !(bbio->bio.bi_opf & REQ_META))
if (is_data_bbio(bbio))
btrfs_check_read_bio(bbio, bbio->bio.bi_private);
else
btrfs_orig_bbio_end_io(bbio);
@ -348,7 +373,7 @@ static void btrfs_simple_end_io(struct bio *bio)
INIT_WORK(&bbio->end_io_work, btrfs_end_bio_work);
queue_work(btrfs_end_io_wq(fs_info, bio), &bbio->end_io_work);
} else {
if (bio_op(bio) == REQ_OP_ZONE_APPEND)
if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
btrfs_record_physical_zoned(bbio);
btrfs_orig_bbio_end_io(bbio);
}
@ -361,8 +386,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
btrfs_bio_counter_dec(bioc->fs_info);
bbio->mirror_num = bioc->mirror_num;
if (bio_op(bio) == REQ_OP_READ && bbio->inode &&
!(bbio->bio.bi_opf & REQ_META))
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
btrfs_check_read_bio(bbio, NULL);
else
btrfs_orig_bbio_end_io(bbio);
@ -472,13 +496,12 @@ static void btrfs_submit_mirrored_bio(struct btrfs_io_context *bioc, int dev_nr)
static void __btrfs_submit_bio(struct bio *bio, struct btrfs_io_context *bioc,
struct btrfs_io_stripe *smap, int mirror_num)
{
/* Do not leak our private flag into the block layer. */
bio->bi_opf &= ~REQ_BTRFS_ONE_ORDERED;
if (!bioc) {
/* Single mirror read/write fast path. */
btrfs_bio(bio)->mirror_num = mirror_num;
bio->bi_iter.bi_sector = smap->physical >> SECTOR_SHIFT;
if (bio_op(bio) != REQ_OP_READ)
btrfs_bio(bio)->orig_physical = smap->physical;
bio->bi_private = smap->dev;
bio->bi_end_io = btrfs_simple_end_io;
btrfs_submit_dev_bio(smap->dev, bio);
@ -574,27 +597,20 @@ static void run_one_async_free(struct btrfs_work *work)
static bool should_async_write(struct btrfs_bio *bbio)
{
/*
* If the I/O is not issued by fsync and friends, (->sync_writers != 0),
* then try to defer the submission to a workqueue to parallelize the
* checksum calculation.
*/
if (atomic_read(&bbio->inode->sync_writers))
/* Submit synchronously if the checksum implementation is fast. */
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &bbio->fs_info->flags))
return false;
/*
* Submit metadata writes synchronously if the checksum implementation
* is fast, or we are on a zoned device that wants I/O to be submitted
* in order.
* Try to defer the submission to a workqueue to parallelize the
* checksum calculation unless the I/O is issued synchronously.
*/
if (bbio->bio.bi_opf & REQ_META) {
struct btrfs_fs_info *fs_info = bbio->fs_info;
if (op_is_sync(bbio->bio.bi_opf))
return false;
if (btrfs_is_zoned(fs_info))
return false;
if (test_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags))
return false;
}
/* Zoned devices require I/O to be submitted in order. */
if ((bbio->bio.bi_opf & REQ_META) && btrfs_is_zoned(bbio->fs_info))
return false;
return true;
}
@ -622,10 +638,7 @@ static bool btrfs_wq_submit_bio(struct btrfs_bio *bbio,
btrfs_init_work(&async->work, run_one_async_start, run_one_async_done,
run_one_async_free);
if (op_is_sync(bbio->bio.bi_opf))
btrfs_queue_work(fs_info->hipri_workers, &async->work);
else
btrfs_queue_work(fs_info->workers, &async->work);
btrfs_queue_work(fs_info->workers, &async->work);
return true;
}
@ -635,7 +648,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
struct btrfs_fs_info *fs_info = bbio->fs_info;
struct btrfs_bio *orig_bbio = bbio;
struct bio *bio = &bbio->bio;
u64 logical = bio->bi_iter.bi_sector << 9;
u64 logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
u64 length = bio->bi_iter.bi_size;
u64 map_length = length;
bool use_append = btrfs_use_zone_append(bbio);
@ -645,8 +658,8 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
int error;
btrfs_bio_counter_inc_blocked(fs_info);
error = __btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
error = btrfs_map_block(fs_info, btrfs_op(bio), logical, &map_length,
&bioc, &smap, &mirror_num, 1);
if (error) {
ret = errno_to_blk_status(error);
goto fail;
@ -665,7 +678,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
* Save the iter for the end_io handler and preload the checksums for
* data reads.
*/
if (bio_op(bio) == REQ_OP_READ && inode && !(bio->bi_opf & REQ_META)) {
if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio)) {
bbio->saved_iter = bio->bi_iter;
ret = btrfs_lookup_bio_sums(bbio);
if (ret)
@ -676,9 +689,6 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
if (use_append) {
bio->bi_opf &= ~REQ_OP_WRITE;
bio->bi_opf |= REQ_OP_ZONE_APPEND;
ret = btrfs_bio_extract_ordered_extent(bbio);
if (ret)
goto fail_put_bio;
}
/*
@ -695,6 +705,10 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
ret = btrfs_bio_csum(bbio);
if (ret)
goto fail_put_bio;
} else if (use_append) {
ret = btrfs_alloc_dummy_sum(bbio);
if (ret)
goto fail_put_bio;
}
}
@ -704,7 +718,7 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
fail_put_bio:
if (map_length < length)
bio_put(bio);
btrfs_cleanup_bio(bbio);
fail:
btrfs_bio_counter_dec(fs_info);
btrfs_bio_end_io(orig_bbio, ret);

View File

@ -39,8 +39,8 @@ struct btrfs_bio {
union {
/*
* Data checksumming and original I/O information for internal
* use in the btrfs_submit_bio machinery.
* For data reads: checksumming and original I/O information.
* (for internal use in the btrfs_submit_bio machinery only)
*/
struct {
u8 *csum;
@ -48,7 +48,20 @@ struct btrfs_bio {
struct bvec_iter saved_iter;
};
/* For metadata parentness verification. */
/*
* For data writes:
* - ordered extent covering the bio
* - pointer to the checksums for this bio
* - original physical address from the allocator
* (for zone append only)
*/
struct {
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_sum *sums;
u64 orig_physical;
};
/* For metadata reads: parentness verification. */
struct btrfs_tree_parent_check parent_check;
};
@ -84,15 +97,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
struct btrfs_fs_info *fs_info,
btrfs_bio_end_io_t end_io, void *private);
static inline void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
{
bbio->bio.bi_status = status;
bbio->end_io(bbio);
}
/* Bio only refers to one ordered extent. */
#define REQ_BTRFS_ONE_ORDERED REQ_DRV
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
/* Submit using blkcg_punt_bio_submit. */
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE

View File

@ -95,14 +95,21 @@ static u64 btrfs_reduce_alloc_profile(struct btrfs_fs_info *fs_info, u64 flags)
}
allowed &= flags;
if (allowed & BTRFS_BLOCK_GROUP_RAID6)
/* Select the highest-redundancy RAID level. */
if (allowed & BTRFS_BLOCK_GROUP_RAID1C4)
allowed = BTRFS_BLOCK_GROUP_RAID1C4;
else if (allowed & BTRFS_BLOCK_GROUP_RAID6)
allowed = BTRFS_BLOCK_GROUP_RAID6;
else if (allowed & BTRFS_BLOCK_GROUP_RAID1C3)
allowed = BTRFS_BLOCK_GROUP_RAID1C3;
else if (allowed & BTRFS_BLOCK_GROUP_RAID5)
allowed = BTRFS_BLOCK_GROUP_RAID5;
else if (allowed & BTRFS_BLOCK_GROUP_RAID10)
allowed = BTRFS_BLOCK_GROUP_RAID10;
else if (allowed & BTRFS_BLOCK_GROUP_RAID1)
allowed = BTRFS_BLOCK_GROUP_RAID1;
else if (allowed & BTRFS_BLOCK_GROUP_DUP)
allowed = BTRFS_BLOCK_GROUP_DUP;
else if (allowed & BTRFS_BLOCK_GROUP_RAID0)
allowed = BTRFS_BLOCK_GROUP_RAID0;
@ -1633,11 +1640,14 @@ void btrfs_mark_bg_unused(struct btrfs_block_group *bg)
{
struct btrfs_fs_info *fs_info = bg->fs_info;
trace_btrfs_add_unused_block_group(bg);
spin_lock(&fs_info->unused_bgs_lock);
if (list_empty(&bg->bg_list)) {
btrfs_get_block_group(bg);
trace_btrfs_add_unused_block_group(bg);
list_add_tail(&bg->bg_list, &fs_info->unused_bgs);
} else {
/* Pull out the block group from the reclaim_bgs list. */
list_move_tail(&bg->bg_list, &fs_info->unused_bgs);
}
spin_unlock(&fs_info->unused_bgs_lock);
}
@ -1791,8 +1801,15 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
spin_unlock(&bg->lock);
/* Get out fast, in case we're unmounting the filesystem */
if (btrfs_fs_closing(fs_info)) {
/*
* Get out fast, in case we're read-only or unmounting the
* filesystem. It is OK to drop block groups from the list even
* for the read-only case. As we did sb_start_write(),
* "mount -o remount,ro" won't happen and read-only filesystem
* means it is forced read-only due to a fatal error. So, it
* never gets back to read-write to let us reclaim again.
*/
if (btrfs_need_cleaner_sleep(fs_info)) {
up_write(&space_info->groups_sem);
goto next;
}
@ -1823,11 +1840,27 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
}
next:
if (ret)
btrfs_mark_bg_to_reclaim(bg);
btrfs_put_block_group(bg);
mutex_unlock(&fs_info->reclaim_bgs_lock);
/*
* Reclaiming all the block groups in the list can take really
* long. Prioritize cleaning up unused block groups.
*/
btrfs_delete_unused_bgs(fs_info);
/*
* If we are interrupted by a balance, we can just bail out. The
* cleaner thread restart again if necessary.
*/
if (!mutex_trylock(&fs_info->reclaim_bgs_lock))
goto end;
spin_lock(&fs_info->unused_bgs_lock);
}
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
end:
btrfs_exclop_finish(fs_info);
sb_end_write(fs_info->sb);
}
@ -3521,9 +3554,9 @@ int btrfs_update_block_group(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&space_info->lock);
set_extent_dirty(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
GFP_NOFS | __GFP_NOFAIL);
set_extent_bit(&trans->transaction->pinned_extents,
bytenr, bytenr + num_bytes - 1,
EXTENT_DIRTY, NULL);
}
spin_lock(&trans->transaction->dirty_bgs_lock);

View File

@ -162,7 +162,14 @@ struct btrfs_block_group {
*/
struct list_head cluster_list;
/* For delayed block group creation or deletion of empty block groups */
/*
* Used for several lists:
*
* 1) struct btrfs_fs_info::unused_bgs
* 2) struct btrfs_fs_info::reclaim_bgs
* 3) struct btrfs_transaction::deleted_bgs
* 4) struct btrfs_trans_handle::new_bgs
*/
struct list_head bg_list;
/* For read-only block groups */

View File

@ -541,3 +541,22 @@ struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
return ERR_PTR(ret);
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv)
{
u64 needed_bytes;
int ret;
/* 1 for slack space, 1 for updating the inode */
needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
btrfs_calc_metadata_size(fs_info, 1);
spin_lock(&rsv->lock);
if (rsv->reserved < needed_bytes)
ret = -ENOSPC;
else
ret = 0;
spin_unlock(&rsv->lock);
return ret;
}

View File

@ -82,6 +82,8 @@ void btrfs_release_global_block_rsv(struct btrfs_fs_info *fs_info);
struct btrfs_block_rsv *btrfs_use_block_rsv(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u32 blocksize);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
static inline void btrfs_unuse_block_rsv(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *block_rsv,
u32 blocksize)

View File

@ -116,9 +116,6 @@ struct btrfs_inode {
unsigned long runtime_flags;
/* Keep track of who's O_SYNC/fsyncing currently */
atomic_t sync_writers;
/* full 64 bit generation number, struct vfs_inode doesn't have a big
* enough field for this.
*/
@ -335,7 +332,7 @@ static inline void btrfs_mod_outstanding_extents(struct btrfs_inode *inode,
if (btrfs_is_free_space_inode(inode))
return;
trace_btrfs_inode_mod_outstanding_extents(inode->root, btrfs_ino(inode),
mod);
mod, inode->outstanding_extents);
}
/*
@ -407,30 +404,12 @@ static inline bool btrfs_inode_can_compress(const struct btrfs_inode *inode)
return true;
}
/*
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
* separate u32s. These two functions convert between the two representations.
*/
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
{
return (flags | ((u64)ro_flags << 32));
}
static inline void btrfs_inode_split_flags(u64 inode_item_flags,
u32 *flags, u32 *ro_flags)
{
*flags = (u32)inode_item_flags;
*ro_flags = (u32)(inode_item_flags >> 32);
}
/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define CSUM_FMT "0x%*phN"
#define CSUM_FMT_VALUE(size, bytes) size, bytes
int btrfs_check_sector_csum(struct btrfs_fs_info *fs_info, struct page *page,
u32 pgoff, u8 *csum, const u8 * const csum_expected);
int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
struct btrfs_ordered_extent *ordered);
bool btrfs_data_csum_ok(struct btrfs_bio *bbio, struct btrfs_device *dev,
u32 bio_offset, struct bio_vec *bv);
noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,

View File

@ -1459,13 +1459,13 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
struct btrfs_fs_info *fs_info = state->fs_info;
int ret;
u64 length;
struct btrfs_io_context *multi = NULL;
struct btrfs_io_context *bioc = NULL;
struct btrfs_io_stripe smap, *map;
struct btrfs_device *device;
length = len;
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ,
bytenr, &length, &multi, mirror_num);
ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, bytenr, &length, &bioc,
NULL, &mirror_num, 0);
if (ret) {
block_ctx_out->start = 0;
block_ctx_out->dev_bytenr = 0;
@ -1478,21 +1478,26 @@ static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
return ret;
}
device = multi->stripes[0].dev;
if (bioc)
map = &bioc->stripes[0];
else
map = &smap;
device = map->dev;
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
!device->bdev || !device->name)
block_ctx_out->dev = NULL;
else
block_ctx_out->dev = btrfsic_dev_state_lookup(
device->bdev->bd_dev);
block_ctx_out->dev_bytenr = multi->stripes[0].physical;
block_ctx_out->dev_bytenr = map->physical;
block_ctx_out->start = bytenr;
block_ctx_out->len = len;
block_ctx_out->datav = NULL;
block_ctx_out->pagev = NULL;
block_ctx_out->mem_to_free = NULL;
kfree(multi);
kfree(bioc);
if (NULL == block_ctx_out->dev) {
ret = -ENXIO;
pr_info("btrfsic: error, cannot lookup dev (#1)!\n");
@ -1565,7 +1570,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
bio = bio_alloc(block_ctx->dev->bdev, num_pages - i,
REQ_OP_READ, GFP_NOFS);
bio->bi_iter.bi_sector = dev_bytenr >> 9;
bio->bi_iter.bi_sector = dev_bytenr >> SECTOR_SHIFT;
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],

View File

@ -37,7 +37,7 @@
#include "file-item.h"
#include "super.h"
struct bio_set btrfs_compressed_bioset;
static struct bio_set btrfs_compressed_bioset;
static const char* const btrfs_compress_types[] = { "", "zlib", "lzo", "zstd" };
@ -211,8 +211,6 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
for (i = 0; i < ret; i++) {
struct folio *folio = fbatch.folios[i];
if (errno)
folio_set_error(folio);
btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
cb->start, cb->len);
}
@ -226,13 +224,8 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
struct compressed_bio *cb =
container_of(work, struct compressed_bio, write_end_work);
/*
* Ok, we're the last bio for this extent, step one is to call back
* into the FS and do all the end_io operations.
*/
btrfs_writepage_endio_finish_ordered(cb->bbio.inode, NULL,
cb->start, cb->start + cb->len - 1,
cb->bbio.bio.bi_status == BLK_STS_OK);
btrfs_finish_ordered_extent(cb->bbio.ordered, NULL, cb->start, cb->len,
cb->bbio.bio.bi_status == BLK_STS_OK);
if (cb->writeback)
end_compressed_writeback(cb);
@ -281,32 +274,31 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
* This also checksums the file bytes and gets things ready for
* the end io hooks.
*/
void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int len, u64 disk_start,
unsigned int compressed_len,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback)
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct compressed_bio *cb;
ASSERT(IS_ALIGNED(start, fs_info->sectorsize) &&
IS_ALIGNED(len, fs_info->sectorsize));
ASSERT(IS_ALIGNED(ordered->file_offset, fs_info->sectorsize));
ASSERT(IS_ALIGNED(ordered->num_bytes, fs_info->sectorsize));
write_flags |= REQ_BTRFS_ONE_ORDERED;
cb = alloc_compressed_bio(inode, start, REQ_OP_WRITE | write_flags,
cb = alloc_compressed_bio(inode, ordered->file_offset,
REQ_OP_WRITE | write_flags,
end_compressed_bio_write);
cb->start = start;
cb->len = len;
cb->start = ordered->file_offset;
cb->len = ordered->num_bytes;
cb->compressed_pages = compressed_pages;
cb->compressed_len = compressed_len;
cb->compressed_len = ordered->disk_num_bytes;
cb->writeback = writeback;
INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
cb->nr_pages = nr_pages;
cb->bbio.bio.bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
cb->bbio.ordered = ordered;
btrfs_add_compressed_bio_pages(cb);
btrfs_submit_bio(&cb->bbio, 0);
@ -421,7 +413,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
*/
if (!em || cur < em->start ||
(cur + fs_info->sectorsize > extent_map_end(em)) ||
(em->block_start >> 9) != orig_bio->bi_iter.bi_sector) {
(em->block_start >> SECTOR_SHIFT) != orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, cur, page_end, NULL);
unlock_page(page);
@ -472,7 +464,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
* After the compressed pages are read, we copy the bytes into the
* bio we were passed and then call the bio end_io calls
*/
void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num)
void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
{
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
@ -538,7 +530,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num)
if (memstall)
psi_memstall_leave(&pflags);
btrfs_submit_bio(&cb->bbio, mirror_num);
btrfs_submit_bio(&cb->bbio, 0);
return;
out_free_compressed_pages:

View File

@ -10,6 +10,7 @@
#include "bio.h"
struct btrfs_inode;
struct btrfs_ordered_extent;
/*
* We want to make sure that amount of RAM required to uncompress an extent is
@ -86,14 +87,12 @@ int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
int btrfs_decompress_buf2page(const char *buf, u32 buf_len,
struct compressed_bio *cb, u32 decompressed);
void btrfs_submit_compressed_write(struct btrfs_inode *inode, u64 start,
unsigned int len, u64 disk_start,
unsigned int compressed_len,
void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
struct page **compressed_pages,
unsigned int nr_pages,
blk_opf_t write_flags,
bool writeback);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio, int mirror_num);
void btrfs_submit_compressed_read(struct btrfs_bio *bbio);
unsigned int btrfs_compress_str2level(unsigned int type, const char *str);

View File

@ -37,8 +37,6 @@ static int push_node_left(struct btrfs_trans_handle *trans,
static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot);
static const struct btrfs_csums {
u16 size;
@ -150,13 +148,19 @@ static inline void copy_leaf_items(const struct extent_buffer *dst,
nr_items * sizeof(struct btrfs_item));
}
/* This exists for btrfs-progs usages. */
u16 btrfs_csum_type_size(u16 type)
{
return btrfs_csums[type].size;
}
int btrfs_super_csum_size(const struct btrfs_super_block *s)
{
u16 t = btrfs_super_csum_type(s);
/*
* csum type is validated at mount time
*/
return btrfs_csums[t].size;
return btrfs_csum_type_size(t);
}
const char *btrfs_super_csum_name(u16 csum_type)
@ -417,9 +421,13 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
&refs, &flags);
if (ret)
return ret;
if (refs == 0) {
ret = -EROFS;
btrfs_handle_fs_error(fs_info, ret, NULL);
if (unlikely(refs == 0)) {
btrfs_crit(fs_info,
"found 0 references for tree block at bytenr %llu level %d root %llu",
buf->start, btrfs_header_level(buf),
btrfs_root_id(root));
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
return ret;
}
} else {
@ -464,10 +472,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
return ret;
}
if (new_flags != 0) {
int level = btrfs_header_level(buf);
ret = btrfs_set_disk_extent_flags(trans, buf,
new_flags, level);
ret = btrfs_set_disk_extent_flags(trans, buf, new_flags);
if (ret)
return ret;
}
@ -583,9 +588,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
parent_start = buf->start;
atomic_inc(&cow->refs);
ret = btrfs_tree_mod_log_insert_root(root->node, cow, true);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
atomic_inc(&cow->refs);
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, btrfs_root_id(root), buf,
@ -594,8 +604,14 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
add_root_to_dirty_list(root);
} else {
WARN_ON(trans->transid != btrfs_header_generation(parent));
btrfs_tree_mod_log_insert_key(parent, parent_slot,
BTRFS_MOD_LOG_KEY_REPLACE);
ret = btrfs_tree_mod_log_insert_key(parent, parent_slot,
BTRFS_MOD_LOG_KEY_REPLACE);
if (ret) {
btrfs_tree_unlock(cow);
free_extent_buffer(cow);
btrfs_abort_transaction(trans, ret);
return ret;
}
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
btrfs_set_node_ptr_generation(parent, parent_slot,
@ -1028,8 +1044,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
child = btrfs_read_node_slot(mid, 0);
if (IS_ERR(child)) {
ret = PTR_ERR(child);
btrfs_handle_fs_error(fs_info, ret, NULL);
goto enospc;
goto out;
}
btrfs_tree_lock(child);
@ -1038,11 +1053,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (ret) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
goto enospc;
goto out;
}
ret = btrfs_tree_mod_log_insert_root(root->node, child, true);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_tree_unlock(child);
free_extent_buffer(child);
btrfs_abort_transaction(trans, ret);
goto out;
}
rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
@ -1070,7 +1090,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (IS_ERR(left)) {
ret = PTR_ERR(left);
left = NULL;
goto enospc;
goto out;
}
__btrfs_tree_lock(left, BTRFS_NESTING_LEFT);
@ -1079,7 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NESTING_LEFT_COW);
if (wret) {
ret = wret;
goto enospc;
goto out;
}
}
@ -1088,7 +1108,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (IS_ERR(right)) {
ret = PTR_ERR(right);
right = NULL;
goto enospc;
goto out;
}
__btrfs_tree_lock(right, BTRFS_NESTING_RIGHT);
@ -1097,7 +1117,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
BTRFS_NESTING_RIGHT_COW);
if (wret) {
ret = wret;
goto enospc;
goto out;
}
}
@ -1119,7 +1139,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(right) == 0) {
btrfs_clear_buffer_dirty(trans, right);
btrfs_tree_unlock(right);
del_ptr(root, path, level + 1, pslot + 1);
ret = btrfs_del_ptr(trans, root, path, level + 1, pslot + 1);
if (ret < 0) {
free_extent_buffer_stale(right);
right = NULL;
goto out;
}
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, btrfs_root_id(root), right,
0, 1);
@ -1130,7 +1155,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &right_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_set_node_key(parent, &right_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
}
@ -1145,15 +1173,19 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
* otherwise we would have pulled some pointers from the
* right
*/
if (!left) {
ret = -EROFS;
btrfs_handle_fs_error(fs_info, ret, NULL);
goto enospc;
if (unlikely(!left)) {
btrfs_crit(fs_info,
"missing left child when middle child only has 1 item, parent bytenr %llu level %d mid bytenr %llu root %llu",
parent->start, btrfs_header_level(parent),
mid->start, btrfs_root_id(root));
ret = -EUCLEAN;
btrfs_abort_transaction(trans, ret);
goto out;
}
wret = balance_node_right(trans, mid, left);
if (wret < 0) {
ret = wret;
goto enospc;
goto out;
}
if (wret == 1) {
wret = push_node_left(trans, left, mid, 1);
@ -1165,7 +1197,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(mid) == 0) {
btrfs_clear_buffer_dirty(trans, mid);
btrfs_tree_unlock(mid);
del_ptr(root, path, level + 1, pslot);
ret = btrfs_del_ptr(trans, root, path, level + 1, pslot);
if (ret < 0) {
free_extent_buffer_stale(mid);
mid = NULL;
goto out;
}
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, btrfs_root_id(root), mid, 0, 1);
free_extent_buffer_stale(mid);
@ -1176,7 +1213,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &mid_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
goto out;
}
btrfs_set_node_key(parent, &mid_key, pslot);
btrfs_mark_buffer_dirty(parent);
}
@ -1202,7 +1242,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (orig_ptr !=
btrfs_node_blockptr(path->nodes[level], path->slots[level]))
BUG();
enospc:
out:
if (right) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
@ -1278,7 +1318,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(mid, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot,
BTRFS_MOD_LOG_KEY_REPLACE);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_tree_unlock(left);
free_extent_buffer(left);
btrfs_abort_transaction(trans, ret);
return ret;
}
btrfs_set_node_key(parent, &disk_key, pslot);
btrfs_mark_buffer_dirty(parent);
if (btrfs_header_nritems(left) > orig_slot) {
@ -1333,7 +1378,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
btrfs_node_key(right, &disk_key, 0);
ret = btrfs_tree_mod_log_insert_key(parent, pslot + 1,
BTRFS_MOD_LOG_KEY_REPLACE);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
btrfs_abort_transaction(trans, ret);
return ret;
}
btrfs_set_node_key(parent, &disk_key, pslot + 1);
btrfs_mark_buffer_dirty(parent);
@ -2378,6 +2428,87 @@ int btrfs_search_old_slot(struct btrfs_root *root, const struct btrfs_key *key,
return ret;
}
/*
* Search the tree again to find a leaf with smaller keys.
* Returns 0 if it found something.
* Returns 1 if there are no smaller keys.
* Returns < 0 on error.
*
* This may release the path, and so you may lose any locks held at the
* time you call it.
*/
static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
orig_key = key;
if (key.offset > 0) {
key.offset--;
} else if (key.type > 0) {
key.type--;
key.offset = (u64)-1;
} else if (key.objectid > 0) {
key.objectid--;
key.type = (u8)-1;
key.offset = (u64)-1;
} else {
return 1;
}
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret <= 0)
return ret;
/*
* Previous key not found. Even if we were at slot 0 of the leaf we had
* before releasing the path and calling btrfs_search_slot(), we now may
* be in a slot pointing to the same original key - this can happen if
* after we released the path, one of more items were moved from a
* sibling leaf into the front of the leaf we had due to an insertion
* (see push_leaf_right()).
* If we hit this case and our slot is > 0 and just decrement the slot
* so that the caller does not process the same key again, which may or
* may not break the caller, depending on its logic.
*/
if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
ret = comp_keys(&found_key, &orig_key);
if (ret == 0) {
if (path->slots[0] > 0) {
path->slots[0]--;
return 0;
}
/*
* At slot 0, same key as before, it means orig_key is
* the lowest, leftmost, key in the tree. We're done.
*/
return 1;
}
}
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
* We might have had an item with the previous key in the tree right
* before we released our path. And after we released our path, that
* item might have been pushed to the first slot (0) of the leaf we
* were holding due to a tree balance. Alternatively, an item with the
* previous key can exist as the only element of a leaf (big fat item).
* Therefore account for these 2 cases, so that our callers (like
* btrfs_previous_item) don't miss an existing item with a key matching
* the previous key we computed above.
*/
if (ret <= 0)
return 0;
return 1;
}
/*
* helper to use instead of search slot if no exact match is needed but
* instead the next or previous item should be returned.
@ -2552,6 +2683,7 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
if (slot > 0) {
btrfs_item_key(eb, &disk_key, slot - 1);
if (unlikely(comp_keys(&disk_key, new_key) >= 0)) {
btrfs_print_leaf(eb);
btrfs_crit(fs_info,
"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
slot, btrfs_disk_key_objectid(&disk_key),
@ -2559,13 +2691,13 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
btrfs_disk_key_offset(&disk_key),
new_key->objectid, new_key->type,
new_key->offset);
btrfs_print_leaf(eb);
BUG();
}
}
if (slot < btrfs_header_nritems(eb) - 1) {
btrfs_item_key(eb, &disk_key, slot + 1);
if (unlikely(comp_keys(&disk_key, new_key) <= 0)) {
btrfs_print_leaf(eb);
btrfs_crit(fs_info,
"slot %u key (%llu %u %llu) new key (%llu %u %llu)",
slot, btrfs_disk_key_objectid(&disk_key),
@ -2573,7 +2705,6 @@ void btrfs_set_item_key_safe(struct btrfs_fs_info *fs_info,
btrfs_disk_key_offset(&disk_key),
new_key->objectid, new_key->type,
new_key->offset);
btrfs_print_leaf(eb);
BUG();
}
}
@ -2626,7 +2757,7 @@ static bool check_sibling_keys(struct extent_buffer *left,
btrfs_item_key_to_cpu(right, &right_first, 0);
}
if (btrfs_comp_cpu_keys(&left_last, &right_first) >= 0) {
if (unlikely(btrfs_comp_cpu_keys(&left_last, &right_first) >= 0)) {
btrfs_crit(left->fs_info, "left extent buffer:");
btrfs_print_tree(left, false);
btrfs_crit(left->fs_info, "right extent buffer:");
@ -2703,8 +2834,8 @@ static int push_node_left(struct btrfs_trans_handle *trans,
if (push_items < src_nritems) {
/*
* Don't call btrfs_tree_mod_log_insert_move() here, key removal
* was already fully logged by btrfs_tree_mod_log_eb_copy() above.
* btrfs_tree_mod_log_eb_copy handles logging the move, so we
* don't need to do an explicit tree mod log operation for it.
*/
memmove_extent_buffer(src, btrfs_node_key_ptr_offset(src, 0),
btrfs_node_key_ptr_offset(src, push_items),
@ -2765,8 +2896,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
btrfs_abort_transaction(trans, ret);
return ret;
}
ret = btrfs_tree_mod_log_insert_move(dst, push_items, 0, dst_nritems);
BUG_ON(ret < 0);
/*
* btrfs_tree_mod_log_eb_copy handles logging the move, so we don't
* need to do an explicit tree mod log operation for it.
*/
memmove_extent_buffer(dst, btrfs_node_key_ptr_offset(dst, push_items),
btrfs_node_key_ptr_offset(dst, 0),
(dst_nritems) *
@ -2840,7 +2974,12 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
old = root->node;
ret = btrfs_tree_mod_log_insert_root(root->node, c, false);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_free_tree_block(trans, btrfs_root_id(root), c, 0, 1);
btrfs_tree_unlock(c);
free_extent_buffer(c);
return ret;
}
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
@ -2861,10 +3000,10 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
* slot and level indicate where you want the key to go, and
* blocknr is the block the key points to.
*/
static void insert_ptr(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
int slot, int level)
static int insert_ptr(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_disk_key *key, u64 bytenr,
int slot, int level)
{
struct extent_buffer *lower;
int nritems;
@ -2880,7 +3019,10 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_move(lower, slot + 1,
slot, nritems - slot);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
memmove_extent_buffer(lower,
btrfs_node_key_ptr_offset(lower, slot + 1),
@ -2890,7 +3032,10 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
if (level) {
ret = btrfs_tree_mod_log_insert_key(lower, slot,
BTRFS_MOD_LOG_KEY_ADD);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
btrfs_set_node_key(lower, key, slot);
btrfs_set_node_blockptr(lower, slot, bytenr);
@ -2898,6 +3043,8 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(lower, slot, trans->transid);
btrfs_set_header_nritems(lower, nritems + 1);
btrfs_mark_buffer_dirty(lower);
return 0;
}
/*
@ -2962,6 +3109,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
ret = btrfs_tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid);
if (ret) {
btrfs_tree_unlock(split);
free_extent_buffer(split);
btrfs_abort_transaction(trans, ret);
return ret;
}
@ -2975,8 +3124,13 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
btrfs_mark_buffer_dirty(split);
insert_ptr(trans, path, &disk_key, split->start,
path->slots[level + 1] + 1, level + 1);
ret = insert_ptr(trans, path, &disk_key, split->start,
path->slots[level + 1] + 1, level + 1);
if (ret < 0) {
btrfs_tree_unlock(split);
free_extent_buffer(split);
return ret;
}
if (path->slots[level] >= mid) {
path->slots[level] -= mid;
@ -2996,7 +3150,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
* and nr indicate which items in the leaf to check. This totals up the
* space used both by the item structs and the item data
*/
static int leaf_space_used(struct extent_buffer *l, int start, int nr)
static int leaf_space_used(const struct extent_buffer *l, int start, int nr)
{
int data_len;
int nritems = btrfs_header_nritems(l);
@ -3016,7 +3170,7 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
* the start of the leaf data. IOW, how much room
* the leaf has left for both items and data
*/
noinline int btrfs_leaf_free_space(struct extent_buffer *leaf)
int btrfs_leaf_free_space(const struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
int nritems = btrfs_header_nritems(leaf);
@ -3453,16 +3607,17 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root
* split the path's leaf in two, making sure there is at least data_size
* available for the resulting leaf level of the path.
*/
static noinline void copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct extent_buffer *l,
struct extent_buffer *right,
int slot, int mid, int nritems)
static noinline int copy_for_split(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct extent_buffer *l,
struct extent_buffer *right,
int slot, int mid, int nritems)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
int data_copy_size;
int rt_data_off;
int i;
int ret;
struct btrfs_disk_key disk_key;
struct btrfs_map_token token;
@ -3487,7 +3642,9 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(l, mid);
btrfs_item_key(right, &disk_key, 0);
insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
ret = insert_ptr(trans, path, &disk_key, right->start, path->slots[1] + 1, 1);
if (ret < 0)
return ret;
btrfs_mark_buffer_dirty(right);
btrfs_mark_buffer_dirty(l);
@ -3505,6 +3662,8 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
}
BUG_ON(path->slots[0] < 0);
return 0;
}
/*
@ -3703,8 +3862,13 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
if (split == 0) {
if (mid <= slot) {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, path, &disk_key,
right->start, path->slots[1] + 1, 1);
ret = insert_ptr(trans, path, &disk_key,
right->start, path->slots[1] + 1, 1);
if (ret < 0) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
}
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@ -3712,8 +3876,13 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
path->slots[1] += 1;
} else {
btrfs_set_header_nritems(right, 0);
insert_ptr(trans, path, &disk_key,
right->start, path->slots[1], 1);
ret = insert_ptr(trans, path, &disk_key,
right->start, path->slots[1], 1);
if (ret < 0) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
}
btrfs_tree_unlock(path->nodes[0]);
free_extent_buffer(path->nodes[0]);
path->nodes[0] = right;
@ -3729,7 +3898,12 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return ret;
}
copy_for_split(trans, path, l, right, slot, mid, nritems);
ret = copy_for_split(trans, path, l, right, slot, mid, nritems);
if (ret < 0) {
btrfs_tree_unlock(right);
free_extent_buffer(right);
return ret;
}
if (split == 2) {
BUG_ON(num_doubles != 0);
@ -3826,7 +4000,12 @@ static noinline int split_item(struct btrfs_path *path,
struct btrfs_disk_key disk_key;
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item));
/*
* Shouldn't happen because the caller must have previously called
* setup_leaf_for_split() to make room for the new item in the leaf.
*/
if (WARN_ON(btrfs_leaf_free_space(leaf) < sizeof(struct btrfs_item)))
return -ENOSPC;
orig_slot = path->slots[0];
orig_offset = btrfs_item_offset(leaf, path->slots[0]);
@ -4273,9 +4452,11 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
*
* the tree should have been previously balanced so the deletion does not
* empty a node.
*
* This is exported for use inside btrfs-progs, don't un-export it.
*/
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot)
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
@ -4286,7 +4467,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
if (level) {
ret = btrfs_tree_mod_log_insert_move(parent, slot,
slot + 1, nritems - slot - 1);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
memmove_extent_buffer(parent,
btrfs_node_key_ptr_offset(parent, slot),
@ -4296,7 +4480,10 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
} else if (level) {
ret = btrfs_tree_mod_log_insert_key(parent, slot,
BTRFS_MOD_LOG_KEY_REMOVE);
BUG_ON(ret < 0);
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
return ret;
}
}
nritems--;
@ -4312,6 +4499,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
fixup_low_keys(path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
return 0;
}
/*
@ -4324,13 +4512,17 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
* The path must have already been setup for deleting the leaf, including
* all the proper balancing. path->nodes[1] must be locked.
*/
static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct extent_buffer *leaf)
static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
struct extent_buffer *leaf)
{
int ret;
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
del_ptr(root, path, 1, path->slots[1]);
ret = btrfs_del_ptr(trans, root, path, 1, path->slots[1]);
if (ret < 0)
return ret;
/*
* btrfs_free_extent is expensive, we want to make sure we
@ -4343,6 +4535,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
atomic_inc(&leaf->refs);
btrfs_free_tree_block(trans, btrfs_root_id(root), leaf, 0, 1);
free_extent_buffer_stale(leaf);
return 0;
}
/*
* delete the item at the leaf level in path. If that empties
@ -4392,7 +4585,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
btrfs_set_header_level(leaf, 0);
} else {
btrfs_clear_buffer_dirty(trans, leaf);
btrfs_del_leaf(trans, root, path, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
if (ret < 0)
return ret;
}
} else {
int used = leaf_space_used(leaf, 0, nritems);
@ -4416,7 +4611,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
/* push_leaf_left fixes the path.
* make sure the path still points to our leaf
* for possible call to del_ptr below
* for possible call to btrfs_del_ptr below
*/
slot = path->slots[1];
atomic_inc(&leaf->refs);
@ -4453,7 +4648,9 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
if (btrfs_header_nritems(leaf) == 0) {
path->slots[1] = slot;
btrfs_del_leaf(trans, root, path, leaf);
ret = btrfs_del_leaf(trans, root, path, leaf);
if (ret < 0)
return ret;
free_extent_buffer(leaf);
ret = 0;
} else {
@ -4473,86 +4670,6 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
return ret;
}
/*
* search the tree again to find a leaf with lesser keys
* returns 0 if it found something or 1 if there are no lesser leaves.
* returns < 0 on io errors.
*
* This may release the path, and so you may lose any locks held at the
* time you call it.
*/
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_key orig_key;
struct btrfs_disk_key found_key;
int ret;
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
orig_key = key;
if (key.offset > 0) {
key.offset--;
} else if (key.type > 0) {
key.type--;
key.offset = (u64)-1;
} else if (key.objectid > 0) {
key.objectid--;
key.type = (u8)-1;
key.offset = (u64)-1;
} else {
return 1;
}
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret <= 0)
return ret;
/*
* Previous key not found. Even if we were at slot 0 of the leaf we had
* before releasing the path and calling btrfs_search_slot(), we now may
* be in a slot pointing to the same original key - this can happen if
* after we released the path, one of more items were moved from a
* sibling leaf into the front of the leaf we had due to an insertion
* (see push_leaf_right()).
* If we hit this case and our slot is > 0 and just decrement the slot
* so that the caller does not process the same key again, which may or
* may not break the caller, depending on its logic.
*/
if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) {
btrfs_item_key(path->nodes[0], &found_key, path->slots[0]);
ret = comp_keys(&found_key, &orig_key);
if (ret == 0) {
if (path->slots[0] > 0) {
path->slots[0]--;
return 0;
}
/*
* At slot 0, same key as before, it means orig_key is
* the lowest, leftmost, key in the tree. We're done.
*/
return 1;
}
}
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
/*
* We might have had an item with the previous key in the tree right
* before we released our path. And after we released our path, that
* item might have been pushed to the first slot (0) of the leaf we
* were holding due to a tree balance. Alternatively, an item with the
* previous key can exist as the only element of a leaf (big fat item).
* Therefore account for these 2 cases, so that our callers (like
* btrfs_previous_item) don't miss an existing item with a key matching
* the previous key we computed above.
*/
if (ret <= 0)
return 0;
return 1;
}
/*
* A helper function to walk down the tree starting at min_key, and looking
* for nodes or leaves that are have a minimum transaction id.

View File

@ -541,6 +541,8 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
struct extent_buffer **cow_ret, u64 new_root_objectid);
int btrfs_block_can_be_shared(struct btrfs_root *root,
struct extent_buffer *buf);
int btrfs_del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
void btrfs_extend_item(struct btrfs_path *path, u32 data_size);
void btrfs_truncate_item(struct btrfs_path *path, u32 new_size, int from_end);
int btrfs_split_item(struct btrfs_trans_handle *trans,
@ -633,7 +635,6 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
return btrfs_insert_empty_items(trans, root, path, &batch);
}
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq);
@ -686,7 +687,7 @@ static inline int btrfs_next_item(struct btrfs_root *root, struct btrfs_path *p)
{
return btrfs_next_old_item(root, p, 0);
}
int btrfs_leaf_free_space(struct extent_buffer *leaf);
int btrfs_leaf_free_space(const struct extent_buffer *leaf);
static inline int is_fstree(u64 rootid)
{
@ -702,6 +703,7 @@ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
}
u16 btrfs_csum_type_size(u16 type);
int btrfs_super_csum_size(const struct btrfs_super_block *s);
const char *btrfs_super_csum_name(u16 csum_type);
const char *btrfs_super_csum_driver(u16 csum_type);

View File

@ -1040,7 +1040,8 @@ static int defrag_one_locked_target(struct btrfs_inode *inode,
clear_extent_bit(&inode->io_tree, start, start + len - 1,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, cached_state);
set_extent_defrag(&inode->io_tree, start, start + len - 1, cached_state);
set_extent_bit(&inode->io_tree, start, start + len - 1,
EXTENT_DELALLOC | EXTENT_DEFRAG, cached_state);
/* Update the page status */
for (i = start_index - first_index; i <= last_index - first_index; i++) {

View File

@ -407,7 +407,6 @@ static inline void drop_delayed_ref(struct btrfs_delayed_ref_root *delayed_refs,
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
list_del(&ref->add_list);
ref->in_tree = 0;
btrfs_put_delayed_ref(ref);
atomic_dec(&delayed_refs->num_entries);
}
@ -507,6 +506,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
{
struct btrfs_delayed_ref_head *head;
lockdep_assert_held(&delayed_refs->lock);
again:
head = find_ref_head(delayed_refs, delayed_refs->run_delayed_start,
true);
@ -531,7 +531,7 @@ struct btrfs_delayed_ref_head *btrfs_select_ref_head(
href_node);
}
head->processing = 1;
head->processing = true;
WARN_ON(delayed_refs->num_heads_ready == 0);
delayed_refs->num_heads_ready--;
delayed_refs->run_delayed_start = head->bytenr +
@ -549,31 +549,35 @@ void btrfs_delete_ref_head(struct btrfs_delayed_ref_root *delayed_refs,
RB_CLEAR_NODE(&head->href_node);
atomic_dec(&delayed_refs->num_entries);
delayed_refs->num_heads--;
if (head->processing == 0)
if (!head->processing)
delayed_refs->num_heads_ready--;
}
/*
* Helper to insert the ref_node to the tail or merge with tail.
*
* Return 0 for insert.
* Return >0 for merge.
* Return false if the ref was inserted.
* Return true if the ref was merged into an existing one (and therefore can be
* freed by the caller).
*/
static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
static bool insert_delayed_ref(struct btrfs_delayed_ref_root *root,
struct btrfs_delayed_ref_head *href,
struct btrfs_delayed_ref_node *ref)
{
struct btrfs_delayed_ref_node *exist;
int mod;
int ret = 0;
spin_lock(&href->lock);
exist = tree_insert(&href->ref_tree, ref);
if (!exist)
goto inserted;
if (!exist) {
if (ref->action == BTRFS_ADD_DELAYED_REF)
list_add_tail(&ref->add_list, &href->ref_add_list);
atomic_inc(&root->num_entries);
spin_unlock(&href->lock);
return false;
}
/* Now we are sure we can merge */
ret = 1;
if (exist->action == ref->action) {
mod = ref->ref_mod;
} else {
@ -600,13 +604,7 @@ static int insert_delayed_ref(struct btrfs_delayed_ref_root *root,
if (exist->ref_mod == 0)
drop_delayed_ref(root, href, exist);
spin_unlock(&href->lock);
return ret;
inserted:
if (ref->action == BTRFS_ADD_DELAYED_REF)
list_add_tail(&ref->add_list, &href->ref_add_list);
atomic_inc(&root->num_entries);
spin_unlock(&href->lock);
return ret;
return true;
}
/*
@ -699,34 +697,38 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
bool is_system)
{
int count_mod = 1;
int must_insert_reserved = 0;
bool must_insert_reserved = false;
/* If reserved is provided, it must be a data extent. */
BUG_ON(!is_data && reserved);
/*
* The head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
if (action == BTRFS_UPDATE_DELAYED_HEAD)
switch (action) {
case BTRFS_UPDATE_DELAYED_HEAD:
count_mod = 0;
else if (action == BTRFS_DROP_DELAYED_REF)
break;
case BTRFS_DROP_DELAYED_REF:
/*
* The head node stores the sum of all the mods, so dropping a ref
* should drop the sum in the head node by one.
*/
count_mod = -1;
/*
* BTRFS_ADD_DELAYED_EXTENT means that we need to update the reserved
* accounting when the extent is finally added, or if a later
* modification deletes the delayed ref without ever inserting the
* extent into the extent allocation tree. ref->must_insert_reserved
* is the flag used to record that accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not required.
*/
if (action == BTRFS_ADD_DELAYED_EXTENT)
must_insert_reserved = 1;
else
must_insert_reserved = 0;
break;
case BTRFS_ADD_DELAYED_EXTENT:
/*
* BTRFS_ADD_DELAYED_EXTENT means that we need to update the
* reserved accounting when the extent is finally added, or if a
* later modification deletes the delayed ref without ever
* inserting the extent into the extent allocation tree.
* ref->must_insert_reserved is the flag used to record that
* accounting mods are required.
*
* Once we record must_insert_reserved, switch the action to
* BTRFS_ADD_DELAYED_REF because other special casing is not
* required.
*/
must_insert_reserved = true;
break;
}
refcount_set(&head_ref->refs, 1);
head_ref->bytenr = bytenr;
@ -738,7 +740,7 @@ static void init_delayed_ref_head(struct btrfs_delayed_ref_head *head_ref,
head_ref->ref_tree = RB_ROOT_CACHED;
INIT_LIST_HEAD(&head_ref->ref_add_list);
RB_CLEAR_NODE(&head_ref->href_node);
head_ref->processing = 0;
head_ref->processing = false;
head_ref->total_ref_mod = count_mod;
spin_lock_init(&head_ref->lock);
mutex_init(&head_ref->mutex);
@ -763,11 +765,11 @@ static noinline struct btrfs_delayed_ref_head *
add_delayed_ref_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref,
struct btrfs_qgroup_extent_record *qrecord,
int action, int *qrecord_inserted_ret)
int action, bool *qrecord_inserted_ret)
{
struct btrfs_delayed_ref_head *existing;
struct btrfs_delayed_ref_root *delayed_refs;
int qrecord_inserted = 0;
bool qrecord_inserted = false;
delayed_refs = &trans->transaction->delayed_refs;
@ -777,7 +779,7 @@ add_delayed_ref_head(struct btrfs_trans_handle *trans,
delayed_refs, qrecord))
kfree(qrecord);
else
qrecord_inserted = 1;
qrecord_inserted = true;
}
trace_add_delayed_ref_head(trans->fs_info, head_ref, action);
@ -853,8 +855,6 @@ static void init_delayed_ref_common(struct btrfs_fs_info *fs_info,
ref->num_bytes = num_bytes;
ref->ref_mod = 1;
ref->action = action;
ref->is_head = 0;
ref->in_tree = 1;
ref->seq = seq;
ref->type = ref_type;
RB_CLEAR_NODE(&ref->ref_node);
@ -875,11 +875,11 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
bool qrecord_inserted;
bool is_system;
bool merged;
int action = generic_ref->action;
int level = generic_ref->tree_ref.level;
int ret;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
@ -935,7 +935,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
@ -947,7 +947,7 @@ int btrfs_add_delayed_tree_ref(struct btrfs_trans_handle *trans,
trace_add_delayed_tree_ref(fs_info, &ref->node, ref,
action == BTRFS_ADD_DELAYED_EXTENT ?
BTRFS_ADD_DELAYED_REF : action);
if (ret > 0)
if (merged)
kmem_cache_free(btrfs_delayed_tree_ref_cachep, ref);
if (qrecord_inserted)
@ -968,9 +968,9 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_head *head_ref;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_qgroup_extent_record *record = NULL;
int qrecord_inserted;
bool qrecord_inserted;
int action = generic_ref->action;
int ret;
bool merged;
u64 bytenr = generic_ref->bytenr;
u64 num_bytes = generic_ref->len;
u64 parent = generic_ref->parent;
@ -1027,7 +1027,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
head_ref = add_delayed_ref_head(trans, head_ref, record,
action, &qrecord_inserted);
ret = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
merged = insert_delayed_ref(delayed_refs, head_ref, &ref->node);
spin_unlock(&delayed_refs->lock);
/*
@ -1039,7 +1039,7 @@ int btrfs_add_delayed_data_ref(struct btrfs_trans_handle *trans,
trace_add_delayed_data_ref(trans->fs_info, &ref->node, ref,
action == BTRFS_ADD_DELAYED_EXTENT ?
BTRFS_ADD_DELAYED_REF : action);
if (ret > 0)
if (merged)
kmem_cache_free(btrfs_delayed_data_ref_cachep, ref);

View File

@ -48,9 +48,6 @@ struct btrfs_delayed_ref_node {
unsigned int action:8;
unsigned int type:8;
/* is this node still in the rbtree? */
unsigned int is_head:1;
unsigned int in_tree:1;
};
struct btrfs_delayed_extent_op {
@ -70,20 +67,26 @@ struct btrfs_delayed_extent_op {
struct btrfs_delayed_ref_head {
u64 bytenr;
u64 num_bytes;
refcount_t refs;
/*
* For insertion into struct btrfs_delayed_ref_root::href_root.
* Keep it in the same cache line as 'bytenr' for more efficient
* searches in the rbtree.
*/
struct rb_node href_node;
/*
* the mutex is held while running the refs, and it is also
* held when checking the sum of reference modifications.
*/
struct mutex mutex;
refcount_t refs;
/* Protects 'ref_tree' and 'ref_add_list'. */
spinlock_t lock;
struct rb_root_cached ref_tree;
/* accumulate add BTRFS_ADD_DELAYED_REF nodes to this ref_add_list. */
struct list_head ref_add_list;
struct rb_node href_node;
struct btrfs_delayed_extent_op *extent_op;
/*
@ -113,10 +116,10 @@ struct btrfs_delayed_ref_head {
* we need to update the in ram accounting to properly reflect
* the free has happened.
*/
unsigned int must_insert_reserved:1;
unsigned int is_data:1;
unsigned int is_system:1;
unsigned int processing:1;
bool must_insert_reserved;
bool is_data;
bool is_system;
bool processing;
};
struct btrfs_delayed_tree_ref {
@ -337,7 +340,7 @@ static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref)
{
WARN_ON(refcount_read(&ref->refs) == 0);
if (refcount_dec_and_test(&ref->refs)) {
WARN_ON(ref->in_tree);
WARN_ON(!RB_EMPTY_NODE(&ref->ref_node));
switch (ref->type) {
case BTRFS_TREE_BLOCK_REF_KEY:
case BTRFS_SHARED_BLOCK_REF_KEY:

View File

@ -41,7 +41,7 @@
* All new writes will be written to both target and source devices, so even
* if replace gets canceled, sources device still contains up-to-date data.
*
* Location: handle_ops_on_dev_replace() from __btrfs_map_block()
* Location: handle_ops_on_dev_replace() from btrfs_map_block()
* Start: btrfs_dev_replace_start()
* End: btrfs_dev_replace_finishing()
* Content: Latest data/metadata
@ -795,8 +795,8 @@ static int btrfs_set_target_alloc_state(struct btrfs_device *srcdev,
while (!find_first_extent_bit(&srcdev->alloc_state, start,
&found_start, &found_end,
CHUNK_ALLOCATED, &cached_state)) {
ret = set_extent_bits(&tgtdev->alloc_state, found_start,
found_end, CHUNK_ALLOCATED);
ret = set_extent_bit(&tgtdev->alloc_state, found_start,
found_end, CHUNK_ALLOCATED, NULL);
if (ret)
break;
start = found_end + 1;

View File

@ -73,6 +73,23 @@ static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
return &discard_ctl->discard_list[block_group->discard_index];
}
/*
* Determine if async discard should be running.
*
* @discard_ctl: discard control
*
* Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
*/
static bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
struct btrfs_fs_info *fs_info = container_of(discard_ctl,
struct btrfs_fs_info,
discard_ctl);
return (!(fs_info->sb->s_flags & SB_RDONLY) &&
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}
static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group)
{
@ -544,23 +561,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
spin_unlock(&discard_ctl->lock);
}
/*
* Determine if async discard should be running.
*
* @discard_ctl: discard control
*
* Check if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
*/
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
{
struct btrfs_fs_info *fs_info = container_of(discard_ctl,
struct btrfs_fs_info,
discard_ctl);
return (!(fs_info->sb->s_flags & SB_RDONLY) &&
test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
}
/*
* Recalculate the base delay.
*

View File

@ -24,7 +24,6 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
struct btrfs_block_group *block_group);
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
bool override);
bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl);
/* Update operations */
void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl);

View File

@ -60,15 +60,6 @@
BTRFS_SUPER_FLAG_METADUMP |\
BTRFS_SUPER_FLAG_METADUMP_V2)
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages,
int mark);
static int btrfs_destroy_pinned_extent(struct btrfs_fs_info *fs_info,
struct extent_io_tree *pinned_extents);
static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info);
static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info);
@ -110,35 +101,27 @@ static void csum_tree_block(struct extent_buffer *buf, u8 *result)
* detect blocks that either didn't get written at all or got written
* in the wrong place.
*/
static int verify_parent_transid(struct extent_io_tree *io_tree,
struct extent_buffer *eb, u64 parent_transid,
int atomic)
int btrfs_buffer_uptodate(struct extent_buffer *eb, u64 parent_transid, int atomic)
{
struct extent_state *cached_state = NULL;
int ret;
if (!extent_buffer_uptodate(eb))
return 0;
if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
return 0;
return 1;
if (atomic)
return -EAGAIN;
lock_extent(io_tree, eb->start, eb->start + eb->len - 1, &cached_state);
if (extent_buffer_uptodate(eb) &&
btrfs_header_generation(eb) == parent_transid) {
ret = 0;
goto out;
}
btrfs_err_rl(eb->fs_info,
if (!extent_buffer_uptodate(eb) ||
btrfs_header_generation(eb) != parent_transid) {
btrfs_err_rl(eb->fs_info,
"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu",
eb->start, eb->read_mirror,
parent_transid, btrfs_header_generation(eb));
ret = 1;
clear_extent_buffer_uptodate(eb);
out:
unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
&cached_state);
return ret;
clear_extent_buffer_uptodate(eb);
return 0;
}
return 1;
}
static bool btrfs_supported_super_csum(u16 csum_type)
@ -180,64 +163,6 @@ int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
return 0;
}
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int found_level;
struct btrfs_key found_key;
int ret;
found_level = btrfs_header_level(eb);
if (found_level != level) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: tree level check failed\n");
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, level, found_level);
return -EIO;
}
if (!first_key)
return 0;
/*
* For live tree block (new tree blocks in current transaction),
* we need proper lock context to avoid race, which is impossible here.
* So we only checks tree blocks which is read from disk, whose
* generation <= fs_info->last_trans_committed.
*/
if (btrfs_header_generation(eb) > fs_info->last_trans_committed)
return 0;
/* We have @first_key, so this @eb must have at least one item */
if (btrfs_header_nritems(eb) == 0) {
btrfs_err(fs_info,
"invalid tree nritems, bytenr=%llu nritems=0 expect >0",
eb->start);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
return -EUCLEAN;
}
if (found_level)
btrfs_node_key_to_cpu(eb, &found_key, 0);
else
btrfs_item_key_to_cpu(eb, &found_key, 0);
ret = btrfs_comp_cpu_keys(first_key, &found_key);
if (ret) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: tree first key check failed\n");
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
eb->start, parent_transid, first_key->objectid,
first_key->type, first_key->offset,
found_key.objectid, found_key.type,
found_key.offset);
}
return ret;
}
static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb,
int mirror_num)
{
@ -312,12 +237,34 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb,
return ret;
}
static int csum_one_extent_buffer(struct extent_buffer *eb)
/*
* Checksum a dirty tree block before IO.
*/
blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
{
struct extent_buffer *eb = bbio->private;
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start = btrfs_header_bytenr(eb);
u8 result[BTRFS_CSUM_SIZE];
int ret;
/* Btree blocks are always contiguous on disk. */
if (WARN_ON_ONCE(bbio->file_offset != eb->start))
return BLK_STS_IOERR;
if (WARN_ON_ONCE(bbio->bio.bi_iter.bi_size != eb->len))
return BLK_STS_IOERR;
if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
WARN_ON_ONCE(found_start != 0);
return BLK_STS_OK;
}
if (WARN_ON_ONCE(found_start != eb->start))
return BLK_STS_IOERR;
if (WARN_ON(!btrfs_page_test_uptodate(fs_info, eb->pages[0], eb->start,
eb->len)))
return BLK_STS_IOERR;
ASSERT(memcmp_extent_buffer(eb, fs_info->fs_devices->metadata_uuid,
offsetof(struct btrfs_header, fsid),
BTRFS_FSID_SIZE) == 0);
@ -326,7 +273,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
if (btrfs_header_level(eb))
ret = btrfs_check_node(eb);
else
ret = btrfs_check_leaf_full(eb);
ret = btrfs_check_leaf(eb);
if (ret < 0)
goto error;
@ -344,8 +291,7 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
goto error;
}
write_extent_buffer(eb, result, 0, fs_info->csum_size);
return 0;
return BLK_STS_OK;
error:
btrfs_print_tree(eb, 0);
@ -359,103 +305,10 @@ static int csum_one_extent_buffer(struct extent_buffer *eb)
*/
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG) ||
btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID);
return ret;
}
/* Checksum all dirty extent buffers in one bio_vec */
static int csum_dirty_subpage_buffers(struct btrfs_fs_info *fs_info,
struct bio_vec *bvec)
{
struct page *page = bvec->bv_page;
u64 bvec_start = page_offset(page) + bvec->bv_offset;
u64 cur;
int ret = 0;
for (cur = bvec_start; cur < bvec_start + bvec->bv_len;
cur += fs_info->nodesize) {
struct extent_buffer *eb;
bool uptodate;
eb = find_extent_buffer(fs_info, cur);
uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
fs_info->nodesize);
/* A dirty eb shouldn't disappear from buffer_radix */
if (WARN_ON(!eb))
return -EUCLEAN;
if (WARN_ON(cur != btrfs_header_bytenr(eb))) {
free_extent_buffer(eb);
return -EUCLEAN;
}
if (WARN_ON(!uptodate)) {
free_extent_buffer(eb);
return -EUCLEAN;
}
ret = csum_one_extent_buffer(eb);
free_extent_buffer(eb);
if (ret < 0)
return ret;
}
return ret;
}
/*
* Checksum a dirty tree block before IO. This has extra checks to make sure
* we only fill in the checksum field in the first page of a multi-page block.
* For subpage extent buffers we need bvec to also read the offset in the page.
*/
static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct bio_vec *bvec)
{
struct page *page = bvec->bv_page;
u64 start = page_offset(page);
u64 found_start;
struct extent_buffer *eb;
if (fs_info->nodesize < PAGE_SIZE)
return csum_dirty_subpage_buffers(fs_info, bvec);
eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
return 0;
found_start = btrfs_header_bytenr(eb);
if (test_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags)) {
WARN_ON(found_start != 0);
return 0;
}
/*
* Please do not consolidate these warnings into a single if.
* It is useful to know what went wrong.
*/
if (WARN_ON(found_start != start))
return -EUCLEAN;
if (WARN_ON(!PageUptodate(page)))
return -EUCLEAN;
return csum_one_extent_buffer(eb);
}
blk_status_t btree_csum_one_bio(struct btrfs_bio *bbio)
{
struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
struct bvec_iter iter;
struct bio_vec bv;
int ret = 0;
bio_for_each_segment(bv, &bbio->bio, iter) {
ret = csum_dirty_buffer(fs_info, &bv);
if (ret)
break;
}
return errno_to_blk_status(ret);
}
static int check_tree_block_fsid(struct extent_buffer *eb)
static bool check_tree_block_fsid(struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
struct btrfs_fs_devices *fs_devices = fs_info->fs_devices, *seed_devs;
@ -475,18 +328,18 @@ static int check_tree_block_fsid(struct extent_buffer *eb)
metadata_uuid = fs_devices->fsid;
if (!memcmp(fsid, metadata_uuid, BTRFS_FSID_SIZE))
return 0;
return false;
list_for_each_entry(seed_devs, &fs_devices->seed_list, seed_list)
if (!memcmp(fsid, seed_devs->fsid, BTRFS_FSID_SIZE))
return 0;
return false;
return 1;
return true;
}
/* Do basic extent buffer checks at read time */
static int validate_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check)
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
u64 found_start;
@ -583,7 +436,7 @@ static int validate_extent_buffer(struct extent_buffer *eb,
* that we don't try and read the other copies of this block, just
* return -EIO.
*/
if (found_level == 0 && btrfs_check_leaf_full(eb)) {
if (found_level == 0 && btrfs_check_leaf(eb)) {
set_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
ret = -EIO;
}
@ -591,9 +444,7 @@ static int validate_extent_buffer(struct extent_buffer *eb,
if (found_level > 0 && btrfs_check_node(eb))
ret = -EIO;
if (!ret)
set_extent_buffer_uptodate(eb);
else
if (ret)
btrfs_err(fs_info,
"read time tree block corruption detected on logical %llu mirror %u",
eb->start, eb->read_mirror);
@ -601,105 +452,6 @@ static int validate_extent_buffer(struct extent_buffer *eb,
return ret;
}
static int validate_subpage_buffer(struct page *page, u64 start, u64 end,
int mirror, struct btrfs_tree_parent_check *check)
{
struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
struct extent_buffer *eb;
bool reads_done;
int ret = 0;
ASSERT(check);
/*
* We don't allow bio merge for subpage metadata read, so we should
* only get one eb for each endio hook.
*/
ASSERT(end == start + fs_info->nodesize - 1);
ASSERT(PagePrivate(page));
eb = find_extent_buffer(fs_info, start);
/*
* When we are reading one tree block, eb must have been inserted into
* the radix tree. If not, something is wrong.
*/
ASSERT(eb);
reads_done = atomic_dec_and_test(&eb->io_pages);
/* Subpage read must finish in page read */
ASSERT(reads_done);
eb->read_mirror = mirror;
if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
ret = -EIO;
goto err;
}
ret = validate_extent_buffer(eb, check);
if (ret < 0)
goto err;
set_extent_buffer_uptodate(eb);
free_extent_buffer(eb);
return ret;
err:
/*
* end_bio_extent_readpage decrements io_pages in case of error,
* make sure it has something to decrement.
*/
atomic_inc(&eb->io_pages);
clear_extent_buffer_uptodate(eb);
free_extent_buffer(eb);
return ret;
}
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror)
{
struct extent_buffer *eb;
int ret = 0;
int reads_done;
ASSERT(page->private);
if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
return validate_subpage_buffer(page, start, end, mirror,
&bbio->parent_check);
eb = (struct extent_buffer *)page->private;
/*
* The pending IO might have been the only thing that kept this buffer
* in memory. Make sure we have a ref for all this other checks
*/
atomic_inc(&eb->refs);
reads_done = atomic_dec_and_test(&eb->io_pages);
if (!reads_done)
goto err;
eb->read_mirror = mirror;
if (test_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags)) {
ret = -EIO;
goto err;
}
ret = validate_extent_buffer(eb, &bbio->parent_check);
err:
if (ret) {
/*
* our io error hook is going to dec the io pages
* again, we have to make sure it has something
* to decrement
*/
atomic_inc(&eb->io_pages);
clear_extent_buffer_uptodate(eb);
}
free_extent_buffer(eb);
return ret;
}
#ifdef CONFIG_MIGRATION
static int btree_migrate_folio(struct address_space *mapping,
struct folio *dst, struct folio *src, enum migrate_mode mode)
@ -1396,8 +1148,7 @@ static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->fs_roots_radix_lock);
root = radix_tree_lookup(&fs_info->fs_roots_radix,
(unsigned long)root_id);
if (root)
root = btrfs_grab_root(root);
root = btrfs_grab_root(root);
spin_unlock(&fs_info->fs_roots_radix_lock);
return root;
}
@ -1411,31 +1162,28 @@ static struct btrfs_root *btrfs_get_global_root(struct btrfs_fs_info *fs_info,
.offset = 0,
};
if (objectid == BTRFS_ROOT_TREE_OBJECTID)
switch (objectid) {
case BTRFS_ROOT_TREE_OBJECTID:
return btrfs_grab_root(fs_info->tree_root);
if (objectid == BTRFS_EXTENT_TREE_OBJECTID)
case BTRFS_EXTENT_TREE_OBJECTID:
return btrfs_grab_root(btrfs_global_root(fs_info, &key));
if (objectid == BTRFS_CHUNK_TREE_OBJECTID)
case BTRFS_CHUNK_TREE_OBJECTID:
return btrfs_grab_root(fs_info->chunk_root);
if (objectid == BTRFS_DEV_TREE_OBJECTID)
case BTRFS_DEV_TREE_OBJECTID:
return btrfs_grab_root(fs_info->dev_root);
if (objectid == BTRFS_CSUM_TREE_OBJECTID)
case BTRFS_CSUM_TREE_OBJECTID:
return btrfs_grab_root(btrfs_global_root(fs_info, &key));
if (objectid == BTRFS_QUOTA_TREE_OBJECTID)
return btrfs_grab_root(fs_info->quota_root) ?
fs_info->quota_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_UUID_TREE_OBJECTID)
return btrfs_grab_root(fs_info->uuid_root) ?
fs_info->uuid_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_BLOCK_GROUP_TREE_OBJECTID)
return btrfs_grab_root(fs_info->block_group_root) ?
fs_info->block_group_root : ERR_PTR(-ENOENT);
if (objectid == BTRFS_FREE_SPACE_TREE_OBJECTID) {
struct btrfs_root *root = btrfs_global_root(fs_info, &key);
return btrfs_grab_root(root) ? root : ERR_PTR(-ENOENT);
case BTRFS_QUOTA_TREE_OBJECTID:
return btrfs_grab_root(fs_info->quota_root);
case BTRFS_UUID_TREE_OBJECTID:
return btrfs_grab_root(fs_info->uuid_root);
case BTRFS_BLOCK_GROUP_TREE_OBJECTID:
return btrfs_grab_root(fs_info->block_group_root);
case BTRFS_FREE_SPACE_TREE_OBJECTID:
return btrfs_grab_root(btrfs_global_root(fs_info, &key));
default:
return NULL;
}
return NULL;
}
int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
@ -1991,7 +1739,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
{
btrfs_destroy_workqueue(fs_info->fixup_workers);
btrfs_destroy_workqueue(fs_info->delalloc_workers);
btrfs_destroy_workqueue(fs_info->hipri_workers);
btrfs_destroy_workqueue(fs_info->workers);
if (fs_info->endio_workers)
destroy_workqueue(fs_info->endio_workers);
@ -2183,12 +1930,10 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
{
u32 max_active = fs_info->thread_pool_size;
unsigned int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
unsigned int ordered_flags = WQ_MEM_RECLAIM | WQ_FREEZABLE;
fs_info->workers =
btrfs_alloc_workqueue(fs_info, "worker", flags, max_active, 16);
fs_info->hipri_workers =
btrfs_alloc_workqueue(fs_info, "worker-high",
flags | WQ_HIGHPRI, max_active, 16);
fs_info->delalloc_workers =
btrfs_alloc_workqueue(fs_info, "delalloc",
@ -2202,7 +1947,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_workqueue(fs_info, "cache", flags, max_active, 0);
fs_info->fixup_workers =
btrfs_alloc_workqueue(fs_info, "fixup", flags, 1, 0);
btrfs_alloc_ordered_workqueue(fs_info, "fixup", ordered_flags);
fs_info->endio_workers =
alloc_workqueue("btrfs-endio", flags, max_active);
@ -2221,11 +1966,12 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info)
btrfs_alloc_workqueue(fs_info, "delayed-meta", flags,
max_active, 0);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
btrfs_alloc_ordered_workqueue(fs_info, "qgroup-rescan",
ordered_flags);
fs_info->discard_ctl.discard_workers =
alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1);
alloc_ordered_workqueue("btrfs_discard", WQ_FREEZABLE);
if (!(fs_info->workers && fs_info->hipri_workers &&
if (!(fs_info->workers &&
fs_info->delalloc_workers && fs_info->flush_workers &&
fs_info->endio_workers && fs_info->endio_meta_workers &&
fs_info->compressed_write_workers &&
@ -2265,6 +2011,9 @@ static int btrfs_init_csum_hash(struct btrfs_fs_info *fs_info, u16 csum_type)
if (!strstr(crypto_shash_driver_name(csum_shash), "generic"))
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
break;
case BTRFS_CSUM_TYPE_XXHASH:
set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags);
break;
default:
break;
}
@ -2642,6 +2391,14 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
"dev_item UUID does not match metadata fsid: %pU != %pU",
fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
ret = -EINVAL;
}
/*
* Artificial requirement for block-group-tree to force newer features
* (free-space-tree, no-holes) so the test matrix is smaller.
@ -2654,14 +2411,6 @@ int btrfs_validate_super(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
if (memcmp(fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid,
BTRFS_FSID_SIZE) != 0) {
btrfs_err(fs_info,
"dev_item UUID does not match metadata fsid: %pU != %pU",
fs_info->fs_devices->metadata_uuid, sb->dev_item.fsid);
ret = -EINVAL;
}
/*
* Hint to catch really bogus numbers, bitflips or so, more exact checks are
* done later
@ -4662,28 +4411,10 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
btrfs_close_devices(fs_info->fs_devices);
}
int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid,
int atomic)
{
int ret;
struct inode *btree_inode = buf->pages[0]->mapping->host;
ret = extent_buffer_uptodate(buf);
if (!ret)
return ret;
ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf,
parent_transid, atomic);
if (ret == -EAGAIN)
return ret;
return !ret;
}
void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
{
struct btrfs_fs_info *fs_info = buf->fs_info;
u64 transid = btrfs_header_generation(buf);
int was_dirty;
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
/*
@ -4698,19 +4429,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
if (transid != fs_info->generation)
WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
buf->start, transid, fs_info->generation);
was_dirty = set_extent_buffer_dirty(buf);
if (!was_dirty)
percpu_counter_add_batch(&fs_info->dirty_metadata_bytes,
buf->len,
fs_info->dirty_metadata_batch);
set_extent_buffer_dirty(buf);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
/*
* Since btrfs_mark_buffer_dirty() can be called with item pointer set
* but item data not updated.
* So here we should only check item pointers, not item data.
* btrfs_check_leaf() won't check item data if we don't have WRITTEN
* set, so this will only validate the basic structure of the items.
*/
if (btrfs_header_level(buf) == 0 &&
btrfs_check_leaf_relaxed(buf)) {
if (btrfs_header_level(buf) == 0 && btrfs_check_leaf(buf)) {
btrfs_print_leaf(buf);
ASSERT(0);
}
@ -4840,13 +4565,12 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1);
}
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info)
static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_fs_info *fs_info)
{
struct rb_node *node;
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_ref_node *ref;
int ret = 0;
delayed_refs = &trans->delayed_refs;
@ -4854,7 +4578,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
if (atomic_read(&delayed_refs->num_entries) == 0) {
spin_unlock(&delayed_refs->lock);
btrfs_debug(fs_info, "delayed_refs has NO entry");
return ret;
return;
}
while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) {
@ -4871,7 +4595,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
while ((n = rb_first_cached(&head->ref_tree)) != NULL) {
ref = rb_entry(n, struct btrfs_delayed_ref_node,
ref_node);
ref->in_tree = 0;
rb_erase_cached(&ref->ref_node, &head->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
@ -4916,8 +4639,6 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
btrfs_qgroup_destroy_extent_records(trans);
spin_unlock(&delayed_refs->lock);
return ret;
}
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
@ -5142,8 +4863,6 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(fs_info, &cur_trans->pinned_extents);
btrfs_free_redirty_list(cur_trans);
cur_trans->state =TRANS_STATE_COMPLETED;
wake_up(&cur_trans->commit_wait);
}

View File

@ -31,8 +31,6 @@ struct btrfs_tree_parent_check;
void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info);
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr,
struct btrfs_tree_parent_check *check);
struct extent_buffer *btrfs_find_create_tree_block(
@ -84,9 +82,8 @@ void btrfs_btree_balance_dirty(struct btrfs_fs_info *fs_info);
void btrfs_btree_balance_dirty_nodelay(struct btrfs_fs_info *fs_info);
void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
struct btrfs_root *root);
int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio,
struct page *page, u64 start, u64 end,
int mirror);
int btrfs_validate_extent_buffer(struct extent_buffer *eb,
struct btrfs_tree_parent_check *check);
#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
struct btrfs_root *btrfs_alloc_dummy_root(struct btrfs_fs_info *fs_info);
#endif

View File

@ -532,6 +532,16 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
return next;
}
/*
* Detect if extent bits request NOWAIT semantics and set the gfp mask accordingly,
* unset the EXTENT_NOWAIT bit.
*/
static void set_gfp_mask_from_bits(u32 *bits, gfp_t *mask)
{
*mask = (*bits & EXTENT_NOWAIT ? GFP_NOWAIT : GFP_NOFS);
*bits &= EXTENT_NOWAIT - 1;
}
/*
* Clear some bits on a range in the tree. This may require splitting or
* inserting elements in the tree, so the gfp mask is used to indicate which
@ -546,7 +556,7 @@ static struct extent_state *clear_state_bit(struct extent_io_tree *tree,
*/
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state,
gfp_t mask, struct extent_changeset *changeset)
struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *cached;
@ -556,7 +566,9 @@ int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear = 0;
int wake;
int delete = (bits & EXTENT_CLEAR_ALL_BITS);
gfp_t mask;
set_gfp_mask_from_bits(&bits, &mask);
btrfs_debug_check_extent_io_range(tree, start, end);
trace_btrfs_clear_extent_bit(tree, start, end - start + 1, bits);
@ -953,7 +965,8 @@ bool btrfs_find_delalloc_range(struct extent_io_tree *tree, u64 *start,
/*
* Set some bits on a range in the tree. This may require allocations or
* sleeping, so the gfp mask is used to indicate what is allowed.
* sleeping. By default all allocations use GFP_NOFS, use EXTENT_NOWAIT for
* GFP_NOWAIT.
*
* If any of the exclusive bits are set, this will fail with -EEXIST if some
* part of the range already has the desired bits set. The extent_state of the
@ -968,7 +981,7 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u64 *failed_start,
struct extent_state **failed_state,
struct extent_state **cached_state,
struct extent_changeset *changeset, gfp_t mask)
struct extent_changeset *changeset)
{
struct extent_state *state;
struct extent_state *prealloc = NULL;
@ -978,7 +991,9 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u64 last_start;
u64 last_end;
u32 exclusive_bits = (bits & EXTENT_LOCKED);
gfp_t mask;
set_gfp_mask_from_bits(&bits, &mask);
btrfs_debug_check_extent_io_range(tree, start, end);
trace_btrfs_set_extent_bit(tree, start, end - start + 1, bits);
@ -1188,10 +1203,10 @@ static int __set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
}
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state, gfp_t mask)
u32 bits, struct extent_state **cached_state)
{
return __set_extent_bit(tree, start, end, bits, NULL, NULL,
cached_state, NULL, mask);
cached_state, NULL);
}
/*
@ -1687,8 +1702,7 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL,
changeset, GFP_NOFS);
return __set_extent_bit(tree, start, end, bits, NULL, NULL, NULL, changeset);
}
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@ -1700,8 +1714,7 @@ int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
*/
ASSERT(!(bits & EXTENT_LOCKED));
return __clear_extent_bit(tree, start, end, bits, NULL, GFP_NOFS,
changeset);
return __clear_extent_bit(tree, start, end, bits, NULL, changeset);
}
int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
@ -1711,7 +1724,7 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
NULL, cached, NULL, GFP_NOFS);
NULL, cached, NULL);
if (err == -EEXIST) {
if (failed_start > start)
clear_extent_bit(tree, start, failed_start - 1,
@ -1733,7 +1746,7 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
u64 failed_start;
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED, &failed_start,
&failed_state, cached_state, NULL, GFP_NOFS);
&failed_state, cached_state, NULL);
while (err == -EEXIST) {
if (failed_start != start)
clear_extent_bit(tree, start, failed_start - 1,
@ -1743,7 +1756,7 @@ int lock_extent(struct extent_io_tree *tree, u64 start, u64 end,
&failed_state);
err = __set_extent_bit(tree, start, end, EXTENT_LOCKED,
&failed_start, &failed_state,
cached_state, NULL, GFP_NOFS);
cached_state, NULL);
}
return err;
}

View File

@ -43,6 +43,15 @@ enum {
* want the extent states to go away.
*/
ENUM_BIT(EXTENT_CLEAR_ALL_BITS),
/*
* This must be last.
*
* Bit not representing a state but a request for NOWAIT semantics,
* e.g. when allocating memory, and must be masked out from the other
* bits.
*/
ENUM_BIT(EXTENT_NOWAIT)
};
#define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \
@ -127,22 +136,20 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
int clear_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int __clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached, gfp_t mask,
u32 bits, struct extent_state **cached,
struct extent_changeset *changeset);
static inline int clear_extent_bit(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits,
struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, bits, cached,
GFP_NOFS, NULL);
return __clear_extent_bit(tree, start, end, bits, cached, NULL);
}
static inline int unlock_extent(struct extent_io_tree *tree, u64 start, u64 end,
struct extent_state **cached)
{
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached,
GFP_NOFS, NULL);
return __clear_extent_bit(tree, start, end, EXTENT_LOCKED, cached, NULL);
}
static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
@ -154,31 +161,13 @@ static inline int clear_extent_bits(struct extent_io_tree *tree, u64 start,
int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_changeset *changeset);
int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, struct extent_state **cached_state, gfp_t mask);
static inline int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOWAIT);
}
static inline int set_extent_bits(struct extent_io_tree *tree, u64 start,
u64 end, u32 bits)
{
return set_extent_bit(tree, start, end, bits, NULL, GFP_NOFS);
}
u32 bits, struct extent_state **cached_state);
static inline int clear_extent_uptodate(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return __clear_extent_bit(tree, start, end, EXTENT_UPTODATE,
cached_state, GFP_NOFS, NULL);
}
static inline int set_extent_dirty(struct extent_io_tree *tree, u64 start,
u64 end, gfp_t mask)
{
return set_extent_bit(tree, start, end, EXTENT_DIRTY, NULL, mask);
cached_state, NULL);
}
static inline int clear_extent_dirty(struct extent_io_tree *tree, u64 start,
@ -193,29 +182,6 @@ int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
u32 bits, u32 clear_bits,
struct extent_state **cached_state);
static inline int set_extent_delalloc(struct extent_io_tree *tree, u64 start,
u64 end, u32 extra_bits,
struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | extra_bits,
cached_state, GFP_NOFS);
}
static inline int set_extent_defrag(struct extent_io_tree *tree, u64 start,
u64 end, struct extent_state **cached_state)
{
return set_extent_bit(tree, start, end,
EXTENT_DELALLOC | EXTENT_DEFRAG,
cached_state, GFP_NOFS);
}
static inline int set_extent_new(struct extent_io_tree *tree, u64 start,
u64 end)
{
return set_extent_bit(tree, start, end, EXTENT_NEW, NULL, GFP_NOFS);
}
int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
u64 *start_ret, u64 *end_ret, u32 bits,
struct extent_state **cached_state);

View File

@ -73,8 +73,8 @@ int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info,
u64 start, u64 num_bytes)
{
u64 end = start + num_bytes - 1;
set_extent_bits(&fs_info->excluded_extents, start, end,
EXTENT_UPTODATE);
set_extent_bit(&fs_info->excluded_extents, start, end,
EXTENT_UPTODATE, NULL);
return 0;
}
@ -402,7 +402,7 @@ int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb,
}
}
btrfs_print_leaf((struct extent_buffer *)eb);
btrfs_print_leaf(eb);
btrfs_err(eb->fs_info,
"eb %llu iref 0x%lx invalid extent inline ref type %d",
eb->start, (unsigned long)iref, type);
@ -1164,15 +1164,10 @@ int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
* should not happen at all.
*/
if (owner < BTRFS_FIRST_FREE_OBJECTID) {
btrfs_print_leaf(path->nodes[0]);
btrfs_crit(trans->fs_info,
"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu",
bytenr, num_bytes, root_objectid);
if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
WARN_ON(1);
btrfs_crit(trans->fs_info,
"path->slots[0]=%d path->nodes[0]:", path->slots[0]);
btrfs_print_leaf(path->nodes[0]);
}
"adding refs to an existing tree ref, bytenr %llu num_bytes %llu root_objectid %llu slot %u",
bytenr, num_bytes, root_objectid, path->slots[0]);
return -EUCLEAN;
}
update_inline_extent_backref(path, iref, refs_to_add, extent_op);
@ -1208,11 +1203,11 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
{
int j, ret = 0;
u64 bytes_left, end;
u64 aligned_start = ALIGN(start, 1 << 9);
u64 aligned_start = ALIGN(start, 1 << SECTOR_SHIFT);
if (WARN_ON(start != aligned_start)) {
len -= aligned_start - start;
len = round_down(len, 1 << 9);
len = round_down(len, 1 << SECTOR_SHIFT);
start = aligned_start;
}
@ -1250,7 +1245,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
}
if (size) {
ret = blkdev_issue_discard(bdev, start >> 9, size >> 9,
ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
size >> SECTOR_SHIFT,
GFP_NOFS);
if (!ret)
*discarded_bytes += size;
@ -1267,7 +1263,8 @@ static int btrfs_issue_discard(struct block_device *bdev, u64 start, u64 len,
}
if (bytes_left) {
ret = blkdev_issue_discard(bdev, start >> 9, bytes_left >> 9,
ret = blkdev_issue_discard(bdev, start >> SECTOR_SHIFT,
bytes_left >> SECTOR_SHIFT,
GFP_NOFS);
if (!ret)
*discarded_bytes += bytes_left;
@ -1500,7 +1497,7 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
bool insert_reserved)
{
int ret = 0;
struct btrfs_delayed_data_ref *ref;
@ -1650,7 +1647,7 @@ static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
bool insert_reserved)
{
int ret = 0;
struct btrfs_delayed_tree_ref *ref;
@ -1690,7 +1687,7 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *node,
struct btrfs_delayed_extent_op *extent_op,
int insert_reserved)
bool insert_reserved)
{
int ret = 0;
@ -1748,7 +1745,7 @@ static void unselect_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_ref
struct btrfs_delayed_ref_head *head)
{
spin_lock(&delayed_refs->lock);
head->processing = 0;
head->processing = false;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
btrfs_delayed_ref_unlock(head);
@ -1900,7 +1897,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
struct btrfs_delayed_ref_root *delayed_refs;
struct btrfs_delayed_extent_op *extent_op;
struct btrfs_delayed_ref_node *ref;
int must_insert_reserved = 0;
bool must_insert_reserved;
int ret;
delayed_refs = &trans->transaction->delayed_refs;
@ -1916,7 +1913,6 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
return -EAGAIN;
}
ref->in_tree = 0;
rb_erase_cached(&ref->ref_node, &locked_ref->ref_tree);
RB_CLEAR_NODE(&ref->ref_node);
if (!list_empty(&ref->add_list))
@ -1943,7 +1939,7 @@ static int btrfs_run_delayed_refs_for_head(struct btrfs_trans_handle *trans,
* spin lock.
*/
must_insert_reserved = locked_ref->must_insert_reserved;
locked_ref->must_insert_reserved = 0;
locked_ref->must_insert_reserved = false;
extent_op = locked_ref->extent_op;
locked_ref->extent_op = NULL;
@ -2155,10 +2151,10 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
}
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags,
int level)
struct extent_buffer *eb, u64 flags)
{
struct btrfs_delayed_extent_op *extent_op;
int level = btrfs_header_level(eb);
int ret;
extent_op = btrfs_alloc_delayed_extent_op();
@ -2510,8 +2506,8 @@ static int pin_down_extent(struct btrfs_trans_handle *trans,
spin_unlock(&cache->lock);
spin_unlock(&cache->space_info->lock);
set_extent_dirty(&trans->transaction->pinned_extents, bytenr,
bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL);
set_extent_bit(&trans->transaction->pinned_extents, bytenr,
bytenr + num_bytes - 1, EXTENT_DIRTY, NULL);
return 0;
}
@ -2838,6 +2834,13 @@ static int do_free_extent_accounting(struct btrfs_trans_handle *trans,
return ret;
}
#define abort_and_dump(trans, path, fmt, args...) \
({ \
btrfs_abort_transaction(trans, -EUCLEAN); \
btrfs_print_leaf(path->nodes[0]); \
btrfs_crit(trans->fs_info, fmt, ##args); \
})
/*
* Drop one or more refs of @node.
*
@ -2978,10 +2981,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (!found_extent) {
if (iref) {
btrfs_crit(info,
"invalid iref, no EXTENT/METADATA_ITEM found but has inline extent ref");
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"invalid iref slot %u, no EXTENT/METADATA_ITEM found but has inline extent ref",
path->slots[0]);
ret = -EUCLEAN;
goto out;
}
/* Must be SHARED_* item, remove the backref first */
ret = remove_extent_backref(trans, extent_root, path,
@ -3029,11 +3033,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
}
if (ret) {
btrfs_err(info,
"umm, got %d back from search, was looking for %llu",
ret, bytenr);
if (ret > 0)
btrfs_print_leaf(path->nodes[0]);
btrfs_err(info,
"umm, got %d back from search, was looking for %llu, slot %d",
ret, bytenr, path->slots[0]);
}
if (ret < 0) {
btrfs_abort_transaction(trans, ret);
@ -3042,12 +3046,10 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
extent_slot = path->slots[0];
}
} else if (WARN_ON(ret == -ENOENT)) {
btrfs_print_leaf(path->nodes[0]);
btrfs_err(info,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu",
bytenr, parent, root_objectid, owner_objectid,
owner_offset);
btrfs_abort_transaction(trans, ret);
abort_and_dump(trans, path,
"unable to find ref byte nr %llu parent %llu root %llu owner %llu offset %llu slot %d",
bytenr, parent, root_objectid, owner_objectid,
owner_offset, path->slots[0]);
goto out;
} else {
btrfs_abort_transaction(trans, ret);
@ -3067,14 +3069,15 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID &&
key.type == BTRFS_EXTENT_ITEM_KEY) {
struct btrfs_tree_block_info *bi;
if (item_size < sizeof(*ei) + sizeof(*bi)) {
btrfs_crit(info,
"invalid extent item size for key (%llu, %u, %llu) owner %llu, has %u expect >= %zu",
key.objectid, key.type, key.offset,
owner_objectid, item_size,
sizeof(*ei) + sizeof(*bi));
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"invalid extent item size for key (%llu, %u, %llu) slot %u owner %llu, has %u expect >= %zu",
key.objectid, key.type, key.offset,
path->slots[0], owner_objectid, item_size,
sizeof(*ei) + sizeof(*bi));
ret = -EUCLEAN;
goto out;
}
bi = (struct btrfs_tree_block_info *)(ei + 1);
WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi));
@ -3082,11 +3085,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
refs = btrfs_extent_refs(leaf, ei);
if (refs < refs_to_drop) {
btrfs_crit(info,
"trying to drop %d refs but we only have %llu for bytenr %llu",
refs_to_drop, refs, bytenr);
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"trying to drop %d refs but we only have %llu for bytenr %llu slot %u",
refs_to_drop, refs, bytenr, path->slots[0]);
ret = -EUCLEAN;
goto out;
}
refs -= refs_to_drop;
@ -3099,10 +3102,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
*/
if (iref) {
if (!found_extent) {
btrfs_crit(info,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found");
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"invalid iref, got inlined extent ref but no EXTENT/METADATA_ITEM found, slot %u",
path->slots[0]);
ret = -EUCLEAN;
goto out;
}
} else {
btrfs_set_extent_refs(leaf, ei, refs);
@ -3121,21 +3125,21 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
if (found_extent) {
if (is_data && refs_to_drop !=
extent_data_ref_count(path, iref)) {
btrfs_crit(info,
"invalid refs_to_drop, current refs %u refs_to_drop %u",
extent_data_ref_count(path, iref),
refs_to_drop);
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"invalid refs_to_drop, current refs %u refs_to_drop %u slot %u",
extent_data_ref_count(path, iref),
refs_to_drop, path->slots[0]);
ret = -EUCLEAN;
goto out;
}
if (iref) {
if (path->slots[0] != extent_slot) {
btrfs_crit(info,
"invalid iref, extent item key (%llu %u %llu) doesn't have wanted iref",
key.objectid, key.type,
key.offset);
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"invalid iref, extent item key (%llu %u %llu) slot %u doesn't have wanted iref",
key.objectid, key.type,
key.offset, path->slots[0]);
ret = -EUCLEAN;
goto out;
}
} else {
/*
@ -3145,10 +3149,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
* [ EXTENT/METADATA_ITEM ][ SHARED_* ITEM ]
*/
if (path->slots[0] != extent_slot + 1) {
btrfs_crit(info,
"invalid SHARED_* item, previous item is not EXTENT/METADATA_ITEM");
btrfs_abort_transaction(trans, -EUCLEAN);
goto err_dump;
abort_and_dump(trans, path,
"invalid SHARED_* item slot %u, previous item is not EXTENT/METADATA_ITEM",
path->slots[0]);
ret = -EUCLEAN;
goto out;
}
path->slots[0] = extent_slot;
num_to_del = 2;
@ -3170,19 +3175,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
out:
btrfs_free_path(path);
return ret;
err_dump:
/*
* Leaf dump can take up a lot of log buffer, so we only do full leaf
* dump for debug build.
*/
if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) {
btrfs_crit(info, "path->slots[0]=%d extent_slot=%d",
path->slots[0], extent_slot);
btrfs_print_leaf(path->nodes[0]);
}
btrfs_free_path(path);
return -EUCLEAN;
}
/*
@ -3219,7 +3211,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans,
goto out;
btrfs_delete_ref_head(delayed_refs, head);
head->processing = 0;
head->processing = false;
spin_unlock(&head->lock);
spin_unlock(&delayed_refs->lock);
@ -4804,7 +4796,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
!test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
lockdep_owner = BTRFS_FS_TREE_OBJECTID;
/* btrfs_clean_tree_block() accesses generation field. */
/* btrfs_clear_buffer_dirty() accesses generation field. */
btrfs_set_header_generation(buf, trans->transid);
/*
@ -4836,15 +4828,17 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* EXTENT bit to differentiate dirty pages.
*/
if (buf->log_index == 0)
set_extent_dirty(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
set_extent_bit(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1,
EXTENT_DIRTY, NULL);
else
set_extent_new(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1);
set_extent_bit(&root->dirty_log_pages, buf->start,
buf->start + buf->len - 1,
EXTENT_NEW, NULL);
} else {
buf->log_index = -1;
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
set_extent_bit(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, EXTENT_DIRTY, NULL);
}
/* this returns a buffer locked for blocking */
return buf;
@ -5102,8 +5096,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_dec_ref(trans, root, eb, 0);
BUG_ON(ret); /* -ENOMEM */
ret = btrfs_set_disk_extent_flags(trans, eb, flag,
btrfs_header_level(eb));
ret = btrfs_set_disk_extent_flags(trans, eb, flag);
BUG_ON(ret); /* -ENOMEM */
wc->flags[level] |= flag;
}
@ -5985,9 +5978,8 @@ static int btrfs_trim_free_extents(struct btrfs_device *device, u64 *trimmed)
ret = btrfs_issue_discard(device->bdev, start, len,
&bytes);
if (!ret)
set_extent_bits(&device->alloc_state, start,
start + bytes - 1,
CHUNK_TRIMMED);
set_extent_bit(&device->alloc_state, start,
start + bytes - 1, CHUNK_TRIMMED, NULL);
mutex_unlock(&fs_info->chunk_mutex);
if (ret)

View File

@ -141,7 +141,7 @@ int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf, int full_backref);
int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
struct extent_buffer *eb, u64 flags, int level);
struct extent_buffer *eb, u64 flags);
int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref);
int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info,

File diff suppressed because it is too large Load Diff

View File

@ -29,6 +29,8 @@ enum {
/* write IO error */
EXTENT_BUFFER_WRITE_ERR,
EXTENT_BUFFER_NO_CHECK,
/* Indicate that extent buffer pages a being read */
EXTENT_BUFFER_READING,
};
/* these are flags for __process_pages_contig */
@ -38,7 +40,6 @@ enum {
ENUM_BIT(PAGE_START_WRITEBACK),
ENUM_BIT(PAGE_END_WRITEBACK),
ENUM_BIT(PAGE_SET_ORDERED),
ENUM_BIT(PAGE_SET_ERROR),
ENUM_BIT(PAGE_LOCK),
};
@ -79,7 +80,6 @@ struct extent_buffer {
struct btrfs_fs_info *fs_info;
spinlock_t refs_lock;
atomic_t refs;
atomic_t io_pages;
int read_mirror;
struct rcu_head rcu_head;
pid_t lock_owner;
@ -89,7 +89,6 @@ struct extent_buffer {
struct rw_semaphore lock;
struct page *pages[INLINE_EXTENT_BUFFER_PAGES];
struct list_head release_list;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
#endif
@ -179,7 +178,8 @@ int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);
int btrfs_read_folio(struct file *file, struct folio *folio);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end);
int extent_write_locked_range(struct inode *inode, u64 start, u64 end,
struct writeback_control *wbc);
int extent_writepages(struct address_space *mapping,
struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
@ -262,10 +262,9 @@ void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long star
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
unsigned long start, unsigned long pos,
unsigned long len);
bool set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
int extent_buffer_under_io(const struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,

View File

@ -364,8 +364,9 @@ static void extent_map_device_set_bits(struct extent_map *em, unsigned bits)
struct btrfs_io_stripe *stripe = &map->stripes[i];
struct btrfs_device *device = stripe->dev;
set_extent_bits_nowait(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits);
set_extent_bit(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1,
bits | EXTENT_NOWAIT, NULL);
}
}
@ -380,8 +381,9 @@ static void extent_map_device_clear_bits(struct extent_map *em, unsigned bits)
struct btrfs_device *device = stripe->dev;
__clear_extent_bit(&device->alloc_state, stripe->physical,
stripe->physical + stripe_size - 1, bits,
NULL, GFP_NOWAIT, NULL);
stripe->physical + stripe_size - 1,
bits | EXTENT_NOWAIT,
NULL, NULL);
}
}
@ -502,10 +504,10 @@ void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em)
RB_CLEAR_NODE(&em->rb_node);
}
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified)
static void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified)
{
lockdep_assert_held_write(&tree->lock);
@ -959,3 +961,95 @@ int btrfs_replace_extent_map_range(struct btrfs_inode *inode,
return ret;
}
/*
* Split off the first pre bytes from the extent_map at [start, start + len],
* and set the block_start for it to new_logical.
*
* This function is used when an ordered_extent needs to be split.
*/
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
u64 new_logical)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
struct extent_map *split_pre = NULL;
struct extent_map *split_mid = NULL;
int ret = 0;
unsigned long flags;
ASSERT(pre != 0);
ASSERT(pre < len);
split_pre = alloc_extent_map();
if (!split_pre)
return -ENOMEM;
split_mid = alloc_extent_map();
if (!split_mid) {
ret = -ENOMEM;
goto out_free_pre;
}
lock_extent(&inode->io_tree, start, start + len - 1, NULL);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
ret = -EIO;
goto out_unlock;
}
ASSERT(em->len == len);
ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
ASSERT(!list_empty(&em->list));
flags = em->flags;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
/* First, replace the em with a new extent_map starting from * em->start */
split_pre->start = em->start;
split_pre->len = pre;
split_pre->orig_start = split_pre->start;
split_pre->block_start = new_logical;
split_pre->block_len = split_pre->len;
split_pre->orig_block_len = split_pre->block_len;
split_pre->ram_bytes = split_pre->len;
split_pre->flags = flags;
split_pre->compress_type = em->compress_type;
split_pre->generation = em->generation;
replace_extent_mapping(em_tree, em, split_pre, 1);
/*
* Now we only have an extent_map at:
* [em->start, em->start + pre]
*/
/* Insert the middle extent_map. */
split_mid->start = em->start + pre;
split_mid->len = em->len - pre;
split_mid->orig_start = split_mid->start;
split_mid->block_start = em->block_start + pre;
split_mid->block_len = split_mid->len;
split_mid->orig_block_len = split_mid->block_len;
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->compress_type = em->compress_type;
split_mid->generation = em->generation;
add_extent_mapping(em_tree, split_mid, 1);
/* Once for us */
free_extent_map(em);
/* Once for the tree */
free_extent_map(em);
out_unlock:
write_unlock(&em_tree->lock);
unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
free_extent_map(split_mid);
out_free_pre:
free_extent_map(split_pre);
return ret;
}

View File

@ -90,10 +90,8 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
int add_extent_mapping(struct extent_map_tree *tree,
struct extent_map *em, int modified);
void remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em);
void replace_extent_mapping(struct extent_map_tree *tree,
struct extent_map *cur,
struct extent_map *new,
int modified);
int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre,
u64 new_logical);
struct extent_map *alloc_extent_map(void);
void free_extent_map(struct extent_map *em);

View File

@ -94,8 +94,8 @@ int btrfs_inode_set_file_extent_range(struct btrfs_inode *inode, u64 start,
if (btrfs_fs_incompat(inode->root->fs_info, NO_HOLES))
return 0;
return set_extent_bits(&inode->file_extent_tree, start, start + len - 1,
EXTENT_DIRTY);
return set_extent_bit(&inode->file_extent_tree, start, start + len - 1,
EXTENT_DIRTY, NULL);
}
/*
@ -438,9 +438,9 @@ blk_status_t btrfs_lookup_bio_sums(struct btrfs_bio *bbio)
BTRFS_DATA_RELOC_TREE_OBJECTID) {
u64 file_offset = bbio->file_offset + bio_offset;
set_extent_bits(&inode->io_tree, file_offset,
file_offset + sectorsize - 1,
EXTENT_NODATASUM);
set_extent_bit(&inode->io_tree, file_offset,
file_offset + sectorsize - 1,
EXTENT_NODATASUM, NULL);
} else {
btrfs_warn_rl(fs_info,
"csum hole found for disk bytenr range [%llu, %llu)",
@ -560,8 +560,8 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end,
goto fail;
}
sums->bytenr = start;
sums->len = (int)size;
sums->logical = start;
sums->len = size;
offset = bytes_to_csum_size(fs_info, start - key.offset);
@ -721,20 +721,17 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end,
*/
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
{
struct btrfs_ordered_extent *ordered = bbio->ordered;
struct btrfs_inode *inode = bbio->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
struct bio *bio = &bbio->bio;
u64 offset = bbio->file_offset;
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered = NULL;
char *data;
struct bvec_iter iter;
struct bio_vec bvec;
int index;
unsigned int blockcount;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
int i;
unsigned nofs_flag;
@ -749,61 +746,17 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums->len = bio->bi_iter.bi_size;
INIT_LIST_HEAD(&sums->list);
sums->bytenr = bio->bi_iter.bi_sector << 9;
sums->logical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
index = 0;
shash->tfm = fs_info->csum_shash;
bio_for_each_segment(bvec, bio, iter) {
if (!ordered) {
ordered = btrfs_lookup_ordered_extent(inode, offset);
/*
* The bio range is not covered by any ordered extent,
* must be a code logic error.
*/
if (unlikely(!ordered)) {
WARN(1, KERN_WARNING
"no ordered extent for root %llu ino %llu offset %llu\n",
inode->root->root_key.objectid,
btrfs_ino(inode), offset);
kvfree(sums);
return BLK_STS_IOERR;
}
}
blockcount = BTRFS_BYTES_TO_BLKS(fs_info,
bvec.bv_len + fs_info->sectorsize
- 1);
for (i = 0; i < blockcount; i++) {
if (!(bio->bi_opf & REQ_BTRFS_ONE_ORDERED) &&
!in_range(offset, ordered->file_offset,
ordered->num_bytes)) {
unsigned long bytes_left;
sums->len = this_sum_bytes;
this_sum_bytes = 0;
btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
bytes_left = bio->bi_iter.bi_size - total_bytes;
nofs_flag = memalloc_nofs_save();
sums = kvzalloc(btrfs_ordered_sum_size(fs_info,
bytes_left), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
if (!sums)
return BLK_STS_RESOURCE;
sums->len = bytes_left;
ordered = btrfs_lookup_ordered_extent(inode,
offset);
ASSERT(ordered); /* Logic error */
sums->bytenr = (bio->bi_iter.bi_sector << 9)
+ total_bytes;
index = 0;
}
data = bvec_kmap_local(&bvec);
crypto_shash_digest(shash,
data + (i * fs_info->sectorsize),
@ -811,15 +764,28 @@ blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio)
sums->sums + index);
kunmap_local(data);
index += fs_info->csum_size;
offset += fs_info->sectorsize;
this_sum_bytes += fs_info->sectorsize;
total_bytes += fs_info->sectorsize;
}
}
this_sum_bytes = 0;
bbio->sums = sums;
btrfs_add_ordered_sum(ordered, sums);
btrfs_put_ordered_extent(ordered);
return 0;
}
/*
* Nodatasum I/O on zoned file systems still requires an btrfs_ordered_sum to
* record the updated logical address on Zone Append completion.
* Allocate just the structure with an empty sums array here for that case.
*/
blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio)
{
bbio->sums = kmalloc(sizeof(*bbio->sums), GFP_NOFS);
if (!bbio->sums)
return BLK_STS_RESOURCE;
bbio->sums->len = bbio->bio.bi_iter.bi_size;
bbio->sums->logical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
btrfs_add_ordered_sum(bbio->ordered, bbio->sums);
return 0;
}
@ -1086,7 +1052,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
again:
next_offset = (u64)-1;
found_next = 0;
bytenr = sums->bytenr + total_bytes;
bytenr = sums->logical + total_bytes;
file_key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
file_key.offset = bytenr;
file_key.type = BTRFS_EXTENT_CSUM_KEY;

View File

@ -50,6 +50,7 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_ordered_sum *sums);
blk_status_t btrfs_csum_one_bio(struct btrfs_bio *bbio);
blk_status_t btrfs_alloc_dummy_sum(struct btrfs_bio *bbio);
int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
struct list_head *list, int search_commit,
bool nowait);

View File

@ -1651,7 +1651,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
struct file *file = iocb->ki_filp;
struct btrfs_inode *inode = BTRFS_I(file_inode(file));
ssize_t num_written, num_sync;
const bool sync = iocb_is_dsync(iocb);
/*
* If the fs flips readonly due to some impossible error, although we
@ -1664,9 +1663,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
if (encoded && (iocb->ki_flags & IOCB_NOWAIT))
return -EOPNOTSUPP;
if (sync)
atomic_inc(&inode->sync_writers);
if (encoded) {
num_written = btrfs_encoded_write(iocb, from, encoded);
num_sync = encoded->len;
@ -1686,9 +1682,6 @@ ssize_t btrfs_do_write_iter(struct kiocb *iocb, struct iov_iter *from,
num_written = num_sync;
}
if (sync)
atomic_dec(&inode->sync_writers);
current->backing_dev_info = NULL;
return num_written;
}
@ -1733,9 +1726,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
* several segments of stripe length (currently 64K).
*/
blk_start_plug(&plug);
atomic_inc(&BTRFS_I(inode)->sync_writers);
ret = btrfs_fdatawrite_range(inode, start, end);
atomic_dec(&BTRFS_I(inode)->sync_writers);
blk_finish_plug(&plug);
return ret;
@ -3709,7 +3700,8 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC;
filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp);
if (ret)

View File

@ -292,25 +292,6 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
return ret;
}
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv)
{
u64 needed_bytes;
int ret;
/* 1 for slack space, 1 for updating the inode */
needed_bytes = btrfs_calc_insert_metadata_size(fs_info, 1) +
btrfs_calc_metadata_size(fs_info, 1);
spin_lock(&rsv->lock);
if (rsv->reserved < needed_bytes)
ret = -ENOSPC;
else
ret = 0;
spin_unlock(&rsv->lock);
return ret;
}
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct inode *vfs_inode)
@ -923,27 +904,31 @@ static int copy_free_space_cache(struct btrfs_block_group *block_group,
while (!ret && (n = rb_first(&ctl->free_space_offset)) != NULL) {
info = rb_entry(n, struct btrfs_free_space, offset_index);
if (!info->bitmap) {
const u64 offset = info->offset;
const u64 bytes = info->bytes;
unlink_free_space(ctl, info, true);
ret = btrfs_add_free_space(block_group, info->offset,
info->bytes);
spin_unlock(&ctl->tree_lock);
kmem_cache_free(btrfs_free_space_cachep, info);
ret = btrfs_add_free_space(block_group, offset, bytes);
spin_lock(&ctl->tree_lock);
} else {
u64 offset = info->offset;
u64 bytes = ctl->unit;
while (search_bitmap(ctl, info, &offset, &bytes,
false) == 0) {
ret = search_bitmap(ctl, info, &offset, &bytes, false);
if (ret == 0) {
bitmap_clear_bits(ctl, info, offset, bytes, true);
spin_unlock(&ctl->tree_lock);
ret = btrfs_add_free_space(block_group, offset,
bytes);
if (ret)
break;
bitmap_clear_bits(ctl, info, offset, bytes, true);
offset = info->offset;
bytes = ctl->unit;
spin_lock(&ctl->tree_lock);
} else {
free_bitmap(ctl, info);
ret = 0;
}
free_bitmap(ctl, info);
}
cond_resched();
cond_resched_lock(&ctl->tree_lock);
}
return ret;
}
@ -1037,7 +1022,9 @@ int load_free_space_cache(struct btrfs_block_group *block_group)
block_group->bytes_super));
if (matched) {
spin_lock(&tmp_ctl.tree_lock);
ret = copy_free_space_cache(block_group, &tmp_ctl);
spin_unlock(&tmp_ctl.tree_lock);
/*
* ret == 1 means we successfully loaded the free space cache,
* so we need to re-set it here.
@ -1596,20 +1583,34 @@ static inline u64 offset_to_bitmap(struct btrfs_free_space_ctl *ctl,
return bitmap_start;
}
static int tree_insert_offset(struct rb_root *root, u64 offset,
struct rb_node *node, int bitmap)
static int tree_insert_offset(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_cluster *cluster,
struct btrfs_free_space *new_entry)
{
struct rb_node **p = &root->rb_node;
struct rb_root *root;
struct rb_node **p;
struct rb_node *parent = NULL;
struct btrfs_free_space *info;
lockdep_assert_held(&ctl->tree_lock);
if (cluster) {
lockdep_assert_held(&cluster->lock);
root = &cluster->root;
} else {
root = &ctl->free_space_offset;
}
p = &root->rb_node;
while (*p) {
struct btrfs_free_space *info;
parent = *p;
info = rb_entry(parent, struct btrfs_free_space, offset_index);
if (offset < info->offset) {
if (new_entry->offset < info->offset) {
p = &(*p)->rb_left;
} else if (offset > info->offset) {
} else if (new_entry->offset > info->offset) {
p = &(*p)->rb_right;
} else {
/*
@ -1625,7 +1626,7 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
* found a bitmap, we want to go left, or before
* logically.
*/
if (bitmap) {
if (new_entry->bitmap) {
if (info->bitmap) {
WARN_ON_ONCE(1);
return -EEXIST;
@ -1641,8 +1642,8 @@ static int tree_insert_offset(struct rb_root *root, u64 offset,
}
}
rb_link_node(node, parent, p);
rb_insert_color(node, root);
rb_link_node(&new_entry->offset_index, parent, p);
rb_insert_color(&new_entry->offset_index, root);
return 0;
}
@ -1705,6 +1706,8 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
struct rb_node *n = ctl->free_space_offset.rb_node;
struct btrfs_free_space *entry = NULL, *prev = NULL;
lockdep_assert_held(&ctl->tree_lock);
/* find entry that is closest to the 'offset' */
while (n) {
entry = rb_entry(n, struct btrfs_free_space, offset_index);
@ -1814,6 +1817,8 @@ static inline void unlink_free_space(struct btrfs_free_space_ctl *ctl,
struct btrfs_free_space *info,
bool update_stat)
{
lockdep_assert_held(&ctl->tree_lock);
rb_erase(&info->offset_index, &ctl->free_space_offset);
rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes);
ctl->free_extents--;
@ -1832,9 +1837,10 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
{
int ret = 0;
lockdep_assert_held(&ctl->tree_lock);
ASSERT(info->bytes || info->bitmap);
ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
&info->offset_index, (info->bitmap != NULL));
ret = tree_insert_offset(ctl, NULL, info);
if (ret)
return ret;
@ -1862,6 +1868,8 @@ static void relink_bitmap_entry(struct btrfs_free_space_ctl *ctl,
if (RB_EMPTY_NODE(&info->bytes_index))
return;
lockdep_assert_held(&ctl->tree_lock);
rb_erase_cached(&info->bytes_index, &ctl->free_space_bytes);
rb_add_cached(&info->bytes_index, &ctl->free_space_bytes, entry_less);
}
@ -2447,6 +2455,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
u64 offset = info->offset;
u64 bytes = info->bytes;
const bool is_trimmed = btrfs_free_space_trimmed(info);
struct rb_node *right_prev = NULL;
/*
* first we want to see if there is free space adjacent to the range we
@ -2454,9 +2463,11 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl,
* cover the entire range
*/
right_info = tree_search_offset(ctl, offset + bytes, 0, 0);
if (right_info && rb_prev(&right_info->offset_index))
left_info = rb_entry(rb_prev(&right_info->offset_index),
struct btrfs_free_space, offset_index);
if (right_info)
right_prev = rb_prev(&right_info->offset_index);
if (right_prev)
left_info = rb_entry(right_prev, struct btrfs_free_space, offset_index);
else if (!right_info)
left_info = tree_search_offset(ctl, offset - 1, 0, 0);
@ -2969,9 +2980,10 @@ static void __btrfs_return_cluster_to_free_space(
struct btrfs_free_cluster *cluster)
{
struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
struct btrfs_free_space *entry;
struct rb_node *node;
lockdep_assert_held(&ctl->tree_lock);
spin_lock(&cluster->lock);
if (cluster->block_group != block_group) {
spin_unlock(&cluster->lock);
@ -2984,15 +2996,14 @@ static void __btrfs_return_cluster_to_free_space(
node = rb_first(&cluster->root);
while (node) {
bool bitmap;
struct btrfs_free_space *entry;
entry = rb_entry(node, struct btrfs_free_space, offset_index);
node = rb_next(&entry->offset_index);
rb_erase(&entry->offset_index, &cluster->root);
RB_CLEAR_NODE(&entry->offset_index);
bitmap = (entry->bitmap != NULL);
if (!bitmap) {
if (!entry->bitmap) {
/* Merging treats extents as if they were new */
if (!btrfs_free_space_trimmed(entry)) {
ctl->discardable_extents[BTRFS_STAT_CURR]--;
@ -3010,8 +3021,7 @@ static void __btrfs_return_cluster_to_free_space(
entry->bytes;
}
}
tree_insert_offset(&ctl->free_space_offset,
entry->offset, &entry->offset_index, bitmap);
tree_insert_offset(ctl, NULL, entry);
rb_add_cached(&entry->bytes_index, &ctl->free_space_bytes,
entry_less);
}
@ -3324,6 +3334,8 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
unsigned long total_found = 0;
int ret;
lockdep_assert_held(&ctl->tree_lock);
i = offset_to_bit(entry->offset, ctl->unit,
max_t(u64, offset, entry->offset));
want_bits = bytes_to_bits(bytes, ctl->unit);
@ -3385,8 +3397,7 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group *block_group,
*/
RB_CLEAR_NODE(&entry->bytes_index);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 1);
ret = tree_insert_offset(ctl, cluster, entry);
ASSERT(!ret); /* -EEXIST; Logic error */
trace_btrfs_setup_cluster(block_group, cluster,
@ -3414,6 +3425,8 @@ setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
u64 max_extent;
u64 total_size = 0;
lockdep_assert_held(&ctl->tree_lock);
entry = tree_search_offset(ctl, offset, 0, 1);
if (!entry)
return -ENOSPC;
@ -3476,8 +3489,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group *block_group,
rb_erase(&entry->offset_index, &ctl->free_space_offset);
rb_erase_cached(&entry->bytes_index, &ctl->free_space_bytes);
ret = tree_insert_offset(&cluster->root, entry->offset,
&entry->offset_index, 0);
ret = tree_insert_offset(ctl, cluster, entry);
total_size += entry->bytes;
ASSERT(!ret); /* -EEXIST; Logic error */
} while (node && entry != last);
@ -3671,7 +3683,7 @@ static int do_trimming(struct btrfs_block_group *block_group,
__btrfs_add_free_space(block_group, reserved_start,
start - reserved_start,
reserved_trim_state);
if (start + bytes < reserved_start + reserved_bytes)
if (end < reserved_end)
__btrfs_add_free_space(block_group, end, reserved_end - end,
reserved_trim_state);
__btrfs_add_free_space(block_group, start, bytes, trim_state);

View File

@ -101,8 +101,6 @@ int btrfs_remove_free_space_inode(struct btrfs_trans_handle *trans,
struct inode *inode,
struct btrfs_block_group *block_group);
int btrfs_check_trunc_cache_free_space(struct btrfs_fs_info *fs_info,
struct btrfs_block_rsv *rsv);
int btrfs_truncate_free_space_cache(struct btrfs_trans_handle *trans,
struct btrfs_block_group *block_group,
struct inode *inode);

View File

@ -1280,7 +1280,10 @@ int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
goto abort;
btrfs_global_root_delete(free_space_root);
spin_lock(&fs_info->trans_lock);
list_del(&free_space_root->dirty_list);
spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(free_space_root->node);
btrfs_clear_buffer_dirty(trans, free_space_root->node);

View File

@ -543,7 +543,6 @@ struct btrfs_fs_info {
* A third pool does submit_bio to avoid deadlocking with the other two.
*/
struct btrfs_workqueue *workers;
struct btrfs_workqueue *hipri_workers;
struct btrfs_workqueue *delalloc_workers;
struct btrfs_workqueue *flush_workers;
struct workqueue_struct *endio_workers;
@ -577,6 +576,7 @@ struct btrfs_fs_info {
s32 dirty_metadata_batch;
s32 delalloc_batch;
/* Protected by 'trans_lock'. */
struct list_head dirty_cowonly_roots;
struct btrfs_fs_devices *fs_devices;
@ -643,7 +643,6 @@ struct btrfs_fs_info {
*/
refcount_t scrub_workers_refcnt;
struct workqueue_struct *scrub_workers;
struct workqueue_struct *scrub_wr_completion_workers;
struct btrfs_subpage_info *subpage_info;
struct btrfs_discard_ctl discard_ctl;
@ -854,7 +853,7 @@ static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info,
static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
{
return fs_info->zone_size > 0;
return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0;
}
/*

View File

@ -60,6 +60,22 @@ struct btrfs_truncate_control {
bool clear_extent_range;
};
/*
* btrfs_inode_item stores flags in a u64, btrfs_inode stores them in two
* separate u32s. These two functions convert between the two representations.
*/
static inline u64 btrfs_inode_combine_flags(u32 flags, u32 ro_flags)
{
return (flags | ((u64)ro_flags << 32));
}
static inline void btrfs_inode_split_flags(u64 inode_item_flags,
u32 *flags, u32 *ro_flags)
{
*flags = (u32)inode_item_flags;
*ro_flags = (u32)(inode_item_flags >> 32);
}
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_truncate_control *control);

View File

@ -70,6 +70,7 @@
#include "verity.h"
#include "super.h"
#include "orphan.h"
#include "backref.h"
struct btrfs_iget_args {
u64 ino;
@ -100,6 +101,18 @@ struct btrfs_rename_ctx {
u64 index;
};
/*
* Used by data_reloc_print_warning_inode() to pass needed info for filename
* resolution and output of error message.
*/
struct data_reloc_warn {
struct btrfs_path path;
struct btrfs_fs_info *fs_info;
u64 extent_item_size;
u64 logical;
int mirror_num;
};
static const struct inode_operations btrfs_dir_inode_operations;
static const struct inode_operations btrfs_symlink_inode_operations;
static const struct inode_operations btrfs_special_inode_operations;
@ -122,12 +135,198 @@ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
u64 ram_bytes, int compress_type,
int type);
static int data_reloc_print_warning_inode(u64 inum, u64 offset, u64 num_bytes,
u64 root, void *warn_ctx)
{
struct data_reloc_warn *warn = warn_ctx;
struct btrfs_fs_info *fs_info = warn->fs_info;
struct extent_buffer *eb;
struct btrfs_inode_item *inode_item;
struct inode_fs_paths *ipath = NULL;
struct btrfs_root *local_root;
struct btrfs_key key;
unsigned int nofs_flag;
u32 nlink;
int ret;
local_root = btrfs_get_fs_root(fs_info, root, true);
if (IS_ERR(local_root)) {
ret = PTR_ERR(local_root);
goto err;
}
/* This makes the path point to (inum INODE_ITEM ioff). */
key.objectid = inum;
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, local_root, &key, &warn->path, 0, 0);
if (ret) {
btrfs_put_root(local_root);
btrfs_release_path(&warn->path);
goto err;
}
eb = warn->path.nodes[0];
inode_item = btrfs_item_ptr(eb, warn->path.slots[0], struct btrfs_inode_item);
nlink = btrfs_inode_nlink(eb, inode_item);
btrfs_release_path(&warn->path);
nofs_flag = memalloc_nofs_save();
ipath = init_ipath(4096, local_root, &warn->path);
memalloc_nofs_restore(nofs_flag);
if (IS_ERR(ipath)) {
btrfs_put_root(local_root);
ret = PTR_ERR(ipath);
ipath = NULL;
/*
* -ENOMEM, not a critical error, just output an generic error
* without filename.
*/
btrfs_warn(fs_info,
"checksum error at logical %llu mirror %u root %llu, inode %llu offset %llu",
warn->logical, warn->mirror_num, root, inum, offset);
return ret;
}
ret = paths_from_inode(inum, ipath);
if (ret < 0)
goto err;
/*
* We deliberately ignore the bit ipath might have been too small to
* hold all of the paths here
*/
for (int i = 0; i < ipath->fspath->elem_cnt; i++) {
btrfs_warn(fs_info,
"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu length %u links %u (path: %s)",
warn->logical, warn->mirror_num, root, inum, offset,
fs_info->sectorsize, nlink,
(char *)(unsigned long)ipath->fspath->val[i]);
}
btrfs_put_root(local_root);
free_ipath(ipath);
return 0;
err:
btrfs_warn(fs_info,
"checksum error at logical %llu mirror %u root %llu inode %llu offset %llu, path resolving failed with ret=%d",
warn->logical, warn->mirror_num, root, inum, offset, ret);
free_ipath(ipath);
return ret;
}
/*
* Do extra user-friendly error output (e.g. lookup all the affected files).
*
* Return true if we succeeded doing the backref lookup.
* Return false if such lookup failed, and has to fallback to the old error message.
*/
static void print_data_reloc_error(const struct btrfs_inode *inode, u64 file_off,
const u8 *csum, const u8 *csum_expected,
int mirror_num)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_path path = { 0 };
struct btrfs_key found_key = { 0 };
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
const u32 csum_size = fs_info->csum_size;
u64 logical;
u64 flags;
u32 item_size;
int ret;
mutex_lock(&fs_info->reloc_mutex);
logical = btrfs_get_reloc_bg_bytenr(fs_info);
mutex_unlock(&fs_info->reloc_mutex);
if (logical == U64_MAX) {
btrfs_warn_rl(fs_info, "has data reloc tree but no running relocation");
btrfs_warn_rl(fs_info,
"csum failed root %lld ino %llu off %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
inode->root->root_key.objectid, btrfs_ino(inode), file_off,
CSUM_FMT_VALUE(csum_size, csum),
CSUM_FMT_VALUE(csum_size, csum_expected),
mirror_num);
return;
}
logical += file_off;
btrfs_warn_rl(fs_info,
"csum failed root %lld ino %llu off %llu logical %llu csum " CSUM_FMT " expected csum " CSUM_FMT " mirror %d",
inode->root->root_key.objectid,
btrfs_ino(inode), file_off, logical,
CSUM_FMT_VALUE(csum_size, csum),
CSUM_FMT_VALUE(csum_size, csum_expected),
mirror_num);
ret = extent_from_logical(fs_info, logical, &path, &found_key, &flags);
if (ret < 0) {
btrfs_err_rl(fs_info, "failed to lookup extent item for logical %llu: %d",
logical, ret);
return;
}
eb = path.nodes[0];
ei = btrfs_item_ptr(eb, path.slots[0], struct btrfs_extent_item);
item_size = btrfs_item_size(eb, path.slots[0]);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
unsigned long ptr = 0;
u64 ref_root;
u8 ref_level;
while (true) {
ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
item_size, &ref_root,
&ref_level);
if (ret < 0) {
btrfs_warn_rl(fs_info,
"failed to resolve tree backref for logical %llu: %d",
logical, ret);
break;
}
if (ret > 0)
break;
btrfs_warn_rl(fs_info,
"csum error at logical %llu mirror %u: metadata %s (level %d) in tree %llu",
logical, mirror_num,
(ref_level ? "node" : "leaf"),
ref_level, ref_root);
}
btrfs_release_path(&path);
} else {
struct btrfs_backref_walk_ctx ctx = { 0 };
struct data_reloc_warn reloc_warn = { 0 };
btrfs_release_path(&path);
ctx.bytenr = found_key.objectid;
ctx.extent_item_pos = logical - found_key.objectid;
ctx.fs_info = fs_info;
reloc_warn.logical = logical;
reloc_warn.extent_item_size = found_key.offset;
reloc_warn.mirror_num = mirror_num;
reloc_warn.fs_info = fs_info;
iterate_extent_inodes(&ctx, true,
data_reloc_print_warning_inode, &reloc_warn);
}
}
static void __cold btrfs_print_data_csum_error(struct btrfs_inode *inode,
u64 logical_start, u8 *csum, u8 *csum_expected, int mirror_num)
{
struct btrfs_root *root = inode->root;
const u32 csum_size = root->fs_info->csum_size;
/* For data reloc tree, it's better to do a backref lookup instead. */
if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID)
return print_data_reloc_error(inode, logical_start, csum,
csum_expected, mirror_num);
/* Output without objectid, which is more meaningful */
if (root->root_key.objectid >= BTRFS_LAST_FREE_OBJECTID) {
btrfs_warn_rl(root->fs_info,
@ -636,6 +835,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
{
struct btrfs_inode *inode = async_chunk->inode;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct address_space *mapping = inode->vfs_inode.i_mapping;
u64 blocksize = fs_info->sectorsize;
u64 start = async_chunk->start;
u64 end = async_chunk->end;
@ -750,7 +950,7 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
/* Compression level is applied here and only here */
ret = btrfs_compress_pages(
compress_type | (fs_info->compress_level << 4),
inode->vfs_inode.i_mapping, start,
mapping, start,
pages,
&nr_pages,
&total_in,
@ -793,9 +993,9 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
unsigned long clear_flags = EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
unsigned long page_error_op;
page_error_op = ret < 0 ? PAGE_SET_ERROR : 0;
if (ret < 0)
mapping_set_error(mapping, -EIO);
/*
* inline extent creation worked or returned error,
@ -812,7 +1012,6 @@ static noinline int compress_file_range(struct async_chunk *async_chunk)
clear_flags,
PAGE_UNLOCK |
PAGE_START_WRITEBACK |
page_error_op |
PAGE_END_WRITEBACK);
/*
@ -934,6 +1133,12 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
unsigned long nr_written = 0;
int page_started = 0;
int ret;
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.range_start = start,
.range_end = end,
.no_cgroup_owner = 1,
};
/*
* Call cow_file_range() to run the delalloc range directly, since we
@ -954,8 +1159,6 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
const u64 page_start = page_offset(locked_page);
const u64 page_end = page_start + PAGE_SIZE - 1;
btrfs_page_set_error(inode->root->fs_info, locked_page,
page_start, PAGE_SIZE);
set_page_writeback(locked_page);
end_page_writeback(locked_page);
end_extent_writepage(locked_page, ret, page_start, page_end);
@ -965,7 +1168,10 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
}
/* All pages will be unlocked, including @locked_page */
return extent_write_locked_range(&inode->vfs_inode, start, end);
wbc_attach_fdatawrite_inode(&wbc, &inode->vfs_inode);
ret = extent_write_locked_range(&inode->vfs_inode, start, end, &wbc);
wbc_detach_inode(&wbc);
return ret;
}
static int submit_one_async_extent(struct btrfs_inode *inode,
@ -976,6 +1182,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
struct extent_io_tree *io_tree = &inode->io_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ordered_extent *ordered;
struct btrfs_key ins;
struct page *locked_page = NULL;
struct extent_map *em;
@ -1037,7 +1244,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, start, /* file_offset */
ordered = btrfs_alloc_ordered_extent(inode, start, /* file_offset */
async_extent->ram_size, /* num_bytes */
async_extent->ram_size, /* ram_bytes */
ins.objectid, /* disk_bytenr */
@ -1045,8 +1252,9 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
0, /* offset */
1 << BTRFS_ORDERED_COMPRESSED,
async_extent->compress_type);
if (ret) {
if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
ret = PTR_ERR(ordered);
goto out_free_reserve;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@ -1055,11 +1263,7 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
PAGE_UNLOCK | PAGE_START_WRITEBACK);
btrfs_submit_compressed_write(inode, start, /* file_offset */
async_extent->ram_size, /* num_bytes */
ins.objectid, /* disk_bytenr */
ins.offset, /* compressed_len */
btrfs_submit_compressed_write(ordered,
async_extent->pages, /* compressed_pages */
async_extent->nr_pages,
async_chunk->write_flags, true);
@ -1074,12 +1278,13 @@ static int submit_one_async_extent(struct btrfs_inode *inode,
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
out_free:
mapping_set_error(inode->vfs_inode.i_mapping, -EIO);
extent_clear_unlock_delalloc(inode, start, end,
NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW |
EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK | PAGE_SET_ERROR);
PAGE_END_WRITEBACK);
free_async_extent_pages(async_extent);
goto done;
}
@ -1287,6 +1492,8 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
min_alloc_size = fs_info->sectorsize;
while (num_bytes > 0) {
struct btrfs_ordered_extent *ordered;
cur_alloc_size = num_bytes;
ret = btrfs_reserve_extent(root, cur_alloc_size, cur_alloc_size,
min_alloc_size, 0, alloc_hint,
@ -1311,16 +1518,18 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, start, ram_size, ram_size,
ins.objectid, cur_alloc_size, 0,
1 << BTRFS_ORDERED_REGULAR,
BTRFS_COMPRESS_NONE);
if (ret)
ordered = btrfs_alloc_ordered_extent(inode, start, ram_size,
ram_size, ins.objectid, cur_alloc_size,
0, 1 << BTRFS_ORDERED_REGULAR,
BTRFS_COMPRESS_NONE);
if (IS_ERR(ordered)) {
ret = PTR_ERR(ordered);
goto out_drop_extent_cache;
}
if (btrfs_is_data_reloc_root(root)) {
ret = btrfs_reloc_clone_csums(inode, start,
cur_alloc_size);
ret = btrfs_reloc_clone_csums(ordered);
/*
* Only drop cache here, and process as normal.
*
@ -1337,6 +1546,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
start + ram_size - 1,
false);
}
btrfs_put_ordered_extent(ordered);
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@ -1494,7 +1704,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
* ->inode could be NULL if async_chunk_start has failed to compress,
* in which case we don't have anything to submit, yet we need to
* always adjust ->async_delalloc_pages as its paired with the init
* happening in cow_file_range_async
* happening in run_delalloc_compressed
*/
if (async_chunk->inode)
submit_compressed_extents(async_chunk);
@ -1521,58 +1731,36 @@ static noinline void async_cow_free(struct btrfs_work *work)
kvfree(async_cow);
}
static int cow_file_range_async(struct btrfs_inode *inode,
struct writeback_control *wbc,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
static bool run_delalloc_compressed(struct btrfs_inode *inode,
struct writeback_control *wbc,
struct page *locked_page,
u64 start, u64 end, int *page_started,
unsigned long *nr_written)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct cgroup_subsys_state *blkcg_css = wbc_blkcg_css(wbc);
struct async_cow *ctx;
struct async_chunk *async_chunk;
unsigned long nr_pages;
u64 cur_end;
u64 num_chunks = DIV_ROUND_UP(end - start, SZ_512K);
int i;
bool should_compress;
unsigned nofs_flag;
const blk_opf_t write_flags = wbc_to_write_flags(wbc);
unlock_extent(&inode->io_tree, start, end, NULL);
if (inode->flags & BTRFS_INODE_NOCOMPRESS &&
!btrfs_test_opt(fs_info, FORCE_COMPRESS)) {
num_chunks = 1;
should_compress = false;
} else {
should_compress = true;
}
nofs_flag = memalloc_nofs_save();
ctx = kvmalloc(struct_size(ctx, chunks, num_chunks), GFP_KERNEL);
memalloc_nofs_restore(nofs_flag);
if (!ctx)
return false;
if (!ctx) {
unsigned clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC |
EXTENT_DELALLOC_NEW | EXTENT_DEFRAG |
EXTENT_DO_ACCOUNTING;
unsigned long page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK | PAGE_SET_ERROR;
extent_clear_unlock_delalloc(inode, start, end, locked_page,
clear_bits, page_ops);
return -ENOMEM;
}
unlock_extent(&inode->io_tree, start, end, NULL);
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
async_chunk = ctx->chunks;
atomic_set(&ctx->num_chunks, num_chunks);
for (i = 0; i < num_chunks; i++) {
if (should_compress)
cur_end = min(end, start + SZ_512K - 1);
else
cur_end = end;
u64 cur_end = min(end, start + SZ_512K - 1);
/*
* igrab is called higher up in the call chain, take only the
@ -1633,13 +1821,14 @@ static int cow_file_range_async(struct btrfs_inode *inode,
start = cur_end + 1;
}
*page_started = 1;
return 0;
return true;
}
static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
struct page *locked_page, u64 start,
u64 end, int *page_started,
unsigned long *nr_written)
unsigned long *nr_written,
struct writeback_control *wbc)
{
u64 done_offset = end;
int ret;
@ -1671,8 +1860,8 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
account_page_redirty(locked_page);
}
locked_page_done = true;
extent_write_locked_range(&inode->vfs_inode, start, done_offset);
extent_write_locked_range(&inode->vfs_inode, start, done_offset,
wbc);
start = done_offset + 1;
}
@ -1947,6 +2136,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
nocow_args.writeback_path = true;
while (1) {
struct btrfs_ordered_extent *ordered;
struct btrfs_key found_key;
struct btrfs_file_extent_item *fi;
struct extent_buffer *leaf;
@ -1954,6 +2144,7 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
u64 ram_bytes;
u64 nocow_end;
int extent_type;
bool is_prealloc;
nocow = false;
@ -2092,8 +2283,8 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
}
nocow_end = cur_offset + nocow_args.num_bytes - 1;
if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
is_prealloc = extent_type == BTRFS_FILE_EXTENT_PREALLOC;
if (is_prealloc) {
u64 orig_start = found_key.offset - nocow_args.extent_offset;
struct extent_map *em;
@ -2109,29 +2300,22 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
goto error;
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode,
cur_offset, nocow_args.num_bytes,
nocow_args.num_bytes,
nocow_args.disk_bytenr,
nocow_args.num_bytes, 0,
1 << BTRFS_ORDERED_PREALLOC,
BTRFS_COMPRESS_NONE);
if (ret) {
}
ordered = btrfs_alloc_ordered_extent(inode, cur_offset,
nocow_args.num_bytes, nocow_args.num_bytes,
nocow_args.disk_bytenr, nocow_args.num_bytes, 0,
is_prealloc
? (1 << BTRFS_ORDERED_PREALLOC)
: (1 << BTRFS_ORDERED_NOCOW),
BTRFS_COMPRESS_NONE);
if (IS_ERR(ordered)) {
if (is_prealloc) {
btrfs_drop_extent_map_range(inode, cur_offset,
nocow_end, false);
goto error;
}
} else {
ret = btrfs_add_ordered_extent(inode, cur_offset,
nocow_args.num_bytes,
nocow_args.num_bytes,
nocow_args.disk_bytenr,
nocow_args.num_bytes,
0,
1 << BTRFS_ORDERED_NOCOW,
BTRFS_COMPRESS_NONE);
if (ret)
goto error;
ret = PTR_ERR(ordered);
goto error;
}
if (nocow) {
@ -2145,8 +2329,8 @@ static noinline int run_delalloc_nocow(struct btrfs_inode *inode,
* extent_clear_unlock_delalloc() in error handler
* from freeing metadata of created ordered extent.
*/
ret = btrfs_reloc_clone_csums(inode, cur_offset,
nocow_args.num_bytes);
ret = btrfs_reloc_clone_csums(ordered);
btrfs_put_ordered_extent(ordered);
extent_clear_unlock_delalloc(inode, cur_offset, nocow_end,
locked_page, EXTENT_LOCKED |
@ -2214,7 +2398,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
u64 start, u64 end, int *page_started, unsigned long *nr_written,
struct writeback_control *wbc)
{
int ret;
int ret = 0;
const bool zoned = btrfs_is_zoned(inode->root->fs_info);
/*
@ -2235,19 +2419,23 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
ASSERT(!zoned || btrfs_is_data_reloc_root(inode->root));
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, nr_written);
} else if (!btrfs_inode_can_compress(inode) ||
!inode_need_compress(inode, start, end)) {
if (zoned)
ret = run_delalloc_zoned(inode, locked_page, start, end,
page_started, nr_written);
else
ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1, NULL);
} else {
set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
ret = cow_file_range_async(inode, wbc, locked_page, start, end,
page_started, nr_written);
goto out;
}
if (btrfs_inode_can_compress(inode) &&
inode_need_compress(inode, start, end) &&
run_delalloc_compressed(inode, wbc, locked_page, start,
end, page_started, nr_written))
goto out;
if (zoned)
ret = run_delalloc_zoned(inode, locked_page, start, end,
page_started, nr_written, wbc);
else
ret = cow_file_range(inode, locked_page, start, end,
page_started, nr_written, 1, NULL);
out:
ASSERT(ret <= 0);
if (ret)
btrfs_cleanup_ordered_extents(inode, locked_page, start,
@ -2515,125 +2703,42 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode,
}
}
/*
* Split off the first pre bytes from the extent_map at [start, start + len]
*
* This function is intended to be used only for extract_ordered_extent().
*/
static int split_extent_map(struct btrfs_inode *inode, u64 start, u64 len, u64 pre)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
struct extent_map *split_pre = NULL;
struct extent_map *split_mid = NULL;
int ret = 0;
unsigned long flags;
ASSERT(pre != 0);
ASSERT(pre < len);
split_pre = alloc_extent_map();
if (!split_pre)
return -ENOMEM;
split_mid = alloc_extent_map();
if (!split_mid) {
ret = -ENOMEM;
goto out_free_pre;
}
lock_extent(&inode->io_tree, start, start + len - 1, NULL);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
if (!em) {
ret = -EIO;
goto out_unlock;
}
ASSERT(em->len == len);
ASSERT(!test_bit(EXTENT_FLAG_COMPRESSED, &em->flags));
ASSERT(em->block_start < EXTENT_MAP_LAST_BYTE);
ASSERT(test_bit(EXTENT_FLAG_PINNED, &em->flags));
ASSERT(!test_bit(EXTENT_FLAG_LOGGING, &em->flags));
ASSERT(!list_empty(&em->list));
flags = em->flags;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
/* First, replace the em with a new extent_map starting from * em->start */
split_pre->start = em->start;
split_pre->len = pre;
split_pre->orig_start = split_pre->start;
split_pre->block_start = em->block_start;
split_pre->block_len = split_pre->len;
split_pre->orig_block_len = split_pre->block_len;
split_pre->ram_bytes = split_pre->len;
split_pre->flags = flags;
split_pre->compress_type = em->compress_type;
split_pre->generation = em->generation;
replace_extent_mapping(em_tree, em, split_pre, 1);
/*
* Now we only have an extent_map at:
* [em->start, em->start + pre]
*/
/* Insert the middle extent_map. */
split_mid->start = em->start + pre;
split_mid->len = em->len - pre;
split_mid->orig_start = split_mid->start;
split_mid->block_start = em->block_start + pre;
split_mid->block_len = split_mid->len;
split_mid->orig_block_len = split_mid->block_len;
split_mid->ram_bytes = split_mid->len;
split_mid->flags = flags;
split_mid->compress_type = em->compress_type;
split_mid->generation = em->generation;
add_extent_mapping(em_tree, split_mid, 1);
/* Once for us */
free_extent_map(em);
/* Once for the tree */
free_extent_map(em);
out_unlock:
write_unlock(&em_tree->lock);
unlock_extent(&inode->io_tree, start, start + len - 1, NULL);
free_extent_map(split_mid);
out_free_pre:
free_extent_map(split_pre);
return ret;
}
int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
struct btrfs_ordered_extent *ordered)
static int btrfs_extract_ordered_extent(struct btrfs_bio *bbio,
struct btrfs_ordered_extent *ordered)
{
u64 start = (u64)bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
u64 len = bbio->bio.bi_iter.bi_size;
struct btrfs_inode *inode = bbio->inode;
u64 ordered_len = ordered->num_bytes;
int ret = 0;
struct btrfs_ordered_extent *new;
int ret;
/* Must always be called for the beginning of an ordered extent. */
if (WARN_ON_ONCE(start != ordered->disk_bytenr))
return -EINVAL;
/* No need to split if the ordered extent covers the entire bio. */
if (ordered->disk_num_bytes == len)
if (ordered->disk_num_bytes == len) {
refcount_inc(&ordered->refs);
bbio->ordered = ordered;
return 0;
ret = btrfs_split_ordered_extent(ordered, len);
if (ret)
return ret;
}
/*
* Don't split the extent_map for NOCOW extents, as we're writing into
* a pre-existing one.
*/
if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
return 0;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) {
ret = split_extent_map(bbio->inode, bbio->file_offset,
ordered->num_bytes, len,
ordered->disk_bytenr);
if (ret)
return ret;
}
return split_extent_map(inode, bbio->file_offset, ordered_len, len);
new = btrfs_split_ordered_extent(ordered, len);
if (IS_ERR(new))
return PTR_ERR(new);
bbio->ordered = new;
return 0;
}
/*
@ -2651,7 +2756,7 @@ static int add_pending_csums(struct btrfs_trans_handle *trans,
trans->adding_csums = true;
if (!csum_root)
csum_root = btrfs_csum_root(trans->fs_info,
sum->bytenr);
sum->logical);
ret = btrfs_csum_file_blocks(trans, csum_root, sum);
trans->adding_csums = false;
if (ret)
@ -2689,8 +2794,7 @@ static int btrfs_find_new_delalloc_bytes(struct btrfs_inode *inode,
ret = set_extent_bit(&inode->io_tree, search_start,
search_start + em_len - 1,
EXTENT_DELALLOC_NEW, cached_state,
GFP_NOFS);
EXTENT_DELALLOC_NEW, cached_state);
next:
search_start = extent_map_end(em);
free_extent_map(em);
@ -2723,8 +2827,8 @@ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
return ret;
}
return set_extent_delalloc(&inode->io_tree, start, end, extra_bits,
cached_state);
return set_extent_bit(&inode->io_tree, start, end,
EXTENT_DELALLOC | extra_bits, cached_state);
}
/* see btrfs_writepage_start_hook for details on why this is required */
@ -2847,7 +2951,6 @@ static void btrfs_writepage_fixup_worker(struct btrfs_work *work)
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
clear_page_dirty_for_io(page);
SetPageError(page);
}
btrfs_page_clear_checked(inode->root->fs_info, page, page_start, PAGE_SIZE);
unlock_page(page);
@ -3068,7 +3171,7 @@ static int insert_ordered_extent_file_extent(struct btrfs_trans_handle *trans,
* an ordered extent if the range of bytes in the file it covers are
* fully written.
*/
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent)
{
struct btrfs_inode *inode = BTRFS_I(ordered_extent->inode);
struct btrfs_root *root = inode->root;
@ -3103,15 +3206,9 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
goto out;
}
/* A valid ->physical implies a write on a sequential zone. */
if (ordered_extent->physical != (u64)-1) {
btrfs_rewrite_logical_zoned(ordered_extent);
if (btrfs_is_zoned(fs_info))
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
} else if (btrfs_is_data_reloc_root(inode->root)) {
btrfs_zone_finish_endio(fs_info, ordered_extent->disk_bytenr,
ordered_extent->disk_num_bytes);
}
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
truncated = true;
@ -3279,6 +3376,14 @@ int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
return ret;
}
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered)
{
if (btrfs_is_zoned(btrfs_sb(ordered->inode->i_sb)) &&
!test_bit(BTRFS_ORDERED_IOERR, &ordered->flags))
btrfs_finish_ordered_zoned(ordered);
return btrfs_finish_one_ordered(ordered);
}
void btrfs_writepage_endio_finish_ordered(struct btrfs_inode *inode,
struct page *page, u64 start,
u64 end, bool uptodate)
@ -4226,7 +4331,7 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
}
btrfs_record_unlink_dir(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
0);
false);
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
&fname.disk_name);
@ -4801,7 +4906,7 @@ int btrfs_truncate_block(struct btrfs_inode *inode, loff_t from, loff_t len,
if (only_release_metadata)
set_extent_bit(&inode->io_tree, block_start, block_end,
EXTENT_NORESERVE, NULL, GFP_NOFS);
EXTENT_NORESERVE, NULL);
out_unlock:
if (ret) {
@ -7670,8 +7775,8 @@ static int btrfs_dio_iomap_end(struct inode *inode, loff_t pos, loff_t length,
pos += submitted;
length -= submitted;
if (write)
btrfs_mark_ordered_io_finished(BTRFS_I(inode), NULL,
pos, length, false);
btrfs_finish_ordered_extent(dio_data->ordered, NULL,
pos, length, false);
else
unlock_extent(&BTRFS_I(inode)->io_tree, pos,
pos + length - 1, NULL);
@ -7701,12 +7806,14 @@ static void btrfs_dio_end_io(struct btrfs_bio *bbio)
dip->file_offset, dip->bytes, bio->bi_status);
}
if (btrfs_op(bio) == BTRFS_MAP_WRITE)
btrfs_mark_ordered_io_finished(inode, NULL, dip->file_offset,
dip->bytes, !bio->bi_status);
else
if (btrfs_op(bio) == BTRFS_MAP_WRITE) {
btrfs_finish_ordered_extent(bbio->ordered, NULL,
dip->file_offset, dip->bytes,
!bio->bi_status);
} else {
unlock_extent(&inode->io_tree, dip->file_offset,
dip->file_offset + dip->bytes - 1, NULL);
}
bbio->bio.bi_private = bbio->private;
iomap_dio_bio_end_io(bio);
@ -7742,7 +7849,8 @@ static void btrfs_dio_submit_io(const struct iomap_iter *iter, struct bio *bio,
ret = btrfs_extract_ordered_extent(bbio, dio_data->ordered);
if (ret) {
btrfs_bio_end_io(bbio, errno_to_blk_status(ret));
bbio->bio.bi_status = errno_to_blk_status(ret);
btrfs_dio_end_io(bbio);
return;
}
}
@ -8236,7 +8344,7 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
int ret;
struct btrfs_trans_handle *trans;
u64 mask = fs_info->sectorsize - 1;
u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
const u64 min_size = btrfs_calc_metadata_size(fs_info, 1);
if (!skip_writeback) {
ret = btrfs_wait_ordered_range(&inode->vfs_inode,
@ -8293,7 +8401,15 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
/* Migrate the slack space for the truncate to our reserve */
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
min_size, false);
BUG_ON(ret);
/*
* We have reserved 2 metadata units when we started the transaction and
* min_size matches 1 unit, so this should never fail, but if it does,
* it's not critical we just fail truncation.
*/
if (WARN_ON(ret)) {
btrfs_end_transaction(trans);
goto out;
}
trans->block_rsv = rsv;
@ -8341,7 +8457,14 @@ static int btrfs_truncate(struct btrfs_inode *inode, bool skip_writeback)
btrfs_block_rsv_release(fs_info, rsv, -1, NULL);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, false);
BUG_ON(ret); /* shouldn't happen */
/*
* We have reserved 2 metadata units when we started the
* transaction and min_size matches 1 unit, so this should never
* fail, but if it does, it's not critical we just fail truncation.
*/
if (WARN_ON(ret))
break;
trans->block_rsv = rsv;
}
@ -8468,7 +8591,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
ei->io_tree.inode = ei;
extent_io_tree_init(fs_info, &ei->file_extent_tree,
IO_TREE_INODE_FILE_EXTENT);
atomic_set(&ei->sync_writers, 0);
mutex_init(&ei->log_mutex);
btrfs_ordered_inode_tree_init(&ei->ordered_tree);
INIT_LIST_HEAD(&ei->delalloc_inodes);
@ -8639,7 +8761,7 @@ static int btrfs_getattr(struct mnt_idmap *idmap,
inode_bytes = inode_get_bytes(inode);
spin_unlock(&BTRFS_I(inode)->lock);
stat->blocks = (ALIGN(inode_bytes, blocksize) +
ALIGN(delalloc_bytes, blocksize)) >> 9;
ALIGN(delalloc_bytes, blocksize)) >> SECTOR_SHIFT;
return 0;
}
@ -8795,9 +8917,9 @@ static int btrfs_rename_exchange(struct inode *old_dir,
if (old_dentry->d_parent != new_dentry->d_parent) {
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), 1);
BTRFS_I(old_inode), true);
btrfs_record_unlink_dir(trans, BTRFS_I(new_dir),
BTRFS_I(new_inode), 1);
BTRFS_I(new_inode), true);
}
/* src is a subvolume */
@ -9063,7 +9185,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
BTRFS_I(old_inode), 1);
BTRFS_I(old_inode), true);
if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {
ret = btrfs_unlink_subvol(trans, BTRFS_I(old_dir), old_dentry);
@ -10170,6 +10292,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
struct extent_io_tree *io_tree = &inode->io_tree;
struct extent_changeset *data_reserved = NULL;
struct extent_state *cached_state = NULL;
struct btrfs_ordered_extent *ordered;
int compression;
size_t orig_count;
u64 start, end;
@ -10346,14 +10469,15 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
}
free_extent_map(em);
ret = btrfs_add_ordered_extent(inode, start, num_bytes, ram_bytes,
ordered = btrfs_alloc_ordered_extent(inode, start, num_bytes, ram_bytes,
ins.objectid, ins.offset,
encoded->unencoded_offset,
(1 << BTRFS_ORDERED_ENCODED) |
(1 << BTRFS_ORDERED_COMPRESSED),
compression);
if (ret) {
if (IS_ERR(ordered)) {
btrfs_drop_extent_map_range(inode, start, end, false);
ret = PTR_ERR(ordered);
goto out_free_reserved;
}
btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@ -10365,8 +10489,7 @@ ssize_t btrfs_do_encoded_write(struct kiocb *iocb, struct iov_iter *from,
btrfs_delalloc_release_extents(inode, num_bytes);
btrfs_submit_compressed_write(inode, start, num_bytes, ins.objectid,
ins.offset, pages, nr_pages, 0, false);
btrfs_submit_compressed_write(ordered, pages, nr_pages, 0, false);
ret = orig_count;
goto out;
@ -10903,7 +11026,6 @@ static const struct address_space_operations btrfs_aops = {
.read_folio = btrfs_read_folio,
.writepages = btrfs_writepages,
.readahead = btrfs_readahead,
.direct_IO = noop_direct_IO,
.invalidate_folio = btrfs_invalidate_folio,
.release_folio = btrfs_release_folio,
.migrate_folio = btrfs_migrate_folio,

View File

@ -649,6 +649,8 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
}
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
/* Tree log can't currently deal with an inode which is a new root. */
btrfs_set_log_full_commit(trans);
ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
if (ret)
@ -757,10 +759,7 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
trans->bytes_reserved = 0;
btrfs_subvolume_release_metadata(root, &block_rsv);
if (ret)
btrfs_end_transaction(trans);
else
ret = btrfs_commit_transaction(trans);
btrfs_end_transaction(trans);
out_new_inode_args:
btrfs_new_inode_args_destroy(&new_inode_args);
out_inode:
@ -3113,6 +3112,13 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
struct btrfs_trans_handle *trans;
u64 transid;
/*
* Start orphan cleanup here for the given root in case it hasn't been
* started already by other means. Errors are handled in the other
* functions during transaction commit.
*/
btrfs_orphan_cleanup(root);
trans = btrfs_attach_transaction_barrier(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT)
@ -3134,14 +3140,13 @@ static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
void __user *argp)
{
u64 transid;
/* By default wait for the current transaction. */
u64 transid = 0;
if (argp) {
if (argp)
if (copy_from_user(&transid, argp, sizeof(transid)))
return -EFAULT;
} else {
transid = 0; /* current trans */
}
return btrfs_wait_for_commit(fs_info, transid);
}

View File

@ -57,8 +57,8 @@
static struct btrfs_lockdep_keyset {
u64 id; /* root objectid */
/* Longest entry: btrfs-free-space-00 */
char names[BTRFS_MAX_LEVEL][20];
/* Longest entry: btrfs-block-group-00 */
char names[BTRFS_MAX_LEVEL][24];
struct lock_class_key keys[BTRFS_MAX_LEVEL];
} btrfs_lockdep_keysets[] = {
{ .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
@ -72,6 +72,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
{ .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
{ .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
{ .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
{ .id = 0, DEFINE_NAME("tree") },
};

View File

@ -88,9 +88,9 @@ struct list_head *lzo_alloc_workspace(unsigned int level)
if (!workspace)
return ERR_PTR(-ENOMEM);
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL);
workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL);
workspace->mem = kvmalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL | __GFP_NOWARN);
workspace->buf = kvmalloc(WORKSPACE_BUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
workspace->cbuf = kvmalloc(WORKSPACE_CBUF_LENGTH, GFP_KERNEL | __GFP_NOWARN);
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;

View File

@ -252,14 +252,6 @@ void __cold _btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt,
}
#endif
#ifdef CONFIG_BTRFS_ASSERT
void __cold __noreturn btrfs_assertfail(const char *expr, const char *file, int line)
{
pr_err("assertion failed: %s, in %s:%d\n", expr, file, line);
BUG();
}
#endif
void __cold btrfs_print_v0_err(struct btrfs_fs_info *fs_info)
{
btrfs_err(fs_info,

View File

@ -4,14 +4,23 @@
#define BTRFS_MESSAGES_H
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bug.h>
struct btrfs_fs_info;
/*
* We want to be able to override this in btrfs-progs.
*/
#ifdef __KERNEL__
static inline __printf(2, 3) __cold
void btrfs_no_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
{
}
#endif
#ifdef CONFIG_PRINTK
#define btrfs_printk(fs_info, fmt, args...) \
@ -160,7 +169,11 @@ do { \
} while (0)
#ifdef CONFIG_BTRFS_ASSERT
void __cold __noreturn btrfs_assertfail(const char *expr, const char *file, int line);
#define btrfs_assertfail(expr, file, line) ({ \
pr_err("assertion failed: %s, in %s:%d\n", (expr), (file), (line)); \
BUG(); \
})
#define ASSERT(expr) \
(likely(expr) ? (void)0 : btrfs_assertfail(#expr, __FILE__, __LINE__))

View File

@ -143,4 +143,24 @@ static inline struct rb_node *rb_simple_insert(struct rb_root *root, u64 bytenr,
return NULL;
}
static inline bool bitmap_test_range_all_set(const unsigned long *addr,
unsigned long start,
unsigned long nbits)
{
unsigned long found_zero;
found_zero = find_next_zero_bit(addr, start + nbits, start);
return (found_zero == start + nbits);
}
static inline bool bitmap_test_range_all_zero(const unsigned long *addr,
unsigned long start,
unsigned long nbits)
{
unsigned long found_set;
found_set = find_next_bit(addr, start + nbits, start);
return (found_set == start + nbits);
}
#endif

View File

@ -146,6 +146,102 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree,
return ret;
}
static struct btrfs_ordered_extent *alloc_ordered_extent(
struct btrfs_inode *inode, u64 file_offset, u64 num_bytes,
u64 ram_bytes, u64 disk_bytenr, u64 disk_num_bytes,
u64 offset, unsigned long flags, int compress_type)
{
struct btrfs_ordered_extent *entry;
int ret;
if (flags &
((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
return ERR_PTR(ret);
} else {
/*
* The ordered extent has reserved qgroup space, release now
* and pass the reserved number for qgroup_record to free.
*/
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
if (ret < 0)
return ERR_PTR(ret);
}
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
if (!entry)
return ERR_PTR(-ENOMEM);
entry->file_offset = file_offset;
entry->num_bytes = num_bytes;
entry->ram_bytes = ram_bytes;
entry->disk_bytenr = disk_bytenr;
entry->disk_num_bytes = disk_num_bytes;
entry->offset = offset;
entry->bytes_left = num_bytes;
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
entry->flags = flags;
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
INIT_LIST_HEAD(&entry->log_list);
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
/*
* We don't need the count_max_extents here, we can assume that all of
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, 1);
spin_unlock(&inode->lock);
return entry;
}
static void insert_ordered_extent(struct btrfs_ordered_extent *entry)
{
struct btrfs_inode *inode = BTRFS_I(entry->inode);
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct rb_node *node;
trace_btrfs_ordered_extent_add(inode, entry);
percpu_counter_add_batch(&fs_info->ordered_bytes, entry->num_bytes,
fs_info->delalloc_batch);
/* One ref for the tree. */
refcount_inc(&entry->refs);
spin_lock_irq(&tree->lock);
node = tree_insert(&tree->tree, entry->file_offset, &entry->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"inconsistency in ordered tree at offset %llu",
entry->file_offset);
spin_unlock_irq(&tree->lock);
spin_lock(&root->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
&root->ordered_extents);
root->nr_ordered_extents++;
if (root->nr_ordered_extents == 1) {
spin_lock(&fs_info->ordered_root_lock);
BUG_ON(!list_empty(&root->ordered_root));
list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
}
spin_unlock(&root->ordered_extent_lock);
}
/*
* Add an ordered extent to the per-inode tree.
*
@ -171,122 +267,18 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type)
{
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct rb_node *node;
struct btrfs_ordered_extent *entry;
int ret;
if (flags &
((1 << BTRFS_ORDERED_NOCOW) | (1 << BTRFS_ORDERED_PREALLOC))) {
/* For nocow write, we can release the qgroup rsv right now */
ret = btrfs_qgroup_free_data(inode, NULL, file_offset, num_bytes);
if (ret < 0)
return ERR_PTR(ret);
ret = 0;
} else {
/*
* The ordered extent has reserved qgroup space, release now
* and pass the reserved number for qgroup_record to free.
*/
ret = btrfs_qgroup_release_data(inode, file_offset, num_bytes);
if (ret < 0)
return ERR_PTR(ret);
}
entry = kmem_cache_zalloc(btrfs_ordered_extent_cache, GFP_NOFS);
if (!entry)
return ERR_PTR(-ENOMEM);
entry->file_offset = file_offset;
entry->num_bytes = num_bytes;
entry->ram_bytes = ram_bytes;
entry->disk_bytenr = disk_bytenr;
entry->disk_num_bytes = disk_num_bytes;
entry->offset = offset;
entry->bytes_left = num_bytes;
entry->inode = igrab(&inode->vfs_inode);
entry->compress_type = compress_type;
entry->truncated_len = (u64)-1;
entry->qgroup_rsv = ret;
entry->physical = (u64)-1;
ASSERT((flags & ~BTRFS_ORDERED_TYPE_FLAGS) == 0);
entry->flags = flags;
percpu_counter_add_batch(&fs_info->ordered_bytes, num_bytes,
fs_info->delalloc_batch);
/* one ref for the tree */
refcount_set(&entry->refs, 1);
init_waitqueue_head(&entry->wait);
INIT_LIST_HEAD(&entry->list);
INIT_LIST_HEAD(&entry->log_list);
INIT_LIST_HEAD(&entry->root_extent_list);
INIT_LIST_HEAD(&entry->work_list);
init_completion(&entry->completion);
trace_btrfs_ordered_extent_add(inode, entry);
spin_lock_irq(&tree->lock);
node = tree_insert(&tree->tree, file_offset,
&entry->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"inconsistency in ordered tree at offset %llu",
file_offset);
spin_unlock_irq(&tree->lock);
spin_lock(&root->ordered_extent_lock);
list_add_tail(&entry->root_extent_list,
&root->ordered_extents);
root->nr_ordered_extents++;
if (root->nr_ordered_extents == 1) {
spin_lock(&fs_info->ordered_root_lock);
BUG_ON(!list_empty(&root->ordered_root));
list_add_tail(&root->ordered_root, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock);
}
spin_unlock(&root->ordered_extent_lock);
/*
* We don't need the count_max_extents here, we can assume that all of
* that work has been done at higher layers, so this is truly the
* smallest the extent is going to get.
*/
spin_lock(&inode->lock);
btrfs_mod_outstanding_extents(inode, 1);
spin_unlock(&inode->lock);
/* One ref for the returned entry to match semantics of lookup. */
refcount_inc(&entry->refs);
entry = alloc_ordered_extent(inode, file_offset, num_bytes, ram_bytes,
disk_bytenr, disk_num_bytes, offset, flags,
compress_type);
if (!IS_ERR(entry))
insert_ordered_extent(entry);
return entry;
}
/*
* Add a new btrfs_ordered_extent for the range, but drop the reference instead
* of returning it to the caller.
*/
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type)
{
struct btrfs_ordered_extent *ordered;
ordered = btrfs_alloc_ordered_extent(inode, file_offset, num_bytes,
ram_bytes, disk_bytenr,
disk_num_bytes, offset, flags,
compress_type);
if (IS_ERR(ordered))
return PTR_ERR(ordered);
btrfs_put_ordered_extent(ordered);
return 0;
}
/*
* Add a struct btrfs_ordered_sum into the list of checksums to be inserted
* when an ordered extent is finished. If the list covers more than one
@ -311,6 +303,90 @@ static void finish_ordered_fn(struct btrfs_work *work)
btrfs_finish_ordered_io(ordered_extent);
}
static bool can_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset,
u64 len, bool uptodate)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
lockdep_assert_held(&inode->ordered_tree.lock);
if (page) {
ASSERT(page->mapping);
ASSERT(page_offset(page) <= file_offset);
ASSERT(file_offset + len <= page_offset(page) + PAGE_SIZE);
/*
* Ordered (Private2) bit indicates whether we still have
* pending io unfinished for the ordered extent.
*
* If there's no such bit, we need to skip to next range.
*/
if (!btrfs_page_test_ordered(fs_info, page, file_offset, len))
return false;
btrfs_page_clear_ordered(fs_info, page, file_offset, len);
}
/* Now we're fine to update the accounting. */
if (WARN_ON_ONCE(len > ordered->bytes_left)) {
btrfs_crit(fs_info,
"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%llu left=%llu",
inode->root->root_key.objectid, btrfs_ino(inode),
ordered->file_offset, ordered->num_bytes,
len, ordered->bytes_left);
ordered->bytes_left = 0;
} else {
ordered->bytes_left -= len;
}
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
if (ordered->bytes_left)
return false;
/*
* All the IO of the ordered extent is finished, we need to queue
* the finish_func to be executed.
*/
set_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags);
cond_wake_up(&ordered->wait);
refcount_inc(&ordered->refs);
trace_btrfs_ordered_extent_mark_finished(inode, ordered);
return true;
}
static void btrfs_queue_ordered_fn(struct btrfs_ordered_extent *ordered)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_workqueue *wq = btrfs_is_free_space_inode(inode) ?
fs_info->endio_freespace_worker : fs_info->endio_write_workers;
btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL);
btrfs_queue_work(wq, &ordered->work);
}
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, u64 len,
bool uptodate)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
unsigned long flags;
bool ret;
trace_btrfs_finish_ordered_extent(inode, file_offset, len, uptodate);
spin_lock_irqsave(&inode->ordered_tree.lock, flags);
ret = can_finish_ordered_extent(ordered, page, file_offset, len, uptodate);
spin_unlock_irqrestore(&inode->ordered_tree.lock, flags);
if (ret)
btrfs_queue_ordered_fn(ordered);
return ret;
}
/*
* Mark all ordered extents io inside the specified range finished.
*
@ -329,22 +405,11 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
u64 num_bytes, bool uptodate)
{
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_workqueue *wq;
struct rb_node *node;
struct btrfs_ordered_extent *entry = NULL;
unsigned long flags;
u64 cur = file_offset;
if (btrfs_is_free_space_inode(inode))
wq = fs_info->endio_freespace_worker;
else
wq = fs_info->endio_write_workers;
if (page)
ASSERT(page->mapping && page_offset(page) <= file_offset &&
file_offset + num_bytes <= page_offset(page) + PAGE_SIZE);
spin_lock_irqsave(&tree->lock, flags);
while (cur < file_offset + num_bytes) {
u64 entry_end;
@ -397,50 +462,9 @@ void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
ASSERT(end + 1 - cur < U32_MAX);
len = end + 1 - cur;
if (page) {
/*
* Ordered (Private2) bit indicates whether we still
* have pending io unfinished for the ordered extent.
*
* If there's no such bit, we need to skip to next range.
*/
if (!btrfs_page_test_ordered(fs_info, page, cur, len)) {
cur += len;
continue;
}
btrfs_page_clear_ordered(fs_info, page, cur, len);
}
/* Now we're fine to update the accounting */
if (unlikely(len > entry->bytes_left)) {
WARN_ON(1);
btrfs_crit(fs_info,
"bad ordered extent accounting, root=%llu ino=%llu OE offset=%llu OE len=%llu to_dec=%u left=%llu",
inode->root->root_key.objectid,
btrfs_ino(inode),
entry->file_offset,
entry->num_bytes,
len, entry->bytes_left);
entry->bytes_left = 0;
} else {
entry->bytes_left -= len;
}
if (!uptodate)
set_bit(BTRFS_ORDERED_IOERR, &entry->flags);
/*
* All the IO of the ordered extent is finished, we need to queue
* the finish_func to be executed.
*/
if (entry->bytes_left == 0) {
set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
cond_wake_up(&entry->wait);
refcount_inc(&entry->refs);
trace_btrfs_ordered_extent_mark_finished(inode, entry);
if (can_finish_ordered_extent(entry, page, cur, len, uptodate)) {
spin_unlock_irqrestore(&tree->lock, flags);
btrfs_init_work(&entry->work, finish_ordered_fn, NULL, NULL);
btrfs_queue_work(wq, &entry->work);
btrfs_queue_ordered_fn(entry);
spin_lock_irqsave(&tree->lock, flags);
}
cur += len;
@ -564,7 +588,7 @@ void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
freespace_inode = btrfs_is_free_space_inode(btrfs_inode);
btrfs_lockdep_acquire(fs_info, btrfs_trans_pending_ordered);
/* This is paired with btrfs_add_ordered_extent. */
/* This is paired with btrfs_alloc_ordered_extent. */
spin_lock(&btrfs_inode->lock);
btrfs_mod_outstanding_extents(btrfs_inode, -1);
spin_unlock(&btrfs_inode->lock);
@ -1117,17 +1141,22 @@ bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
}
/* Split out a new ordered extent for this first @len bytes of @ordered. */
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
struct btrfs_ordered_extent *btrfs_split_ordered_extent(
struct btrfs_ordered_extent *ordered, u64 len)
{
struct inode *inode = ordered->inode;
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_ordered_inode_tree *tree = &inode->ordered_tree;
struct btrfs_root *root = inode->root;
struct btrfs_fs_info *fs_info = root->fs_info;
u64 file_offset = ordered->file_offset;
u64 disk_bytenr = ordered->disk_bytenr;
unsigned long flags = ordered->flags & BTRFS_ORDERED_TYPE_FLAGS;
unsigned long flags = ordered->flags;
struct btrfs_ordered_sum *sum, *tmpsum;
struct btrfs_ordered_extent *new;
struct rb_node *node;
u64 offset = 0;
trace_btrfs_ordered_extent_split(BTRFS_I(inode), ordered);
trace_btrfs_ordered_extent_split(inode, ordered);
ASSERT(!(flags & (1U << BTRFS_ORDERED_COMPRESSED)));
@ -1136,18 +1165,27 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
* reduce the original extent to a zero length either.
*/
if (WARN_ON_ONCE(len >= ordered->num_bytes))
return -EINVAL;
/* We cannot split once ordered extent is past end_bio. */
if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
return -EINVAL;
return ERR_PTR(-EINVAL);
/* We cannot split partially completed ordered extents. */
if (ordered->bytes_left) {
ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS));
if (WARN_ON_ONCE(ordered->bytes_left != ordered->disk_num_bytes))
return ERR_PTR(-EINVAL);
}
/* We cannot split a compressed ordered extent. */
if (WARN_ON_ONCE(ordered->disk_num_bytes != ordered->num_bytes))
return -EINVAL;
/* Checksum list should be empty. */
if (WARN_ON_ONCE(!list_empty(&ordered->list)))
return -EINVAL;
return ERR_PTR(-EINVAL);
spin_lock_irq(&tree->lock);
new = alloc_ordered_extent(inode, file_offset, len, len, disk_bytenr,
len, 0, flags, ordered->compress_type);
if (IS_ERR(new))
return new;
/* One ref for the tree. */
refcount_inc(&new->refs);
spin_lock_irq(&root->ordered_extent_lock);
spin_lock(&tree->lock);
/* Remove from tree once */
node = &ordered->rb_node;
rb_erase(node, &tree->tree);
@ -1159,26 +1197,48 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len)
ordered->disk_bytenr += len;
ordered->num_bytes -= len;
ordered->disk_num_bytes -= len;
ordered->bytes_left -= len;
if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) {
ASSERT(ordered->bytes_left == 0);
new->bytes_left = 0;
} else {
ordered->bytes_left -= len;
}
if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags)) {
if (ordered->truncated_len > len) {
ordered->truncated_len -= len;
} else {
new->truncated_len = ordered->truncated_len;
ordered->truncated_len = 0;
}
}
list_for_each_entry_safe(sum, tmpsum, &ordered->list, list) {
if (offset == len)
break;
list_move_tail(&sum->list, &new->list);
offset += sum->len;
}
/* Re-insert the node */
node = tree_insert(&tree->tree, ordered->file_offset, &ordered->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"zoned: inconsistency in ordered tree at offset %llu",
ordered->file_offset);
ordered->file_offset);
spin_unlock_irq(&tree->lock);
node = tree_insert(&tree->tree, new->file_offset, &new->rb_node);
if (node)
btrfs_panic(fs_info, -EEXIST,
"zoned: inconsistency in ordered tree at offset %llu",
new->file_offset);
spin_unlock(&tree->lock);
/*
* The splitting extent is already counted and will be added again in
* btrfs_add_ordered_extent(). Subtract len to avoid double counting.
*/
percpu_counter_add_batch(&fs_info->ordered_bytes, -len, fs_info->delalloc_batch);
return btrfs_add_ordered_extent(BTRFS_I(inode), file_offset, len, len,
disk_bytenr, len, 0, flags,
ordered->compress_type);
list_add_tail(&new->root_extent_list, &root->ordered_extents);
root->nr_ordered_extents++;
spin_unlock_irq(&root->ordered_extent_lock);
return new;
}
int __init ordered_data_init(void)

View File

@ -14,13 +14,13 @@ struct btrfs_ordered_inode_tree {
};
struct btrfs_ordered_sum {
/* bytenr is the start of this extent on disk */
u64 bytenr;
/*
* this is the length in bytes covered by the sums array below.
* Logical start address and length for of the blocks covered by
* the sums array.
*/
int len;
u64 logical;
u32 len;
struct list_head list;
/* last field is a variable length array of csums */
u8 sums[];
@ -151,12 +151,6 @@ struct btrfs_ordered_extent {
struct completion completion;
struct btrfs_work flush_work;
struct list_head work_list;
/*
* Used to reverse-map physical address returned from ZONE_APPEND write
* command in a workqueue context
*/
u64 physical;
};
static inline void
@ -167,11 +161,15 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
t->last = NULL;
}
int btrfs_finish_one_ordered(struct btrfs_ordered_extent *ordered_extent);
int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
void btrfs_remove_ordered_extent(struct btrfs_inode *btrfs_inode,
struct btrfs_ordered_extent *entry);
bool btrfs_finish_ordered_extent(struct btrfs_ordered_extent *ordered,
struct page *page, u64 file_offset, u64 len,
bool uptodate);
void btrfs_mark_ordered_io_finished(struct btrfs_inode *inode,
struct page *page, u64 file_offset,
u64 num_bytes, bool uptodate);
@ -183,10 +181,6 @@ struct btrfs_ordered_extent *btrfs_alloc_ordered_extent(
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type);
int btrfs_add_ordered_extent(struct btrfs_inode *inode, u64 file_offset,
u64 num_bytes, u64 ram_bytes, u64 disk_bytenr,
u64 disk_num_bytes, u64 offset, unsigned long flags,
int compress_type);
void btrfs_add_ordered_sum(struct btrfs_ordered_extent *entry,
struct btrfs_ordered_sum *sum);
struct btrfs_ordered_extent *btrfs_lookup_ordered_extent(struct btrfs_inode *inode,
@ -212,7 +206,8 @@ void btrfs_lock_and_flush_ordered_range(struct btrfs_inode *inode, u64 start,
struct extent_state **cached_state);
bool btrfs_try_lock_ordered_range(struct btrfs_inode *inode, u64 start, u64 end,
struct extent_state **cached_state);
int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 len);
struct btrfs_ordered_extent *btrfs_split_ordered_extent(
struct btrfs_ordered_extent *ordered, u64 len);
int __init ordered_data_init(void);
void __cold ordered_data_exit(void);

View File

@ -49,7 +49,7 @@ const char *btrfs_root_name(const struct btrfs_key *key, char *buf)
return buf;
}
static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
static void print_chunk(const struct extent_buffer *eb, struct btrfs_chunk *chunk)
{
int num_stripes = btrfs_chunk_num_stripes(eb, chunk);
int i;
@ -62,7 +62,7 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
btrfs_stripe_offset_nr(eb, chunk, i));
}
}
static void print_dev_item(struct extent_buffer *eb,
static void print_dev_item(const struct extent_buffer *eb,
struct btrfs_dev_item *dev_item)
{
pr_info("\t\tdev item devid %llu total_bytes %llu bytes used %llu\n",
@ -70,7 +70,7 @@ static void print_dev_item(struct extent_buffer *eb,
btrfs_device_total_bytes(eb, dev_item),
btrfs_device_bytes_used(eb, dev_item));
}
static void print_extent_data_ref(struct extent_buffer *eb,
static void print_extent_data_ref(const struct extent_buffer *eb,
struct btrfs_extent_data_ref *ref)
{
pr_cont("extent data backref root %llu objectid %llu offset %llu count %u\n",
@ -80,7 +80,7 @@ static void print_extent_data_ref(struct extent_buffer *eb,
btrfs_extent_data_ref_count(eb, ref));
}
static void print_extent_item(struct extent_buffer *eb, int slot, int type)
static void print_extent_item(const struct extent_buffer *eb, int slot, int type)
{
struct btrfs_extent_item *ei;
struct btrfs_extent_inline_ref *iref;
@ -169,7 +169,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot, int type)
WARN_ON(ptr > end);
}
static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
static void print_uuid_item(const struct extent_buffer *l, unsigned long offset,
u32 item_size)
{
if (!IS_ALIGNED(item_size, sizeof(u64))) {
@ -191,7 +191,7 @@ static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
* Helper to output refs and locking status of extent buffer. Useful to debug
* race condition related problems.
*/
static void print_eb_refs_lock(struct extent_buffer *eb)
static void print_eb_refs_lock(const struct extent_buffer *eb)
{
#ifdef CONFIG_BTRFS_DEBUG
btrfs_info(eb->fs_info, "refs %u lock_owner %u current %u",
@ -199,7 +199,7 @@ static void print_eb_refs_lock(struct extent_buffer *eb)
#endif
}
void btrfs_print_leaf(struct extent_buffer *l)
void btrfs_print_leaf(const struct extent_buffer *l)
{
struct btrfs_fs_info *fs_info;
int i;
@ -355,7 +355,7 @@ void btrfs_print_leaf(struct extent_buffer *l)
}
}
void btrfs_print_tree(struct extent_buffer *c, bool follow)
void btrfs_print_tree(const struct extent_buffer *c, bool follow)
{
struct btrfs_fs_info *fs_info;
int i; u32 nr;

View File

@ -9,8 +9,8 @@
/* Buffer size to contain tree name and possibly additional data (offset) */
#define BTRFS_ROOT_NAME_BUF_LEN 48
void btrfs_print_leaf(struct extent_buffer *l);
void btrfs_print_tree(struct extent_buffer *c, bool follow);
void btrfs_print_leaf(const struct extent_buffer *l);
void btrfs_print_tree(const struct extent_buffer *c, bool follow);
const char *btrfs_root_name(const struct btrfs_key *key, char *buf);
#endif

View File

@ -1232,12 +1232,23 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
int ret = 0;
/*
* We need to have subvol_sem write locked, to prevent races between
* concurrent tasks trying to disable quotas, because we will unlock
* and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes.
* We need to have subvol_sem write locked to prevent races with
* snapshot creation.
*/
lockdep_assert_held_write(&fs_info->subvol_sem);
/*
* Lock the cleaner mutex to prevent races with concurrent relocation,
* because relocation may be building backrefs for blocks of the quota
* root while we are deleting the root. This is like dropping fs roots
* of deleted snapshots/subvolumes, we need the same protection.
*
* This also prevents races between concurrent tasks trying to disable
* quotas, because we will unlock and relock qgroup_ioctl_lock across
* BTRFS_FS_QUOTA_ENABLED changes.
*/
mutex_lock(&fs_info->cleaner_mutex);
mutex_lock(&fs_info->qgroup_ioctl_lock);
if (!fs_info->quota_root)
goto out;
@ -1301,7 +1312,9 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
goto out;
}
spin_lock(&fs_info->trans_lock);
list_del(&quota_root->dirty_list);
spin_unlock(&fs_info->trans_lock);
btrfs_tree_lock(quota_root->node);
btrfs_clear_buffer_dirty(trans, quota_root->node);
@ -1317,6 +1330,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info)
btrfs_end_transaction(trans);
else if (trans)
ret = btrfs_end_transaction(trans);
mutex_unlock(&fs_info->cleaner_mutex);
return ret;
}

View File

@ -1079,7 +1079,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
/* see if we can add this page onto our existing bio */
if (last) {
u64 last_end = last->bi_iter.bi_sector << 9;
u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT;
last_end += last->bi_iter.bi_size;
/*
@ -1099,7 +1099,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
bio = bio_alloc(stripe->dev->bdev,
max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
op, GFP_NOFS);
bio->bi_iter.bi_sector = disk_start >> 9;
bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
bio->bi_private = rbio;
__bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
@ -2747,3 +2747,48 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
if (!lock_stripe_add(rbio))
start_async_work(rbio, scrub_rbio_work_locked);
}
/*
* This is for scrub call sites where we already have correct data contents.
* This allows us to avoid reading data stripes again.
*
* Unfortunately here we have to do page copy, other than reusing the pages.
* This is due to the fact rbio has its own page management for its cache.
*/
void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
struct page **data_pages, u64 data_logical)
{
const u64 offset_in_full_stripe = data_logical -
rbio->bioc->full_stripe_logical;
const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
const u32 sectors_per_page = PAGE_SIZE / sectorsize;
int ret;
/*
* If we hit ENOMEM temporarily, but later at
* raid56_parity_submit_scrub_rbio() time it succeeded, we just do
* the extra read, not a big deal.
*
* If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time,
* the bio would got proper error number set.
*/
ret = alloc_rbio_data_pages(rbio);
if (ret < 0)
return;
/* data_logical must be at stripe boundary and inside the full stripe. */
ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
struct page *dst = rbio->stripe_pages[page_nr + page_index];
struct page *src = data_pages[page_nr];
memcpy_page(dst, 0, src, 0, PAGE_SIZE);
for (int sector_nr = sectors_per_page * page_index;
sector_nr < sectors_per_page * (page_index + 1);
sector_nr++)
rbio->stripe_sectors[sector_nr].uptodate = true;
}
}

View File

@ -193,6 +193,9 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
unsigned long *dbitmap, int stripe_nsectors);
void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
struct page **data_pages, u64 data_logical);
int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);

View File

@ -174,8 +174,8 @@ static void mark_block_processed(struct reloc_control *rc,
in_range(node->bytenr, rc->block_group->start,
rc->block_group->length)) {
blocksize = rc->extent_root->fs_info->nodesize;
set_extent_bits(&rc->processed_blocks, node->bytenr,
node->bytenr + blocksize - 1, EXTENT_DIRTY);
set_extent_bit(&rc->processed_blocks, node->bytenr,
node->bytenr + blocksize - 1, EXTENT_DIRTY, NULL);
}
node->processed = 1;
}
@ -3051,9 +3051,9 @@ static int relocate_one_page(struct inode *inode, struct file_ra_state *ra,
u64 boundary_end = boundary_start +
fs_info->sectorsize - 1;
set_extent_bits(&BTRFS_I(inode)->io_tree,
boundary_start, boundary_end,
EXTENT_BOUNDARY);
set_extent_bit(&BTRFS_I(inode)->io_tree,
boundary_start, boundary_end,
EXTENT_BOUNDARY, NULL);
}
unlock_extent(&BTRFS_I(inode)->io_tree, clamped_start, clamped_end,
&cached_state);
@ -4342,29 +4342,25 @@ int btrfs_recover_relocation(struct btrfs_fs_info *fs_info)
* cloning checksum properly handles the nodatasum extents.
* it also saves CPU time to re-calculate the checksum.
*/
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_root *csum_root;
struct btrfs_ordered_sum *sums;
struct btrfs_ordered_extent *ordered;
int ret;
u64 disk_bytenr;
u64 new_bytenr;
u64 disk_bytenr = ordered->file_offset + inode->index_cnt;
struct btrfs_root *csum_root = btrfs_csum_root(fs_info, disk_bytenr);
LIST_HEAD(list);
int ret;
ordered = btrfs_lookup_ordered_extent(inode, file_pos);
BUG_ON(ordered->file_offset != file_pos || ordered->num_bytes != len);
disk_bytenr = file_pos + inode->index_cnt;
csum_root = btrfs_csum_root(fs_info, disk_bytenr);
ret = btrfs_lookup_csums_list(csum_root, disk_bytenr,
disk_bytenr + len - 1, &list, 0, false);
disk_bytenr + ordered->num_bytes - 1,
&list, 0, false);
if (ret)
goto out;
return ret;
while (!list_empty(&list)) {
sums = list_entry(list.next, struct btrfs_ordered_sum, list);
struct btrfs_ordered_sum *sums =
list_entry(list.next, struct btrfs_ordered_sum, list);
list_del_init(&sums->list);
/*
@ -4379,14 +4375,11 @@ int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len)
* disk_len vs real len like with real inodes since it's all
* disk length.
*/
new_bytenr = ordered->disk_bytenr + sums->bytenr - disk_bytenr;
sums->bytenr = new_bytenr;
sums->logical = ordered->disk_bytenr + sums->logical - disk_bytenr;
btrfs_add_ordered_sum(ordered, sums);
}
out:
btrfs_put_ordered_extent(ordered);
return ret;
return 0;
}
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
@ -4523,3 +4516,19 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
ret = clone_backref_node(trans, rc, root, reloc_root);
return ret;
}
/*
* Get the current bytenr for the block group which is being relocated.
*
* Return U64_MAX if no running relocation.
*/
u64 btrfs_get_reloc_bg_bytenr(struct btrfs_fs_info *fs_info)
{
u64 logical = U64_MAX;
lockdep_assert_held(&fs_info->reloc_mutex);
if (fs_info->reloc_ctl && fs_info->reloc_ctl->block_group)
logical = fs_info->reloc_ctl->block_group->start;
return logical;
}

View File

@ -8,7 +8,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *r
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_fs_info *fs_info);
int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len);
int btrfs_reloc_clone_csums(struct btrfs_ordered_extent *ordered);
int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *buf,
struct extent_buffer *cow);
@ -19,5 +19,6 @@ int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info);
struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr);
int btrfs_should_ignore_reloc_root(struct btrfs_root *root);
u64 btrfs_get_reloc_bg_bytenr(struct btrfs_fs_info *fs_info);
#endif

View File

@ -177,7 +177,6 @@ struct scrub_ctx {
struct btrfs_fs_info *fs_info;
int first_free;
int cur_stripe;
struct list_head csum_list;
atomic_t cancel_req;
int readonly;
int sectors_per_bio;
@ -309,17 +308,6 @@ static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
scrub_pause_off(fs_info);
}
static void scrub_free_csums(struct scrub_ctx *sctx)
{
while (!list_empty(&sctx->csum_list)) {
struct btrfs_ordered_sum *sum;
sum = list_first_entry(&sctx->csum_list,
struct btrfs_ordered_sum, list);
list_del(&sum->list);
kfree(sum);
}
}
static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
{
int i;
@ -330,7 +318,6 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++)
release_scrub_stripe(&sctx->stripes[i]);
scrub_free_csums(sctx);
kfree(sctx);
}
@ -352,7 +339,6 @@ static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(
refcount_set(&sctx->refs, 1);
sctx->is_dev_replace = is_dev_replace;
sctx->fs_info = fs_info;
INIT_LIST_HEAD(&sctx->csum_list);
for (i = 0; i < SCRUB_STRIPES_PER_SCTX; i++) {
int ret;
@ -479,11 +465,8 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
struct extent_buffer *eb;
struct btrfs_extent_item *ei;
struct scrub_warning swarn;
unsigned long ptr = 0;
u64 flags = 0;
u64 ref_root;
u32 item_size;
u8 ref_level = 0;
int ret;
/* Super block error, no need to search extent tree. */
@ -513,19 +496,28 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
item_size = btrfs_item_size(eb, path->slots[0]);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
unsigned long ptr = 0;
u8 ref_level;
u64 ref_root;
while (true) {
ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
item_size, &ref_root,
&ref_level);
if (ret < 0) {
btrfs_warn(fs_info,
"failed to resolve tree backref for logical %llu: %d",
swarn.logical, ret);
break;
}
if (ret > 0)
break;
btrfs_warn_in_rcu(fs_info,
"%s at logical %llu on dev %s, physical %llu: metadata %s (level %d) in tree %llu",
errstr, swarn.logical,
btrfs_dev_name(dev),
swarn.physical,
ref_level ? "node" : "leaf",
ret < 0 ? -1 : ref_level,
ret < 0 ? -1 : ref_root);
} while (ret != 1);
errstr, swarn.logical, btrfs_dev_name(dev),
swarn.physical, (ref_level ? "node" : "leaf"),
ref_level, ref_root);
}
btrfs_release_path(path);
} else {
struct btrfs_backref_walk_ctx ctx = { 0 };
@ -546,48 +538,6 @@ static void scrub_print_common_warning(const char *errstr, struct btrfs_device *
btrfs_free_path(path);
}
static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc)
{
if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID5)
return 2;
else if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID6)
return 3;
else
return (int)bioc->num_stripes;
}
static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type,
u64 full_stripe_logical,
int nstripes, int mirror,
int *stripe_index,
u64 *stripe_offset)
{
int i;
if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ?
nstripes - 1 : nstripes - 2;
/* RAID5/6 */
for (i = 0; i < nr_data_stripes; i++) {
const u64 data_stripe_start = full_stripe_logical +
(i * BTRFS_STRIPE_LEN);
if (logical >= data_stripe_start &&
logical < data_stripe_start + BTRFS_STRIPE_LEN)
break;
}
*stripe_index = i;
*stripe_offset = (logical - full_stripe_logical) &
BTRFS_STRIPE_LEN_MASK;
} else {
/* The other RAID type */
*stripe_index = mirror;
*stripe_offset = 0;
}
}
static int fill_writer_pointer_gap(struct scrub_ctx *sctx, u64 physical)
{
int ret = 0;
@ -924,8 +874,9 @@ static void scrub_stripe_report_errors(struct scrub_ctx *sctx,
/* For scrub, our mirror_num should always start at 1. */
ASSERT(stripe->mirror_num >= 1);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
stripe->logical, &mapped_len, &bioc);
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
stripe->logical, &mapped_len, &bioc,
NULL, NULL, 1);
/*
* If we failed, dev will be NULL, and later detailed reports
* will just be skipped.
@ -1957,8 +1908,8 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
bio->bi_end_io = raid56_scrub_wait_endio;
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
&length, &bioc);
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, full_stripe_start,
&length, &bioc, NULL, NULL, 1);
if (ret < 0) {
btrfs_put_bioc(bioc);
btrfs_bio_counter_dec(fs_info);
@ -1972,6 +1923,13 @@ static int scrub_raid56_parity_stripe(struct scrub_ctx *sctx,
btrfs_bio_counter_dec(fs_info);
goto out;
}
/* Use the recovered stripes as cache to avoid read them from disk again. */
for (int i = 0; i < data_stripes; i++) {
stripe = &sctx->raid56_data_stripes[i];
raid56_parity_cache_data_pages(rbio, stripe->pages,
full_stripe_start + (i << BTRFS_STRIPE_LEN_SHIFT));
}
raid56_parity_submit_scrub_rbio(rbio);
wait_for_completion_io(&io_done);
ret = blk_status_to_errno(bio->bi_status);
@ -2740,17 +2698,12 @@ static void scrub_workers_put(struct btrfs_fs_info *fs_info)
if (refcount_dec_and_mutex_lock(&fs_info->scrub_workers_refcnt,
&fs_info->scrub_lock)) {
struct workqueue_struct *scrub_workers = fs_info->scrub_workers;
struct workqueue_struct *scrub_wr_comp =
fs_info->scrub_wr_completion_workers;
fs_info->scrub_workers = NULL;
fs_info->scrub_wr_completion_workers = NULL;
mutex_unlock(&fs_info->scrub_lock);
if (scrub_workers)
destroy_workqueue(scrub_workers);
if (scrub_wr_comp)
destroy_workqueue(scrub_wr_comp);
}
}
@ -2761,7 +2714,6 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
int is_dev_replace)
{
struct workqueue_struct *scrub_workers = NULL;
struct workqueue_struct *scrub_wr_comp = NULL;
unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND;
int max_active = fs_info->thread_pool_size;
int ret = -ENOMEM;
@ -2769,21 +2721,17 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
if (refcount_inc_not_zero(&fs_info->scrub_workers_refcnt))
return 0;
scrub_workers = alloc_workqueue("btrfs-scrub", flags,
is_dev_replace ? 1 : max_active);
if (is_dev_replace)
scrub_workers = alloc_ordered_workqueue("btrfs-scrub", flags);
else
scrub_workers = alloc_workqueue("btrfs-scrub", flags, max_active);
if (!scrub_workers)
goto fail_scrub_workers;
scrub_wr_comp = alloc_workqueue("btrfs-scrubwrc", flags, max_active);
if (!scrub_wr_comp)
goto fail_scrub_wr_completion_workers;
return -ENOMEM;
mutex_lock(&fs_info->scrub_lock);
if (refcount_read(&fs_info->scrub_workers_refcnt) == 0) {
ASSERT(fs_info->scrub_workers == NULL &&
fs_info->scrub_wr_completion_workers == NULL);
ASSERT(fs_info->scrub_workers == NULL);
fs_info->scrub_workers = scrub_workers;
fs_info->scrub_wr_completion_workers = scrub_wr_comp;
refcount_set(&fs_info->scrub_workers_refcnt, 1);
mutex_unlock(&fs_info->scrub_lock);
return 0;
@ -2794,10 +2742,7 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info,
ret = 0;
destroy_workqueue(scrub_wr_comp);
fail_scrub_wr_completion_workers:
destroy_workqueue(scrub_workers);
fail_scrub_workers:
return ret;
}

View File

@ -1774,9 +1774,21 @@ static int read_symlink(struct btrfs_root *root,
ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
struct btrfs_file_extent_item);
type = btrfs_file_extent_type(path->nodes[0], ei);
if (unlikely(type != BTRFS_FILE_EXTENT_INLINE)) {
ret = -EUCLEAN;
btrfs_crit(root->fs_info,
"send: found symlink extent that is not inline, ino %llu root %llu extent type %d",
ino, btrfs_root_id(root), type);
goto out;
}
compression = btrfs_file_extent_compression(path->nodes[0], ei);
BUG_ON(type != BTRFS_FILE_EXTENT_INLINE);
BUG_ON(compression);
if (unlikely(compression != BTRFS_COMPRESS_NONE)) {
ret = -EUCLEAN;
btrfs_crit(root->fs_info,
"send: found symlink extent with compression, ino %llu root %llu compression type %d",
ino, btrfs_root_id(root), compression);
goto out;
}
off = btrfs_file_extent_inline_start(ei);
len = btrfs_file_extent_ram_bytes(path->nodes[0], ei);

View File

@ -100,9 +100,6 @@ void btrfs_init_subpage_info(struct btrfs_subpage_info *subpage_info, u32 sector
subpage_info->uptodate_offset = cur;
cur += nr_bits;
subpage_info->error_offset = cur;
cur += nr_bits;
subpage_info->dirty_offset = cur;
cur += nr_bits;
@ -367,28 +364,6 @@ void btrfs_page_end_writer_lock(const struct btrfs_fs_info *fs_info,
unlock_page(page);
}
static bool bitmap_test_range_all_set(unsigned long *addr, unsigned int start,
unsigned int nbits)
{
unsigned int found_zero;
found_zero = find_next_zero_bit(addr, start + nbits, start);
if (found_zero == start + nbits)
return true;
return false;
}
static bool bitmap_test_range_all_zero(unsigned long *addr, unsigned int start,
unsigned int nbits)
{
unsigned int found_set;
found_set = find_next_bit(addr, start + nbits, start);
if (found_set == start + nbits)
return true;
return false;
}
#define subpage_calc_start_bit(fs_info, page, name, start, len) \
({ \
unsigned int start_bit; \
@ -438,35 +413,6 @@ void btrfs_subpage_clear_uptodate(const struct btrfs_fs_info *fs_info,
spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_set_error(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
error, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
bitmap_set(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
SetPageError(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_clear_error(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage *subpage = (struct btrfs_subpage *)page->private;
unsigned int start_bit = subpage_calc_start_bit(fs_info, page,
error, start, len);
unsigned long flags;
spin_lock_irqsave(&subpage->lock, flags);
bitmap_clear(subpage->bitmaps, start_bit, len >> fs_info->sectorsize_bits);
if (subpage_test_bitmap_all_zero(fs_info, subpage, error))
ClearPageError(page);
spin_unlock_irqrestore(&subpage->lock, flags);
}
void btrfs_subpage_set_dirty(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
@ -628,7 +574,6 @@ bool btrfs_subpage_test_##name(const struct btrfs_fs_info *fs_info, \
return ret; \
}
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(uptodate);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(error);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(dirty);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(writeback);
IMPLEMENT_BTRFS_SUBPAGE_TEST_OP(ordered);
@ -696,7 +641,6 @@ bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
}
IMPLEMENT_BTRFS_PAGE_OPS(uptodate, SetPageUptodate, ClearPageUptodate,
PageUptodate);
IMPLEMENT_BTRFS_PAGE_OPS(error, SetPageError, ClearPageError, PageError);
IMPLEMENT_BTRFS_PAGE_OPS(dirty, set_page_dirty, clear_page_dirty_for_io,
PageDirty);
IMPLEMENT_BTRFS_PAGE_OPS(writeback, set_page_writeback, end_page_writeback,
@ -767,3 +711,44 @@ void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
/* Have writers, use proper subpage helper to end it */
btrfs_page_end_writer_lock(fs_info, page, start, len);
}
#define GET_SUBPAGE_BITMAP(subpage, subpage_info, name, dst) \
bitmap_cut(dst, subpage->bitmaps, 0, \
subpage_info->name##_offset, subpage_info->bitmap_nr_bits)
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len)
{
struct btrfs_subpage_info *subpage_info = fs_info->subpage_info;
struct btrfs_subpage *subpage;
unsigned long uptodate_bitmap;
unsigned long error_bitmap;
unsigned long dirty_bitmap;
unsigned long writeback_bitmap;
unsigned long ordered_bitmap;
unsigned long checked_bitmap;
unsigned long flags;
ASSERT(PagePrivate(page) && page->private);
ASSERT(subpage_info);
subpage = (struct btrfs_subpage *)page->private;
spin_lock_irqsave(&subpage->lock, flags);
GET_SUBPAGE_BITMAP(subpage, subpage_info, uptodate, &uptodate_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, dirty, &dirty_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, writeback, &writeback_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, ordered, &ordered_bitmap);
GET_SUBPAGE_BITMAP(subpage, subpage_info, checked, &checked_bitmap);
spin_unlock_irqrestore(&subpage->lock, flags);
dump_page(page, "btrfs subpage dump");
btrfs_warn(fs_info,
"start=%llu len=%u page=%llu, bitmaps uptodate=%*pbl error=%*pbl dirty=%*pbl writeback=%*pbl ordered=%*pbl checked=%*pbl",
start, len, page_offset(page),
subpage_info->bitmap_nr_bits, &uptodate_bitmap,
subpage_info->bitmap_nr_bits, &error_bitmap,
subpage_info->bitmap_nr_bits, &dirty_bitmap,
subpage_info->bitmap_nr_bits, &writeback_bitmap,
subpage_info->bitmap_nr_bits, &ordered_bitmap,
subpage_info->bitmap_nr_bits, &checked_bitmap);
}

View File

@ -8,17 +8,17 @@
/*
* Extra info for subpapge bitmap.
*
* For subpage we pack all uptodate/error/dirty/writeback/ordered bitmaps into
* For subpage we pack all uptodate/dirty/writeback/ordered bitmaps into
* one larger bitmap.
*
* This structure records how they are organized in the bitmap:
*
* /- uptodate_offset /- error_offset /- dirty_offset
* /- uptodate_offset /- dirty_offset /- ordered_offset
* | | |
* v v v
* |u|u|u|u|........|u|u|e|e|.......|e|e| ... |o|o|
* |u|u|u|u|........|u|u|d|d|.......|d|d|o|o|.......|o|o|
* |<- bitmap_nr_bits ->|
* |<--------------- total_nr_bits ---------------->|
* |<----------------- total_nr_bits ------------------>|
*/
struct btrfs_subpage_info {
/* Number of bits for each bitmap */
@ -32,7 +32,6 @@ struct btrfs_subpage_info {
* @bitmap_size, which is calculated from PAGE_SIZE / sectorsize.
*/
unsigned int uptodate_offset;
unsigned int error_offset;
unsigned int dirty_offset;
unsigned int writeback_offset;
unsigned int ordered_offset;
@ -141,7 +140,6 @@ bool btrfs_page_clamp_test_##name(const struct btrfs_fs_info *fs_info, \
struct page *page, u64 start, u32 len);
DECLARE_BTRFS_SUBPAGE_OPS(uptodate);
DECLARE_BTRFS_SUBPAGE_OPS(error);
DECLARE_BTRFS_SUBPAGE_OPS(dirty);
DECLARE_BTRFS_SUBPAGE_OPS(writeback);
DECLARE_BTRFS_SUBPAGE_OPS(ordered);
@ -154,5 +152,7 @@ void btrfs_page_assert_not_dirty(const struct btrfs_fs_info *fs_info,
struct page *page);
void btrfs_page_unlock_writer(struct btrfs_fs_info *fs_info, struct page *page,
u64 start, u32 len);
void __cold btrfs_subpage_dump_bitmap(const struct btrfs_fs_info *fs_info,
struct page *page, u64 start, u32 len);
#endif

View File

@ -1631,7 +1631,6 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info,
old_pool_size, new_pool_size);
btrfs_workqueue_set_max(fs_info->workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size);
btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size);
workqueue_set_max_active(fs_info->endio_workers, new_pool_size);

View File

@ -159,7 +159,7 @@ static int test_find_delalloc(u32 sectorsize)
* |--- delalloc ---|
* |--- search ---|
*/
set_extent_delalloc(tmp, 0, sectorsize - 1, 0, NULL);
set_extent_bit(tmp, 0, sectorsize - 1, EXTENT_DELALLOC, NULL);
start = 0;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
@ -190,7 +190,7 @@ static int test_find_delalloc(u32 sectorsize)
test_err("couldn't find the locked page");
goto out_bits;
}
set_extent_delalloc(tmp, sectorsize, max_bytes - 1, 0, NULL);
set_extent_bit(tmp, sectorsize, max_bytes - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
@ -245,7 +245,7 @@ static int test_find_delalloc(u32 sectorsize)
*
* We are re-using our test_start from above since it works out well.
*/
set_extent_delalloc(tmp, max_bytes, total_dirty - 1, 0, NULL);
set_extent_bit(tmp, max_bytes, total_dirty - 1, EXTENT_DELALLOC, NULL);
start = test_start;
end = start + PAGE_SIZE - 1;
found = find_lock_delalloc_range(inode, locked_page, &start,
@ -503,8 +503,8 @@ static int test_find_first_clear_extent_bit(void)
* Set 1M-4M alloc/discard and 32M-64M thus leaving a hole between
* 4M-32M
*/
set_extent_bits(&tree, SZ_1M, SZ_4M - 1,
CHUNK_TRIMMED | CHUNK_ALLOCATED);
set_extent_bit(&tree, SZ_1M, SZ_4M - 1,
CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
find_first_clear_extent_bit(&tree, SZ_512K, &start, &end,
CHUNK_TRIMMED | CHUNK_ALLOCATED);
@ -516,8 +516,8 @@ static int test_find_first_clear_extent_bit(void)
}
/* Now add 32M-64M so that we have a hole between 4M-32M */
set_extent_bits(&tree, SZ_32M, SZ_64M - 1,
CHUNK_TRIMMED | CHUNK_ALLOCATED);
set_extent_bit(&tree, SZ_32M, SZ_64M - 1,
CHUNK_TRIMMED | CHUNK_ALLOCATED, NULL);
/*
* Request first hole starting at 12M, we should get 4M-32M
@ -548,7 +548,7 @@ static int test_find_first_clear_extent_bit(void)
* Set 64M-72M with CHUNK_ALLOC flag, then search for CHUNK_TRIMMED flag
* being unset in this range, we should get the entry in range 64M-72M
*/
set_extent_bits(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED);
set_extent_bit(&tree, SZ_64M, SZ_64M + SZ_8M - 1, CHUNK_ALLOCATED, NULL);
find_first_clear_extent_bit(&tree, SZ_64M + SZ_1M, &start, &end,
CHUNK_TRIMMED);

View File

@ -374,8 +374,6 @@ static noinline int join_transaction(struct btrfs_fs_info *fs_info,
spin_lock_init(&cur_trans->dirty_bgs_lock);
INIT_LIST_HEAD(&cur_trans->deleted_bgs);
spin_lock_init(&cur_trans->dropped_roots_lock);
INIT_LIST_HEAD(&cur_trans->releasing_ebs);
spin_lock_init(&cur_trans->releasing_ebs_lock);
list_add_tail(&cur_trans->list, &fs_info->trans_list);
extent_io_tree_init(fs_info, &cur_trans->dirty_pages,
IO_TREE_TRANS_DIRTY_PAGES);
@ -1056,7 +1054,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
u64 start = 0;
u64 end;
atomic_inc(&BTRFS_I(fs_info->btree_inode)->sync_writers);
while (!find_first_extent_bit(dirty_pages, start, &start, &end,
mark, &cached_state)) {
bool wait_writeback = false;
@ -1092,7 +1089,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
cond_resched();
start = end + 1;
}
atomic_dec(&BTRFS_I(fs_info->btree_inode)->sync_writers);
return werr;
}
@ -1688,7 +1684,10 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
* insert the directory item
*/
ret = btrfs_set_inode_index(BTRFS_I(parent_inode), &index);
BUG_ON(ret); /* -ENOMEM */
if (ret) {
btrfs_abort_transaction(trans, ret);
goto fail;
}
/* check if there is a file/dir which has the same name. */
dir_item = btrfs_lookup_dir_item(NULL, parent_root, path,
@ -2484,13 +2483,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
goto scrub_continue;
}
/*
* At this point, we should have written all the tree blocks allocated
* in this transaction. So it's now safe to free the redirtyied extent
* buffers.
*/
btrfs_free_redirty_list(cur_trans);
ret = write_all_supers(fs_info, 0);
/*
* the super is written, we can safely allow the tree-loggers

View File

@ -94,9 +94,6 @@ struct btrfs_transaction {
*/
atomic_t pending_ordered;
wait_queue_head_t pending_wait;
spinlock_t releasing_ebs_lock;
struct list_head releasing_ebs;
};
enum {

View File

@ -25,10 +25,10 @@
#include "compression.h"
#include "volumes.h"
#include "misc.h"
#include "btrfs_inode.h"
#include "fs.h"
#include "accessors.h"
#include "file-item.h"
#include "inode-item.h"
/*
* Error message should follow the following format:
@ -1620,9 +1620,10 @@ static int check_inode_ref(struct extent_buffer *leaf,
/*
* Common point to switch the item-specific validation.
*/
static int check_leaf_item(struct extent_buffer *leaf,
struct btrfs_key *key, int slot,
struct btrfs_key *prev_key)
static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf,
struct btrfs_key *key,
int slot,
struct btrfs_key *prev_key)
{
int ret = 0;
struct btrfs_chunk *chunk;
@ -1671,10 +1672,13 @@ static int check_leaf_item(struct extent_buffer *leaf,
ret = check_extent_data_ref(leaf, key, slot);
break;
}
return ret;
if (ret)
return BTRFS_TREE_BLOCK_INVALID_ITEM;
return BTRFS_TREE_BLOCK_CLEAN;
}
static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf)
{
struct btrfs_fs_info *fs_info = leaf->fs_info;
/* No valid key type is 0, so all key should be larger than this key */
@ -1687,7 +1691,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, 0,
"invalid level for leaf, have %d expect 0",
btrfs_header_level(leaf));
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
/*
@ -1710,32 +1714,32 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
/* Unknown tree */
if (unlikely(owner == 0)) {
generic_err(leaf, 0,
"invalid owner, root 0 is not defined");
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_OWNER;
}
/* EXTENT_TREE_V2 can have empty extent trees. */
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
return 0;
return BTRFS_TREE_BLOCK_CLEAN;
if (unlikely(owner == BTRFS_EXTENT_TREE_OBJECTID)) {
generic_err(leaf, 0,
"invalid root, root %llu must never be empty",
owner);
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
return 0;
return BTRFS_TREE_BLOCK_CLEAN;
}
if (unlikely(nritems == 0))
return 0;
return BTRFS_TREE_BLOCK_CLEAN;
/*
* Check the following things to make sure this is a good leaf, and
@ -1751,7 +1755,6 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
for (slot = 0; slot < nritems; slot++) {
u32 item_end_expected;
u64 item_data_end;
int ret;
btrfs_item_key_to_cpu(leaf, &key, slot);
@ -1762,7 +1765,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
prev_key.objectid, prev_key.type,
prev_key.offset, key.objectid, key.type,
key.offset);
return -EUCLEAN;
return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
item_data_end = (u64)btrfs_item_offset(leaf, slot) +
@ -1781,7 +1784,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, slot,
"unexpected item end, have %llu expect %u",
item_data_end, item_end_expected);
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
/*
@ -1793,7 +1796,7 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
generic_err(leaf, slot,
"slot end outside of leaf, have %llu expect range [0, %u]",
item_data_end, BTRFS_LEAF_DATA_SIZE(fs_info));
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
/* Also check if the item pointer overlaps with btrfs item. */
@ -1804,16 +1807,22 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
btrfs_item_nr_offset(leaf, slot) +
sizeof(struct btrfs_item),
btrfs_item_ptr_offset(leaf, slot));
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_OFFSETS;
}
if (check_item_data) {
/*
* We only want to do this if WRITTEN is set, otherwise the leaf
* may be in some intermediate state and won't appear valid.
*/
if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) {
enum btrfs_tree_block_status ret;
/*
* Check if the item size and content meet other
* criteria
*/
ret = check_leaf_item(leaf, &key, slot, &prev_key);
if (unlikely(ret < 0))
if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
return ret;
}
@ -1822,21 +1831,21 @@ static int check_leaf(struct extent_buffer *leaf, bool check_item_data)
prev_key.offset = key.offset;
}
return BTRFS_TREE_BLOCK_CLEAN;
}
int btrfs_check_leaf(struct extent_buffer *leaf)
{
enum btrfs_tree_block_status ret;
ret = __btrfs_check_leaf(leaf);
if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
return -EUCLEAN;
return 0;
}
ALLOW_ERROR_INJECTION(btrfs_check_leaf, ERRNO);
int btrfs_check_leaf_full(struct extent_buffer *leaf)
{
return check_leaf(leaf, true);
}
ALLOW_ERROR_INJECTION(btrfs_check_leaf_full, ERRNO);
int btrfs_check_leaf_relaxed(struct extent_buffer *leaf)
{
return check_leaf(leaf, false);
}
int btrfs_check_node(struct extent_buffer *node)
enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node)
{
struct btrfs_fs_info *fs_info = node->fs_info;
unsigned long nr = btrfs_header_nritems(node);
@ -1844,13 +1853,12 @@ int btrfs_check_node(struct extent_buffer *node)
int slot;
int level = btrfs_header_level(node);
u64 bytenr;
int ret = 0;
if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) {
generic_err(node, 0,
"invalid level for node, have %d expect [1, %d]",
level, BTRFS_MAX_LEVEL - 1);
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_LEVEL;
}
if (unlikely(nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(fs_info))) {
btrfs_crit(fs_info,
@ -1858,7 +1866,7 @@ int btrfs_check_node(struct extent_buffer *node)
btrfs_header_owner(node), node->start,
nr == 0 ? "small" : "large", nr,
BTRFS_NODEPTRS_PER_BLOCK(fs_info));
return -EUCLEAN;
return BTRFS_TREE_BLOCK_INVALID_NRITEMS;
}
for (slot = 0; slot < nr - 1; slot++) {
@ -1869,15 +1877,13 @@ int btrfs_check_node(struct extent_buffer *node)
if (unlikely(!bytenr)) {
generic_err(node, slot,
"invalid NULL node pointer");
ret = -EUCLEAN;
goto out;
return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
}
if (unlikely(!IS_ALIGNED(bytenr, fs_info->sectorsize))) {
generic_err(node, slot,
"unaligned pointer, have %llu should be aligned to %u",
bytenr, fs_info->sectorsize);
ret = -EUCLEAN;
goto out;
return BTRFS_TREE_BLOCK_INVALID_BLOCKPTR;
}
if (unlikely(btrfs_comp_cpu_keys(&key, &next_key) >= 0)) {
@ -1886,12 +1892,20 @@ int btrfs_check_node(struct extent_buffer *node)
key.objectid, key.type, key.offset,
next_key.objectid, next_key.type,
next_key.offset);
ret = -EUCLEAN;
goto out;
return BTRFS_TREE_BLOCK_BAD_KEY_ORDER;
}
}
out:
return ret;
return BTRFS_TREE_BLOCK_CLEAN;
}
int btrfs_check_node(struct extent_buffer *node)
{
enum btrfs_tree_block_status ret;
ret = __btrfs_check_node(node);
if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN))
return -EUCLEAN;
return 0;
}
ALLOW_ERROR_INJECTION(btrfs_check_node, ERRNO);
@ -1949,3 +1963,61 @@ int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner)
}
return 0;
}
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
int found_level;
struct btrfs_key found_key;
int ret;
found_level = btrfs_header_level(eb);
if (found_level != level) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: tree level check failed\n");
btrfs_err(fs_info,
"tree level mismatch detected, bytenr=%llu level expected=%u has=%u",
eb->start, level, found_level);
return -EIO;
}
if (!first_key)
return 0;
/*
* For live tree block (new tree blocks in current transaction),
* we need proper lock context to avoid race, which is impossible here.
* So we only checks tree blocks which is read from disk, whose
* generation <= fs_info->last_trans_committed.
*/
if (btrfs_header_generation(eb) > fs_info->last_trans_committed)
return 0;
/* We have @first_key, so this @eb must have at least one item */
if (btrfs_header_nritems(eb) == 0) {
btrfs_err(fs_info,
"invalid tree nritems, bytenr=%llu nritems=0 expect >0",
eb->start);
WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG));
return -EUCLEAN;
}
if (found_level)
btrfs_node_key_to_cpu(eb, &found_key, 0);
else
btrfs_item_key_to_cpu(eb, &found_key, 0);
ret = btrfs_comp_cpu_keys(first_key, &found_key);
if (ret) {
WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
KERN_ERR "BTRFS: tree first key check failed\n");
btrfs_err(fs_info,
"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)",
eb->start, parent_transid, first_key->objectid,
first_key->type, first_key->offset,
found_key.objectid, found_key.type,
found_key.offset);
}
return ret;
}

View File

@ -40,22 +40,33 @@ struct btrfs_tree_parent_check {
u8 level;
};
/*
* Comprehensive leaf checker.
* Will check not only the item pointers, but also every possible member
* in item data.
*/
int btrfs_check_leaf_full(struct extent_buffer *leaf);
enum btrfs_tree_block_status {
BTRFS_TREE_BLOCK_CLEAN,
BTRFS_TREE_BLOCK_INVALID_NRITEMS,
BTRFS_TREE_BLOCK_INVALID_PARENT_KEY,
BTRFS_TREE_BLOCK_BAD_KEY_ORDER,
BTRFS_TREE_BLOCK_INVALID_LEVEL,
BTRFS_TREE_BLOCK_INVALID_FREE_SPACE,
BTRFS_TREE_BLOCK_INVALID_OFFSETS,
BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
BTRFS_TREE_BLOCK_INVALID_ITEM,
BTRFS_TREE_BLOCK_INVALID_OWNER,
};
/*
* Less strict leaf checker.
* Will only check item pointers, not reading item data.
* Exported simply for btrfs-progs which wants to have the
* btrfs_tree_block_status return codes.
*/
int btrfs_check_leaf_relaxed(struct extent_buffer *leaf);
enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf);
enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node);
int btrfs_check_leaf(struct extent_buffer *leaf);
int btrfs_check_node(struct extent_buffer *node);
int btrfs_check_chunk_valid(struct extent_buffer *leaf,
struct btrfs_chunk *chunk, u64 logical);
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
int btrfs_verify_level_key(struct extent_buffer *eb, int level,
struct btrfs_key *first_key, u64 parent_transid);
#endif

View File

@ -859,10 +859,10 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
struct btrfs_ordered_sum,
list);
csum_root = btrfs_csum_root(fs_info,
sums->bytenr);
sums->logical);
if (!ret)
ret = btrfs_del_csums(trans, csum_root,
sums->bytenr,
sums->logical,
sums->len);
if (!ret)
ret = btrfs_csum_file_blocks(trans,
@ -3252,7 +3252,7 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
* Returns 1 if the inode was logged before in the transaction, 0 if it was not,
* and < 0 on error.
*/
static int inode_logged(struct btrfs_trans_handle *trans,
static int inode_logged(const struct btrfs_trans_handle *trans,
struct btrfs_inode *inode,
struct btrfs_path *path_in)
{
@ -4056,14 +4056,14 @@ static int drop_inode_items(struct btrfs_trans_handle *trans,
while (1) {
ret = btrfs_search_slot(trans, log, &key, path, -1, 1);
BUG_ON(ret == 0); /* Logic error */
if (ret < 0)
if (ret < 0) {
break;
} else if (ret > 0) {
if (path->slots[0] == 0)
break;
path->slots[0]--;
}
if (path->slots[0] == 0)
break;
path->slots[0]--;
btrfs_item_key_to_cpu(path->nodes[0], &found_key,
path->slots[0]);
@ -4221,7 +4221,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
struct btrfs_root *log_root,
struct btrfs_ordered_sum *sums)
{
const u64 lock_end = sums->bytenr + sums->len - 1;
const u64 lock_end = sums->logical + sums->len - 1;
struct extent_state *cached_state = NULL;
int ret;
@ -4239,7 +4239,7 @@ static int log_csums(struct btrfs_trans_handle *trans,
* file which happens to refer to the same extent as well. Such races
* can leave checksum items in the log with overlapping ranges.
*/
ret = lock_extent(&log_root->log_csum_range, sums->bytenr, lock_end,
ret = lock_extent(&log_root->log_csum_range, sums->logical, lock_end,
&cached_state);
if (ret)
return ret;
@ -4252,11 +4252,11 @@ static int log_csums(struct btrfs_trans_handle *trans,
* some checksums missing in the fs/subvolume tree. So just delete (or
* trim and adjust) any existing csum items in the log for this range.
*/
ret = btrfs_del_csums(trans, log_root, sums->bytenr, sums->len);
ret = btrfs_del_csums(trans, log_root, sums->logical, sums->len);
if (!ret)
ret = btrfs_csum_file_blocks(trans, log_root, sums);
unlock_extent(&log_root->log_csum_range, sums->bytenr, lock_end,
unlock_extent(&log_root->log_csum_range, sums->logical, lock_end,
&cached_state);
return ret;
@ -5303,7 +5303,7 @@ static int btrfs_check_ref_name_override(struct extent_buffer *eb,
* multiple times when multiple tasks have joined the same log transaction.
*/
static bool need_log_inode(const struct btrfs_trans_handle *trans,
const struct btrfs_inode *inode)
struct btrfs_inode *inode)
{
/*
* If a directory was not modified, no dentries added or removed, we can
@ -5321,7 +5321,7 @@ static bool need_log_inode(const struct btrfs_trans_handle *trans,
* logged_trans will be 0, in which case we have to fully log it since
* logged_trans is a transient field, not persisted.
*/
if (inode->logged_trans == trans->transid &&
if (inode_logged(trans, inode, NULL) == 1 &&
!test_bit(BTRFS_INODE_COPY_EVERYTHING, &inode->runtime_flags))
return false;
@ -7309,7 +7309,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree)
*/
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct btrfs_inode *inode,
int for_rename)
bool for_rename)
{
/*
* when we're logging a file, if it hasn't been renamed
@ -7325,18 +7325,25 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
inode->last_unlink_trans = trans->transid;
mutex_unlock(&inode->log_mutex);
/*
* if this directory was already logged any new
* names for this file/dir will get recorded
*/
if (dir->logged_trans == trans->transid)
if (!for_rename)
return;
/*
* if the inode we're about to unlink was logged,
* the log will be properly updated for any new names
* If this directory was already logged, any new names will be logged
* with btrfs_log_new_name() and old names will be deleted from the log
* tree with btrfs_del_dir_entries_in_log() or with
* btrfs_del_inode_ref_in_log().
*/
if (inode->logged_trans == trans->transid)
if (inode_logged(trans, dir, NULL) == 1)
return;
/*
* If the inode we're about to unlink was logged before, the log will be
* properly updated with the new name with btrfs_log_new_name() and the
* old name removed with btrfs_del_dir_entries_in_log() or with
* btrfs_del_inode_ref_in_log().
*/
if (inode_logged(trans, inode, NULL) == 1)
return;
/*
@ -7346,13 +7353,6 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* properly. So, we have to be conservative and force commits
* so the new name gets discovered.
*/
if (for_rename)
goto record;
/* we can safely do the unlink without any special recording */
return;
record:
mutex_lock(&dir->log_mutex);
dir->last_unlink_trans = trans->transid;
mutex_unlock(&dir->log_mutex);

View File

@ -100,7 +100,7 @@ void btrfs_end_log_trans(struct btrfs_root *root);
void btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir, struct btrfs_inode *inode,
int for_rename);
bool for_rename);
void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
struct btrfs_inode *dir);
void btrfs_log_new_name(struct btrfs_trans_handle *trans,

View File

@ -226,21 +226,32 @@ int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
enum btrfs_mod_log_op op)
{
struct tree_mod_elem *tm;
int ret;
int ret = 0;
if (!tree_mod_need_log(eb->fs_info, eb))
return 0;
tm = alloc_tree_mod_elem(eb, slot, op);
if (!tm)
return -ENOMEM;
ret = -ENOMEM;
if (tree_mod_dont_log(eb->fs_info, eb)) {
kfree(tm);
/*
* Don't error if we failed to allocate memory because we don't
* need to log.
*/
return 0;
} else if (ret != 0) {
/*
* We previously failed to allocate memory and we need to log,
* so we have to fail.
*/
goto out_unlock;
}
ret = tree_mod_log_insert(eb->fs_info, tm);
out_unlock:
write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
kfree(tm);
@ -248,6 +259,26 @@ int btrfs_tree_mod_log_insert_key(struct extent_buffer *eb, int slot,
return ret;
}
static struct tree_mod_elem *tree_mod_log_alloc_move(struct extent_buffer *eb,
int dst_slot, int src_slot,
int nr_items)
{
struct tree_mod_elem *tm;
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm)
return ERR_PTR(-ENOMEM);
tm->logical = eb->start;
tm->slot = src_slot;
tm->move.dst_slot = dst_slot;
tm->move.nr_items = nr_items;
tm->op = BTRFS_MOD_LOG_MOVE_KEYS;
RB_CLEAR_NODE(&tm->node);
return tm;
}
int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
int dst_slot, int src_slot,
int nr_items)
@ -262,34 +293,45 @@ int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
return 0;
tm_list = kcalloc(nr_items, sizeof(struct tree_mod_elem *), GFP_NOFS);
if (!tm_list)
return -ENOMEM;
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm) {
if (!tm_list) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
tm->logical = eb->start;
tm->slot = src_slot;
tm->move.dst_slot = dst_slot;
tm->move.nr_items = nr_items;
tm->op = BTRFS_MOD_LOG_MOVE_KEYS;
tm = tree_mod_log_alloc_move(eb, dst_slot, src_slot, nr_items);
if (IS_ERR(tm)) {
ret = PTR_ERR(tm);
tm = NULL;
goto lock;
}
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
BTRFS_MOD_LOG_KEY_REMOVE_WHILE_MOVING);
if (!tm_list[i]) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
}
if (tree_mod_dont_log(eb->fs_info, eb))
lock:
if (tree_mod_dont_log(eb->fs_info, eb)) {
/*
* Don't error if we failed to allocate memory because we don't
* need to log.
*/
ret = 0;
goto free_tms;
}
locked = true;
/*
* We previously failed to allocate memory and we need to log, so we
* have to fail.
*/
if (ret != 0)
goto free_tms;
/*
* When we override something during the move, we log these removals.
* This can only happen when we move towards the beginning of the
@ -310,10 +352,12 @@ int btrfs_tree_mod_log_insert_move(struct extent_buffer *eb,
return 0;
free_tms:
for (i = 0; i < nr_items; i++) {
if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
kfree(tm_list[i]);
if (tm_list) {
for (i = 0; i < nr_items; i++) {
if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
rb_erase(&tm_list[i]->node, &eb->fs_info->tree_mod_log);
kfree(tm_list[i]);
}
}
if (locked)
write_unlock(&eb->fs_info->tree_mod_log_lock);
@ -363,14 +407,14 @@ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
GFP_NOFS);
if (!tm_list) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
for (i = 0; i < nritems; i++) {
tm_list[i] = alloc_tree_mod_elem(old_root, i,
BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING);
if (!tm_list[i]) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
}
}
@ -378,7 +422,7 @@ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
tm = kzalloc(sizeof(*tm), GFP_NOFS);
if (!tm) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
tm->logical = new_root->start;
@ -387,14 +431,28 @@ int btrfs_tree_mod_log_insert_root(struct extent_buffer *old_root,
tm->generation = btrfs_header_generation(old_root);
tm->op = BTRFS_MOD_LOG_ROOT_REPLACE;
if (tree_mod_dont_log(fs_info, NULL))
lock:
if (tree_mod_dont_log(fs_info, NULL)) {
/*
* Don't error if we failed to allocate memory because we don't
* need to log.
*/
ret = 0;
goto free_tms;
} else if (ret != 0) {
/*
* We previously failed to allocate memory and we need to log,
* so we have to fail.
*/
goto out_unlock;
}
if (tm_list)
ret = tree_mod_log_free_eb(fs_info, tm_list, nritems);
if (!ret)
ret = tree_mod_log_insert(fs_info, tm);
out_unlock:
write_unlock(&fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
@ -486,9 +544,14 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
struct btrfs_fs_info *fs_info = dst->fs_info;
int ret = 0;
struct tree_mod_elem **tm_list = NULL;
struct tree_mod_elem **tm_list_add, **tm_list_rem;
struct tree_mod_elem **tm_list_add = NULL;
struct tree_mod_elem **tm_list_rem = NULL;
int i;
bool locked = false;
struct tree_mod_elem *dst_move_tm = NULL;
struct tree_mod_elem *src_move_tm = NULL;
u32 dst_move_nr_items = btrfs_header_nritems(dst) - dst_offset;
u32 src_move_nr_items = btrfs_header_nritems(src) - (src_offset + nr_items);
if (!tree_mod_need_log(fs_info, NULL))
return 0;
@ -498,8 +561,30 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
tm_list = kcalloc(nr_items * 2, sizeof(struct tree_mod_elem *),
GFP_NOFS);
if (!tm_list)
return -ENOMEM;
if (!tm_list) {
ret = -ENOMEM;
goto lock;
}
if (dst_move_nr_items) {
dst_move_tm = tree_mod_log_alloc_move(dst, dst_offset + nr_items,
dst_offset, dst_move_nr_items);
if (IS_ERR(dst_move_tm)) {
ret = PTR_ERR(dst_move_tm);
dst_move_tm = NULL;
goto lock;
}
}
if (src_move_nr_items) {
src_move_tm = tree_mod_log_alloc_move(src, src_offset,
src_offset + nr_items,
src_move_nr_items);
if (IS_ERR(src_move_tm)) {
ret = PTR_ERR(src_move_tm);
src_move_tm = NULL;
goto lock;
}
}
tm_list_add = tm_list;
tm_list_rem = tm_list + nr_items;
@ -508,21 +593,40 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
BTRFS_MOD_LOG_KEY_REMOVE);
if (!tm_list_rem[i]) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
BTRFS_MOD_LOG_KEY_ADD);
if (!tm_list_add[i]) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
}
if (tree_mod_dont_log(fs_info, NULL))
lock:
if (tree_mod_dont_log(fs_info, NULL)) {
/*
* Don't error if we failed to allocate memory because we don't
* need to log.
*/
ret = 0;
goto free_tms;
}
locked = true;
/*
* We previously failed to allocate memory and we need to log, so we
* have to fail.
*/
if (ret != 0)
goto free_tms;
if (dst_move_tm) {
ret = tree_mod_log_insert(fs_info, dst_move_tm);
if (ret)
goto free_tms;
}
for (i = 0; i < nr_items; i++) {
ret = tree_mod_log_insert(fs_info, tm_list_rem[i]);
if (ret)
@ -531,6 +635,11 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
if (ret)
goto free_tms;
}
if (src_move_tm) {
ret = tree_mod_log_insert(fs_info, src_move_tm);
if (ret)
goto free_tms;
}
write_unlock(&fs_info->tree_mod_log_lock);
kfree(tm_list);
@ -538,10 +647,18 @@ int btrfs_tree_mod_log_eb_copy(struct extent_buffer *dst,
return 0;
free_tms:
for (i = 0; i < nr_items * 2; i++) {
if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
kfree(tm_list[i]);
if (dst_move_tm && !RB_EMPTY_NODE(&dst_move_tm->node))
rb_erase(&dst_move_tm->node, &fs_info->tree_mod_log);
kfree(dst_move_tm);
if (src_move_tm && !RB_EMPTY_NODE(&src_move_tm->node))
rb_erase(&src_move_tm->node, &fs_info->tree_mod_log);
kfree(src_move_tm);
if (tm_list) {
for (i = 0; i < nr_items * 2; i++) {
if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
kfree(tm_list[i]);
}
}
if (locked)
write_unlock(&fs_info->tree_mod_log_lock);
@ -562,22 +679,38 @@ int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb)
nritems = btrfs_header_nritems(eb);
tm_list = kcalloc(nritems, sizeof(struct tree_mod_elem *), GFP_NOFS);
if (!tm_list)
return -ENOMEM;
if (!tm_list) {
ret = -ENOMEM;
goto lock;
}
for (i = 0; i < nritems; i++) {
tm_list[i] = alloc_tree_mod_elem(eb, i,
BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING);
if (!tm_list[i]) {
ret = -ENOMEM;
goto free_tms;
goto lock;
}
}
if (tree_mod_dont_log(eb->fs_info, eb))
lock:
if (tree_mod_dont_log(eb->fs_info, eb)) {
/*
* Don't error if we failed to allocate memory because we don't
* need to log.
*/
ret = 0;
goto free_tms;
} else if (ret != 0) {
/*
* We previously failed to allocate memory and we need to log,
* so we have to fail.
*/
goto out_unlock;
}
ret = tree_mod_log_free_eb(eb->fs_info, tm_list, nritems);
out_unlock:
write_unlock(&eb->fs_info->tree_mod_log_lock);
if (ret)
goto free_tms;
@ -586,9 +719,11 @@ int btrfs_tree_mod_log_free_eb(struct extent_buffer *eb)
return 0;
free_tms:
for (i = 0; i < nritems; i++)
kfree(tm_list[i]);
kfree(tm_list);
if (tm_list) {
for (i = 0; i < nritems; i++)
kfree(tm_list[i]);
kfree(tm_list);
}
return ret;
}
@ -664,10 +799,27 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
unsigned long o_dst;
unsigned long o_src;
unsigned long p_size = sizeof(struct btrfs_key_ptr);
/*
* max_slot tracks the maximum valid slot of the rewind eb at every
* step of the rewind. This is in contrast with 'n' which eventually
* matches the number of items, but can be wrong during moves or if
* removes overlap on already valid slots (which is probably separately
* a bug). We do this to validate the offsets of memmoves for rewinding
* moves and detect invalid memmoves.
*
* Since a rewind eb can start empty, max_slot is a signed integer with
* a special meaning for -1, which is that no slot is valid to move out
* of. Any other negative value is invalid.
*/
int max_slot;
int move_src_end_slot;
int move_dst_end_slot;
n = btrfs_header_nritems(eb);
max_slot = n - 1;
read_lock(&fs_info->tree_mod_log_lock);
while (tm && tm->seq >= time_seq) {
ASSERT(max_slot >= -1);
/*
* All the operations are recorded with the operator used for
* the modification. As we're going backwards, we do the
@ -684,6 +836,8 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
btrfs_set_node_ptr_generation(eb, tm->slot,
tm->generation);
n++;
if (tm->slot > max_slot)
max_slot = tm->slot;
break;
case BTRFS_MOD_LOG_KEY_REPLACE:
BUG_ON(tm->slot >= n);
@ -693,14 +847,37 @@ static void tree_mod_log_rewind(struct btrfs_fs_info *fs_info,
tm->generation);
break;
case BTRFS_MOD_LOG_KEY_ADD:
/*
* It is possible we could have already removed keys
* behind the known max slot, so this will be an
* overestimate. In practice, the copy operation
* inserts them in increasing order, and overestimating
* just means we miss some warnings, so it's OK. It
* isn't worth carefully tracking the full array of
* valid slots to check against when moving.
*/
if (tm->slot == max_slot)
max_slot--;
/* if a move operation is needed it's in the log */
n--;
break;
case BTRFS_MOD_LOG_MOVE_KEYS:
ASSERT(tm->move.nr_items > 0);
move_src_end_slot = tm->move.dst_slot + tm->move.nr_items - 1;
move_dst_end_slot = tm->slot + tm->move.nr_items - 1;
o_dst = btrfs_node_key_ptr_offset(eb, tm->slot);
o_src = btrfs_node_key_ptr_offset(eb, tm->move.dst_slot);
if (WARN_ON(move_src_end_slot > max_slot ||
tm->move.nr_items <= 0)) {
btrfs_warn(fs_info,
"move from invalid tree mod log slot eb %llu slot %d dst_slot %d nr_items %d seq %llu n %u max_slot %d",
eb->start, tm->slot,
tm->move.dst_slot, tm->move.nr_items,
tm->seq, n, max_slot);
}
memmove_extent_buffer(eb, o_dst, o_src,
tm->move.nr_items * p_size);
max_slot = move_dst_end_slot;
break;
case BTRFS_MOD_LOG_ROOT_REPLACE:
/*

View File

@ -370,6 +370,8 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
{
struct btrfs_fs_devices *fs_devs;
ASSERT(fsid || !metadata_fsid);
fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
if (!fs_devs)
return ERR_PTR(-ENOMEM);
@ -380,18 +382,17 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid,
INIT_LIST_HEAD(&fs_devs->alloc_list);
INIT_LIST_HEAD(&fs_devs->fs_list);
INIT_LIST_HEAD(&fs_devs->seed_list);
if (fsid)
memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
if (metadata_fsid)
memcpy(fs_devs->metadata_uuid, metadata_fsid, BTRFS_FSID_SIZE);
else if (fsid)
memcpy(fs_devs->metadata_uuid, fsid, BTRFS_FSID_SIZE);
if (fsid) {
memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
memcpy(fs_devs->metadata_uuid,
metadata_fsid ?: fsid, BTRFS_FSID_SIZE);
}
return fs_devs;
}
void btrfs_free_device(struct btrfs_device *device)
static void btrfs_free_device(struct btrfs_device *device)
{
WARN_ON(!list_empty(&device->post_commit_list));
rcu_string_free(device->name);
@ -426,6 +427,21 @@ void __exit btrfs_cleanup_fs_uuids(void)
}
}
static bool match_fsid_fs_devices(const struct btrfs_fs_devices *fs_devices,
const u8 *fsid, const u8 *metadata_fsid)
{
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) != 0)
return false;
if (!metadata_fsid)
return true;
if (memcmp(metadata_fsid, fs_devices->metadata_uuid, BTRFS_FSID_SIZE) != 0)
return false;
return true;
}
static noinline struct btrfs_fs_devices *find_fsid(
const u8 *fsid, const u8 *metadata_fsid)
{
@ -435,19 +451,25 @@ static noinline struct btrfs_fs_devices *find_fsid(
/* Handle non-split brain cases */
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (metadata_fsid) {
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0
&& memcmp(metadata_fsid, fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) == 0)
return fs_devices;
} else {
if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
return fs_devices;
}
if (match_fsid_fs_devices(fs_devices, fsid, metadata_fsid))
return fs_devices;
}
return NULL;
}
/*
* First check if the metadata_uuid is different from the fsid in the given
* fs_devices. Then check if the given fsid is the same as the metadata_uuid
* in the fs_devices. If it is, return true; otherwise, return false.
*/
static inline bool check_fsid_changed(const struct btrfs_fs_devices *fs_devices,
const u8 *fsid)
{
return memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) != 0 &&
memcmp(fs_devices->metadata_uuid, fsid, BTRFS_FSID_SIZE) == 0;
}
static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
struct btrfs_super_block *disk_super)
{
@ -461,14 +483,14 @@ static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
* at all and the CHANGING_FSID_V2 flag set.
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (fs_devices->fsid_change &&
memcmp(disk_super->metadata_uuid, fs_devices->fsid,
BTRFS_FSID_SIZE) == 0 &&
memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) == 0) {
if (!fs_devices->fsid_change)
continue;
if (match_fsid_fs_devices(fs_devices, disk_super->metadata_uuid,
fs_devices->fsid))
return fs_devices;
}
}
/*
* Handle scanned device having completed its fsid change but
* belonging to a fs_devices that was created by a device that
@ -476,13 +498,11 @@ static struct btrfs_fs_devices *find_fsid_with_metadata_uuid(
* CHANGING_FSID_V2 flag set.
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (fs_devices->fsid_change &&
memcmp(fs_devices->metadata_uuid,
fs_devices->fsid, BTRFS_FSID_SIZE) != 0 &&
memcmp(disk_super->metadata_uuid, fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) == 0) {
if (!fs_devices->fsid_change)
continue;
if (check_fsid_changed(fs_devices, disk_super->metadata_uuid))
return fs_devices;
}
}
return find_fsid(disk_super->fsid, disk_super->metadata_uuid);
@ -673,18 +693,16 @@ static struct btrfs_fs_devices *find_fsid_inprogress(
struct btrfs_fs_devices *fs_devices;
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
BTRFS_FSID_SIZE) != 0 &&
memcmp(fs_devices->metadata_uuid, disk_super->fsid,
BTRFS_FSID_SIZE) == 0 && !fs_devices->fsid_change) {
if (fs_devices->fsid_change)
continue;
if (check_fsid_changed(fs_devices, disk_super->fsid))
return fs_devices;
}
}
return find_fsid(disk_super->fsid, NULL);
}
static struct btrfs_fs_devices *find_fsid_changed(
struct btrfs_super_block *disk_super)
{
@ -701,10 +719,7 @@ static struct btrfs_fs_devices *find_fsid_changed(
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
/* Changed UUIDs */
if (memcmp(fs_devices->metadata_uuid, fs_devices->fsid,
BTRFS_FSID_SIZE) != 0 &&
memcmp(fs_devices->metadata_uuid, disk_super->metadata_uuid,
BTRFS_FSID_SIZE) == 0 &&
if (check_fsid_changed(fs_devices, disk_super->metadata_uuid) &&
memcmp(fs_devices->fsid, disk_super->fsid,
BTRFS_FSID_SIZE) != 0)
return fs_devices;
@ -735,11 +750,10 @@ static struct btrfs_fs_devices *find_fsid_reverted_metadata(
* fs_devices equal to the FSID of the disk.
*/
list_for_each_entry(fs_devices, &fs_uuids, fs_list) {
if (memcmp(fs_devices->fsid, fs_devices->metadata_uuid,
BTRFS_FSID_SIZE) != 0 &&
memcmp(fs_devices->metadata_uuid, disk_super->fsid,
BTRFS_FSID_SIZE) == 0 &&
fs_devices->fsid_change)
if (!fs_devices->fsid_change)
continue;
if (check_fsid_changed(fs_devices, disk_super->fsid))
return fs_devices;
}
@ -790,12 +804,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
if (!fs_devices) {
if (has_metadata_uuid)
fs_devices = alloc_fs_devices(disk_super->fsid,
disk_super->metadata_uuid);
else
fs_devices = alloc_fs_devices(disk_super->fsid, NULL);
fs_devices = alloc_fs_devices(disk_super->fsid,
has_metadata_uuid ? disk_super->metadata_uuid : NULL);
if (IS_ERR(fs_devices))
return ERR_CAST(fs_devices);
@ -1918,7 +1928,7 @@ static void update_dev_time(const char *device_path)
return;
now = current_time(d_inode(path.dentry));
inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME);
inode_update_time(d_inode(path.dentry), &now, S_MTIME | S_CTIME | S_VERSION);
path_put(&path);
}
@ -6163,17 +6173,10 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op,
bioc->replace_nr_stripes = nr_extra_stripes;
}
static bool need_full_stripe(enum btrfs_map_op op)
{
return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
}
static u64 btrfs_max_io_len(struct map_lookup *map, enum btrfs_map_op op,
u64 offset, u32 *stripe_nr, u64 *stripe_offset,
u64 *full_stripe_start)
{
ASSERT(op != BTRFS_MAP_DISCARD);
/*
* Stripe_nr is the stripe where this block falls. stripe_offset is
* the offset of this block in its stripe.
@ -6226,11 +6229,11 @@ static void set_io_stripe(struct btrfs_io_stripe *dst, const struct map_lookup *
stripe_offset + btrfs_stripe_nr_to_offset(stripe_nr);
}
int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap, int *mirror_num_ret,
int need_raid_map)
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap, int *mirror_num_ret,
int need_raid_map)
{
struct extent_map *em;
struct map_lookup *map;
@ -6253,7 +6256,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 max_len;
ASSERT(bioc_ret);
ASSERT(op != BTRFS_MAP_DISCARD);
num_copies = btrfs_num_copies(fs_info, logical, fs_info->sectorsize);
if (mirror_num > num_copies)
@ -6285,21 +6287,21 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
stripe_index = stripe_nr % map->num_stripes;
stripe_nr /= map->num_stripes;
if (!need_full_stripe(op))
if (op == BTRFS_MAP_READ)
mirror_num = 1;
} else if (map->type & BTRFS_BLOCK_GROUP_RAID1_MASK) {
if (need_full_stripe(op))
if (op != BTRFS_MAP_READ) {
num_stripes = map->num_stripes;
else if (mirror_num)
} else if (mirror_num) {
stripe_index = mirror_num - 1;
else {
} else {
stripe_index = find_live_mirror(fs_info, map, 0,
dev_replace_is_ongoing);
mirror_num = stripe_index + 1;
}
} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
if (need_full_stripe(op)) {
if (op != BTRFS_MAP_READ) {
num_stripes = map->num_stripes;
} else if (mirror_num) {
stripe_index = mirror_num - 1;
@ -6313,7 +6315,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
stripe_index = (stripe_nr % factor) * map->sub_stripes;
stripe_nr /= factor;
if (need_full_stripe(op))
if (op != BTRFS_MAP_READ)
num_stripes = map->sub_stripes;
else if (mirror_num)
stripe_index += mirror_num - 1;
@ -6326,7 +6328,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
if (need_raid_map && (op != BTRFS_MAP_READ || mirror_num > 1)) {
/*
* Push stripe_nr back to the start of the full stripe
* For those cases needing a full stripe, @stripe_nr
@ -6362,7 +6364,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
/* We distribute the parity blocks across stripes */
stripe_index = (stripe_nr + stripe_index) % map->num_stripes;
if (!need_full_stripe(op) && mirror_num <= 1)
if (op == BTRFS_MAP_READ && mirror_num <= 1)
mirror_num = 1;
}
} else {
@ -6402,7 +6404,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
*/
if (smap && num_alloc_stripes == 1 &&
!((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) && mirror_num > 1) &&
(!need_full_stripe(op) || !dev_replace_is_ongoing ||
(op == BTRFS_MAP_READ || !dev_replace_is_ongoing ||
!dev_replace->tgtdev)) {
set_io_stripe(smap, map, stripe_index, stripe_offset, stripe_nr);
*mirror_num_ret = mirror_num;
@ -6426,7 +6428,7 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
* It's still mostly the same as other profiles, just with extra rotation.
*/
if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
(need_full_stripe(op) || mirror_num > 1)) {
(op != BTRFS_MAP_READ || mirror_num > 1)) {
/*
* For RAID56 @stripe_nr is already the number of full stripes
* before us, which is also the rotation value (needs to modulo
@ -6453,11 +6455,11 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
}
}
if (need_full_stripe(op))
if (op != BTRFS_MAP_READ)
max_errors = btrfs_chunk_max_errors(map);
if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
need_full_stripe(op)) {
op != BTRFS_MAP_READ) {
handle_ops_on_dev_replace(op, bioc, dev_replace, logical,
&num_stripes, &max_errors);
}
@ -6477,23 +6479,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
return ret;
}
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret, int mirror_num)
{
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
NULL, &mirror_num, 0);
}
/* For Scrub/replace */
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret)
{
return __btrfs_map_block(fs_info, op, logical, length, bioc_ret,
NULL, NULL, 1);
}
static bool dev_args_match_fs_devices(const struct btrfs_dev_lookup_args *args,
const struct btrfs_fs_devices *fs_devices)
{
@ -8070,8 +8055,8 @@ int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
ASSERT(mirror_num > 0);
ret = __btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
&bioc, smap, &mirror_ret, true);
ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical, &map_length,
&bioc, smap, &mirror_ret, true);
if (ret < 0)
return ret;

View File

@ -280,8 +280,19 @@ enum btrfs_read_policy {
struct btrfs_fs_devices {
u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
/*
* UUID written into the btree blocks:
*
* - If metadata_uuid != fsid then super block must have
* BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag set.
*
* - Following shall be true at all times:
* - metadata_uuid == btrfs_header::fsid
* - metadata_uuid == btrfs_dev_item::fsid
*/
u8 metadata_uuid[BTRFS_FSID_SIZE];
bool fsid_change;
struct list_head fs_list;
/*
@ -319,34 +330,32 @@ struct btrfs_fs_devices {
*/
struct btrfs_device *latest_dev;
/* all of the devices in the FS, protected by a mutex
* so we can safely walk it to write out the supers without
* worrying about add/remove by the multi-device code.
* Scrubbing super can kick off supers writing by holding
* this mutex lock.
/*
* All of the devices in the filesystem, protected by a mutex so we can
* safely walk it to write out the super blocks without worrying about
* adding/removing by the multi-device code. Scrubbing super block can
* kick off supers writing by holding this mutex lock.
*/
struct mutex device_list_mutex;
/* List of all devices, protected by device_list_mutex */
struct list_head devices;
/*
* Devices which can satisfy space allocation. Protected by
* chunk_mutex
*/
/* Devices which can satisfy space allocation. Protected by * chunk_mutex. */
struct list_head alloc_list;
struct list_head seed_list;
bool seeding;
/* Count fs-devices opened. */
int opened;
/* set when we find or add a device that doesn't have the
* nonrot flag set
*/
/* Set when we find or add a device that doesn't have the nonrot flag set. */
bool rotating;
/* Devices support TRIM/discard commands */
/* Devices support TRIM/discard commands. */
bool discardable;
bool fsid_change;
/* The filesystem is a seed filesystem. */
bool seeding;
struct btrfs_fs_info *fs_info;
/* sysfs kobjects */
@ -357,7 +366,7 @@ struct btrfs_fs_devices {
enum btrfs_chunk_allocation_policy chunk_alloc_policy;
/* Policy used to read the mirrored stripes */
/* Policy used to read the mirrored stripes. */
enum btrfs_read_policy read_policy;
};
@ -547,15 +556,12 @@ struct btrfs_dev_lookup_args {
enum btrfs_map_op {
BTRFS_MAP_READ,
BTRFS_MAP_WRITE,
BTRFS_MAP_DISCARD,
BTRFS_MAP_GET_READ_MIRRORS,
};
static inline enum btrfs_map_op btrfs_op(struct bio *bio)
{
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
return BTRFS_MAP_DISCARD;
case REQ_OP_WRITE:
case REQ_OP_ZONE_APPEND:
return BTRFS_MAP_WRITE;
@ -589,15 +595,9 @@ void btrfs_get_bioc(struct btrfs_io_context *bioc);
void btrfs_put_bioc(struct btrfs_io_context *bioc);
int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret, int mirror_num);
int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret);
int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 logical, u64 *length,
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap, int *mirror_num_ret,
int need_raid_map);
struct btrfs_io_context **bioc_ret,
struct btrfs_io_stripe *smap, int *mirror_num_ret,
int need_raid_map);
int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
struct btrfs_io_stripe *smap, u64 logical,
u32 length, int mirror_num);
@ -628,7 +628,6 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
const u64 *devid, const u8 *uuid,
const char *path);
void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
void btrfs_free_device(struct btrfs_device *device);
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
struct btrfs_dev_lookup_args *args,
struct block_device **bdev, fmode_t *mode);

View File

@ -63,7 +63,7 @@ struct list_head *zlib_alloc_workspace(unsigned int level)
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL);
workspace->strm.workspace = kvzalloc(workspacesize, GFP_KERNEL | __GFP_NOWARN);
workspace->level = level;
workspace->buf = NULL;
/*

View File

@ -15,6 +15,7 @@
#include "transaction.h"
#include "dev-replace.h"
#include "space-info.h"
#include "super.h"
#include "fs.h"
#include "accessors.h"
#include "bio.h"
@ -1057,7 +1058,7 @@ u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
/* Check if zones in the region are all empty */
if (btrfs_dev_is_sequential(device, pos) &&
find_next_zero_bit(zinfo->empty_zones, end, begin) != end) {
!bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) {
pos += zinfo->zone_size;
continue;
}
@ -1156,23 +1157,23 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
struct btrfs_zoned_device_info *zinfo = device->zone_info;
const u8 shift = zinfo->zone_size_shift;
unsigned long begin = start >> shift;
unsigned long end = (start + size) >> shift;
unsigned long nbits = size >> shift;
u64 pos;
int ret;
ASSERT(IS_ALIGNED(start, zinfo->zone_size));
ASSERT(IS_ALIGNED(size, zinfo->zone_size));
if (end > zinfo->nr_zones)
if (begin + nbits > zinfo->nr_zones)
return -ERANGE;
/* All the zones are conventional */
if (find_next_bit(zinfo->seq_zones, end, begin) == end)
if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits))
return 0;
/* All the zones are sequential and empty */
if (find_next_zero_bit(zinfo->seq_zones, end, begin) == end &&
find_next_zero_bit(zinfo->empty_zones, end, begin) == end)
if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) &&
bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits))
return 0;
for (pos = start; pos < start + size; pos += zinfo->zone_size) {
@ -1602,37 +1603,17 @@ void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb)
{
struct btrfs_fs_info *fs_info = eb->fs_info;
if (!btrfs_is_zoned(fs_info) ||
btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) ||
!list_empty(&eb->release_list))
if (!btrfs_is_zoned(eb->fs_info) ||
btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN))
return;
ASSERT(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
memzero_extent_buffer(eb, 0, eb->len);
set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
set_extent_buffer_dirty(eb);
set_extent_bits_nowait(&trans->dirty_pages, eb->start,
eb->start + eb->len - 1, EXTENT_DIRTY);
spin_lock(&trans->releasing_ebs_lock);
list_add_tail(&eb->release_list, &trans->releasing_ebs);
spin_unlock(&trans->releasing_ebs_lock);
atomic_inc(&eb->refs);
}
void btrfs_free_redirty_list(struct btrfs_transaction *trans)
{
spin_lock(&trans->releasing_ebs_lock);
while (!list_empty(&trans->releasing_ebs)) {
struct extent_buffer *eb;
eb = list_first_entry(&trans->releasing_ebs,
struct extent_buffer, release_list);
list_del_init(&eb->release_list);
free_extent_buffer(eb);
}
spin_unlock(&trans->releasing_ebs_lock);
set_extent_bit(&trans->dirty_pages, eb->start, eb->start + eb->len - 1,
EXTENT_DIRTY | EXTENT_NOWAIT, NULL);
}
bool btrfs_use_zone_append(struct btrfs_bio *bbio)
@ -1677,63 +1658,89 @@ bool btrfs_use_zone_append(struct btrfs_bio *bbio)
void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT;
struct btrfs_ordered_extent *ordered;
struct btrfs_ordered_sum *sum = bbio->sums;
ordered = btrfs_lookup_ordered_extent(bbio->inode, bbio->file_offset);
if (WARN_ON(!ordered))
return;
ordered->physical = physical;
btrfs_put_ordered_extent(ordered);
if (physical < bbio->orig_physical)
sum->logical -= bbio->orig_physical - physical;
else
sum->logical += physical - bbio->orig_physical;
}
void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
static void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered,
u64 logical)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct extent_map_tree *em_tree;
struct extent_map_tree *em_tree = &BTRFS_I(ordered->inode)->extent_tree;
struct extent_map *em;
struct btrfs_ordered_sum *sum;
u64 orig_logical = ordered->disk_bytenr;
struct map_lookup *map;
u64 physical = ordered->physical;
u64 chunk_start_phys;
u64 logical;
em = btrfs_get_chunk_map(fs_info, orig_logical, 1);
if (IS_ERR(em))
return;
map = em->map_lookup;
chunk_start_phys = map->stripes[0].physical;
if (WARN_ON_ONCE(map->num_stripes > 1) ||
WARN_ON_ONCE((map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) != 0) ||
WARN_ON_ONCE(physical < chunk_start_phys) ||
WARN_ON_ONCE(physical > chunk_start_phys + em->orig_block_len)) {
free_extent_map(em);
return;
}
logical = em->start + (physical - map->stripes[0].physical);
free_extent_map(em);
if (orig_logical == logical)
return;
ordered->disk_bytenr = logical;
em_tree = &inode->extent_tree;
write_lock(&em_tree->lock);
em = search_extent_mapping(em_tree, ordered->file_offset,
ordered->num_bytes);
em->block_start = logical;
free_extent_map(em);
write_unlock(&em_tree->lock);
}
list_for_each_entry(sum, &ordered->list, list) {
if (logical < orig_logical)
sum->bytenr -= orig_logical - logical;
else
sum->bytenr += logical - orig_logical;
static bool btrfs_zoned_split_ordered(struct btrfs_ordered_extent *ordered,
u64 logical, u64 len)
{
struct btrfs_ordered_extent *new;
if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) &&
split_extent_map(BTRFS_I(ordered->inode), ordered->file_offset,
ordered->num_bytes, len, logical))
return false;
new = btrfs_split_ordered_extent(ordered, len);
if (IS_ERR(new))
return false;
new->disk_bytenr = logical;
btrfs_finish_one_ordered(new);
return true;
}
void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered)
{
struct btrfs_inode *inode = BTRFS_I(ordered->inode);
struct btrfs_fs_info *fs_info = inode->root->fs_info;
struct btrfs_ordered_sum *sum =
list_first_entry(&ordered->list, typeof(*sum), list);
u64 logical = sum->logical;
u64 len = sum->len;
while (len < ordered->disk_num_bytes) {
sum = list_next_entry(sum, list);
if (sum->logical == logical + len) {
len += sum->len;
continue;
}
if (!btrfs_zoned_split_ordered(ordered, logical, len)) {
set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
btrfs_err(fs_info, "failed to split ordered extent");
goto out;
}
logical = sum->logical;
len = sum->len;
}
if (ordered->disk_bytenr != logical)
btrfs_rewrite_logical_zoned(ordered, logical);
out:
/*
* If we end up here for nodatasum I/O, the btrfs_ordered_sum structures
* were allocated by btrfs_alloc_dummy_sum only to record the logical
* addresses and don't contain actual checksums. We thus must free them
* here so that we don't attempt to log the csums later.
*/
if ((inode->flags & BTRFS_INODE_NODATASUM) ||
test_bit(BTRFS_FS_STATE_NO_CSUMS, &fs_info->fs_state)) {
while ((sum = list_first_entry_or_null(&ordered->list,
typeof(*sum), list))) {
list_del(&sum->list);
kfree(sum);
}
}
}
@ -1792,8 +1799,8 @@ static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
int nmirrors;
int i, ret;
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bioc);
ret = btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bioc, NULL, NULL, 1);
if (ret || !bioc || mapped_length < PAGE_SIZE) {
ret = -EIO;
goto out_put_bioc;

View File

@ -30,6 +30,8 @@ struct btrfs_zoned_device_info {
struct blk_zone sb_zones[2 * BTRFS_SUPER_MIRROR_MAX];
};
void btrfs_finish_ordered_zoned(struct btrfs_ordered_extent *ordered);
#ifdef CONFIG_BLK_DEV_ZONED
int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone);
@ -54,10 +56,8 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new);
void btrfs_calc_zone_unusable(struct btrfs_block_group *cache);
void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb);
void btrfs_free_redirty_list(struct btrfs_transaction *trans);
bool btrfs_use_zone_append(struct btrfs_bio *bbio);
void btrfs_record_physical_zoned(struct btrfs_bio *bbio);
void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered);
bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
struct btrfs_block_group **cache_ret);
@ -179,7 +179,6 @@ static inline void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) { }
static inline void btrfs_redirty_list_add(struct btrfs_transaction *trans,
struct extent_buffer *eb) { }
static inline void btrfs_free_redirty_list(struct btrfs_transaction *trans) { }
static inline bool btrfs_use_zone_append(struct btrfs_bio *bbio)
{
@ -190,9 +189,6 @@ static inline void btrfs_record_physical_zoned(struct btrfs_bio *bbio)
{
}
static inline void btrfs_rewrite_logical_zoned(
struct btrfs_ordered_extent *ordered) { }
static inline bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb,
struct btrfs_block_group **cache_ret)

View File

@ -356,7 +356,7 @@ struct list_head *zstd_alloc_workspace(unsigned int level)
workspace->level = level;
workspace->req_level = level;
workspace->last_used = jiffies;
workspace->mem = kvmalloc(workspace->size, GFP_KERNEL);
workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN);
workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!workspace->mem || !workspace->buf)
goto fail;

View File

@ -661,6 +661,35 @@ DEFINE_EVENT(btrfs__ordered_extent, btrfs_ordered_extent_mark_finished,
TP_ARGS(inode, ordered)
);
TRACE_EVENT(btrfs_finish_ordered_extent,
TP_PROTO(const struct btrfs_inode *inode, u64 start, u64 len,
bool uptodate),
TP_ARGS(inode, start, len, uptodate),
TP_STRUCT__entry_btrfs(
__field( u64, ino )
__field( u64, start )
__field( u64, len )
__field( bool, uptodate )
__field( u64, root_objectid )
),
TP_fast_assign_btrfs(inode->root->fs_info,
__entry->ino = btrfs_ino(inode);
__entry->start = start;
__entry->len = len;
__entry->uptodate = uptodate;
__entry->root_objectid = inode->root->root_key.objectid;
),
TP_printk_btrfs("root=%llu(%s) ino=%llu start=%llu len=%llu uptodate=%d",
show_root_type(__entry->root_objectid),
__entry->ino, __entry->start,
__entry->len, !!__entry->uptodate)
);
DECLARE_EVENT_CLASS(btrfs__writepage,
TP_PROTO(const struct page *page, const struct inode *inode,
@ -1982,25 +2011,27 @@ DEFINE_EVENT(btrfs__prelim_ref, btrfs_prelim_ref_insert,
);
TRACE_EVENT(btrfs_inode_mod_outstanding_extents,
TP_PROTO(const struct btrfs_root *root, u64 ino, int mod),
TP_PROTO(const struct btrfs_root *root, u64 ino, int mod, unsigned outstanding),
TP_ARGS(root, ino, mod),
TP_ARGS(root, ino, mod, outstanding),
TP_STRUCT__entry_btrfs(
__field( u64, root_objectid )
__field( u64, ino )
__field( int, mod )
__field( unsigned, outstanding )
),
TP_fast_assign_btrfs(root->fs_info,
__entry->root_objectid = root->root_key.objectid;
__entry->ino = ino;
__entry->mod = mod;
__entry->outstanding = outstanding;
),
TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d",
TP_printk_btrfs("root=%llu(%s) ino=%llu mod=%d outstanding=%u",
show_root_type(__entry->root_objectid),
__entry->ino, __entry->mod)
__entry->ino, __entry->mod, __entry->outstanding)
);
DECLARE_EVENT_CLASS(btrfs__block_group,

View File

@ -204,7 +204,6 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
"__ubsan_handle_builtin_unreachable",
"arch_call_rest_init",
"arch_cpu_idle_dead",
"btrfs_assertfail",
"cpu_bringup_and_idle",
"cpu_startup_entry",
"do_exit",