for-6.13-rc4-tag

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE8rQSAMVO+zA4DBdWxWXV+ddtWDsFAmdw8AgACgkQxWXV+ddt
 WDsL4w/+Ib5WGmd2Rjn1+1X9U5dzrEb+/072UBAhwwaqOOUTlBofeyRSdYqFB0oZ
 aucRMXdXPpVe1xrXsj0WsOZmPsuZT46Eh2ALqqZP5fO1sgBkJ2WmQF0Ei7uypfb+
 abQwiEO2IaMMwt2XgDNzbpZS7oVNGEXHzoHF0R/deL4FoBDNMsbCfRnW+L9++tWU
 dUSpafLhgMMwivJN07VJYwU4ZVXsBhmKv2qI8WpJ5w9kJb1ssN692CvBOVjhuSYd
 A8IMV84dW2KO37fmPqN36QAWotz4mKpv8yrhjJvrix7nAOcXe3TXFUhaFBh1Vmzg
 G5bhkqYcNP6UHT7CIcLZE1mdv6ZAKTp0zSNCh2Uu51+MJL2tIQVjTaUQhbkYLnLN
 9DS2dXz4ksm9ISrjr2tmPe4kgyNQIrp5TCdwXu3CYs+AaU7yKeEBukZ7mXcp/e/W
 TdLKvzPRLMED8mGlFBwg2QbOvcJJ663UW2esyv6DvC61F3tXyiV2RXSC/1qF+RyZ
 FBJvvEevensQlASn1NScuQV+iEQpMo2lMURnRjSG8dGhwMmHpW3wifa2TJDyBzWS
 AH0MriQA9nsYQTkPGPnqr46/BAhFG2vEfVlX20Sk9S0PTBLu8YRy/o2evcV67J8v
 zGaa5pa7fQPbEjRv4Rthdb4R2VIFkZTOtIZSZfjHkPDjtvS7ahU=
 =NwGH
 -----END PGP SIGNATURE-----

Merge tag 'for-6.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs fixes from David Sterba:
 "A few more fixes that accumulated over the last two weeks, fixing some
  user reported problems:

   - swapfile fixes:
       - conditional reschedule in the activation loop
       - fix race with memory mapped file when activating
       - make activation loop interruptible
       - rework and fix extent sharing checks

   - folio fixes:
       - in send, recheck folio mapping after unlock
       - in relocation, recheck folio mapping after unlock

   - fix waiting for encoded read io_uring requests

   - fix transaction atomicity when enabling simple quotas

   - move COW block tracepoint before the block gets freed

   - print various sizes in sysfs with correct endianness"

* tag 'for-6.13-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: sysfs: fix direct super block member reads
  btrfs: fix transaction atomicity bug when enabling simple quotas
  btrfs: avoid monopolizing a core when activating a swap file
  btrfs: allow swap activation to be interruptible
  btrfs: fix swap file activation failure due to extents that used to be shared
  btrfs: fix race with memory mapped writes when activating swap file
  btrfs: check folio mapping after unlock in put_file_data()
  btrfs: check folio mapping after unlock in relocate_one_folio()
  btrfs: fix use-after-free when COWing tree block and tracing is enabled
  btrfs: fix use-after-free waiting for encoded read endios
commit c059361673 (merged by Linus Torvalds, 2024-12-29 09:34:34 -08:00)
6 changed files with 130 additions and 56 deletions

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c

@@ -654,6 +654,8 @@ int btrfs_force_cow_block(struct btrfs_trans_handle *trans,
 			goto error_unlock_cow;
 		}
 	}
+
+	trace_btrfs_cow_block(root, buf, cow);
 	if (unlock_orig)
 		btrfs_tree_unlock(buf);
 	free_extent_buffer_stale(buf);
@@ -710,7 +712,6 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	u64 search_start;
-	int ret;
 
 	if (unlikely(test_bit(BTRFS_ROOT_DELETING, &root->state))) {
 		btrfs_abort_transaction(trans, -EUCLEAN);
@@ -751,12 +752,8 @@ int btrfs_cow_block(struct btrfs_trans_handle *trans,
 	 * Also We don't care about the error, as it's handled internally.
 	 */
 	btrfs_qgroup_trace_subtree_after_cow(trans, root, buf);
-	ret = btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
-				    cow_ret, search_start, 0, nest);
-
-	trace_btrfs_cow_block(root, buf, *cow_ret);
-
-	return ret;
+	return btrfs_force_cow_block(trans, root, buf, parent, parent_slot,
+				     cow_ret, search_start, 0, nest);
 }
 
 ALLOW_ERROR_INJECTION(btrfs_cow_block, ERRNO);
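
The ctree.c change is an ordering fix: btrfs_force_cow_block() can drop the last reference to `buf` through free_extent_buffer_stale(), so the tracepoint that dereferences `buf` has to fire inside that function, before the free, rather than in btrfs_cow_block() afterwards. A minimal sketch of the bug class, with hypothetical helper names:

	/* Hypothetical helpers; only the ordering matters. */
	trace_obj_event(obj);	/* safe: obj is still referenced here */
	obj_put(obj);		/* may drop the last reference and free obj */
	/* tracing obj past this point reads freed memory when tracing is on */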

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

@@ -9078,9 +9078,9 @@ out:
 }
 
 struct btrfs_encoded_read_private {
-	wait_queue_head_t wait;
+	struct completion done;
 	void *uring_ctx;
-	atomic_t pending;
+	refcount_t pending_refs;
 	blk_status_t status;
 };
 
@@ -9099,14 +9099,14 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
 		 */
 		WRITE_ONCE(priv->status, bbio->bio.bi_status);
 	}
-	if (atomic_dec_and_test(&priv->pending)) {
+	if (refcount_dec_and_test(&priv->pending_refs)) {
 		int err = blk_status_to_errno(READ_ONCE(priv->status));
 
 		if (priv->uring_ctx) {
 			btrfs_uring_read_extent_endio(priv->uring_ctx, err);
 			kfree(priv);
 		} else {
-			wake_up(&priv->wait);
+			complete(&priv->done);
 		}
 	}
 	bio_put(&bbio->bio);
@@ -9126,8 +9126,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 	if (!priv)
 		return -ENOMEM;
 
-	init_waitqueue_head(&priv->wait);
-	atomic_set(&priv->pending, 1);
+	init_completion(&priv->done);
+	refcount_set(&priv->pending_refs, 1);
 	priv->status = 0;
 	priv->uring_ctx = uring_ctx;
 
@@ -9140,7 +9140,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 			size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
 
 			if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
-				atomic_inc(&priv->pending);
+				refcount_inc(&priv->pending_refs);
 				btrfs_submit_bbio(bbio, 0);
 
 				bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
@@ -9155,11 +9155,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 		disk_io_size -= bytes;
 	} while (disk_io_size);
 
-	atomic_inc(&priv->pending);
+	refcount_inc(&priv->pending_refs);
 	btrfs_submit_bbio(bbio, 0);
 
 	if (uring_ctx) {
-		if (atomic_dec_return(&priv->pending) == 0) {
+		if (refcount_dec_and_test(&priv->pending_refs)) {
 			ret = blk_status_to_errno(READ_ONCE(priv->status));
 			btrfs_uring_read_extent_endio(uring_ctx, ret);
 			kfree(priv);
@@ -9168,8 +9168,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 
 		return -EIOCBQUEUED;
 	} else {
-		if (atomic_dec_return(&priv->pending) != 0)
-			io_wait_event(priv->wait, !atomic_read(&priv->pending));
+		if (!refcount_dec_and_test(&priv->pending_refs))
+			wait_for_completion_io(&priv->done);
 		/* See btrfs_encoded_read_endio() for ordering. */
 		ret = blk_status_to_errno(READ_ONCE(priv->status));
 		kfree(priv);
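
The conversion above replaces an open-coded waitqueue plus atomic_t with a completion plus refcount_t: the submitter holds one reference, every in-flight bio holds another, and whoever drops the last one signals the waiter. That closes the window where the final endio could free `priv` while the submitting task was still about to touch it. A condensed sketch of the pattern, with illustrative struct and function names around the real kernel primitives:

	#include <linux/refcount.h>
	#include <linux/completion.h>

	struct io_batch {			/* illustrative, mirrors btrfs_encoded_read_private */
		struct completion done;
		refcount_t pending_refs;	/* 1 for the submitter + 1 per in-flight bio */
	};

	static void io_batch_endio(struct io_batch *b)
	{
		/* The last reference dropped signals the waiter exactly once. */
		if (refcount_dec_and_test(&b->pending_refs))
			complete(&b->done);
	}

	static void io_batch_wait(struct io_batch *b)
	{
		/* Drop the submitter's reference; sleep only if bios are still in flight. */
		if (!refcount_dec_and_test(&b->pending_refs))
			wait_for_completion_io(&b->done);
	}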
@@ -9799,15 +9799,25 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
-	struct extent_map *em = NULL;
 	struct btrfs_chunk_map *map = NULL;
 	struct btrfs_device *device = NULL;
 	struct btrfs_swap_info bsi = {
 		.lowest_ppage = (sector_t)-1ULL,
 	};
+	struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
+	struct btrfs_path *path = NULL;
 	int ret = 0;
 	u64 isize;
-	u64 start;
+	u64 prev_extent_end = 0;
+
+	/*
+	 * Acquire the inode's mmap lock to prevent races with memory mapped
+	 * writes, as they could happen after we flush delalloc below and before
+	 * we lock the extent range further below. The inode was already locked
+	 * up in the call chain.
+	 */
+	btrfs_assert_inode_locked(BTRFS_I(inode));
+	down_write(&BTRFS_I(inode)->i_mmap_lock);
 
 	/*
 	 * If the swap file was just created, make sure delalloc is done. If the
@@ -9816,22 +9826,32 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	 */
 	ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
 	if (ret)
-		return ret;
+		goto out_unlock_mmap;
 
 	/*
 	 * The inode is locked, so these flags won't change after we check them.
 	 */
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
 		btrfs_warn(fs_info, "swapfile must not be compressed");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
 		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		btrfs_warn(fs_info, "swapfile must not be checksummed");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 
+	path = btrfs_alloc_path();
+	backref_ctx = btrfs_alloc_backref_share_check_ctx();
+	if (!path || !backref_ctx) {
+		ret = -ENOMEM;
+		goto out_unlock_mmap;
+	}
+
 	/*
@@ -9846,7 +9866,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile while exclusive operation is running");
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out_unlock_mmap;
 	}
 
 	/*
@@ -9860,7 +9881,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		btrfs_exclop_finish(fs_info);
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile because snapshot creation is in progress");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	/*
 	 * Snapshots can create extents which require COW even if NODATACOW is
@@ -9881,7 +9903,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile because subvolume %llu is being deleted",
 			   btrfs_root_id(root));
-		return -EPERM;
+		ret = -EPERM;
+		goto out_unlock_mmap;
 	}
 	atomic_inc(&root->nr_swapfiles);
 	spin_unlock(&root->root_item_lock);
@@ -9889,24 +9912,39 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
 	lock_extent(io_tree, 0, isize - 1, &cached_state);
-	start = 0;
-	while (start < isize) {
-		u64 logical_block_start, physical_block_start;
+	while (prev_extent_end < isize) {
+		struct btrfs_key key;
+		struct extent_buffer *leaf;
+		struct btrfs_file_extent_item *ei;
 		struct btrfs_block_group *bg;
-		u64 len = isize - start;
+		u64 logical_block_start;
+		u64 physical_block_start;
+		u64 extent_gen;
+		u64 disk_bytenr;
+		u64 len;
 
-		em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
-		if (IS_ERR(em)) {
-			ret = PTR_ERR(em);
+		key.objectid = btrfs_ino(BTRFS_I(inode));
+		key.type = BTRFS_EXTENT_DATA_KEY;
+		key.offset = prev_extent_end;
+
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
 			goto out;
-		}
 
-		if (em->disk_bytenr == EXTENT_MAP_HOLE) {
+		/*
+		 * If key not found it means we have an implicit hole (NO_HOLES
+		 * is enabled).
+		 */
+		if (ret > 0) {
 			btrfs_warn(fs_info, "swapfile must not have holes");
 			ret = -EINVAL;
 			goto out;
 		}
-		if (em->disk_bytenr == EXTENT_MAP_INLINE) {
+
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
 			/*
 			 * It's unlikely we'll ever actually find ourselves
 			 * here, as a file small enough to fit inline won't be
@@ -9918,23 +9956,45 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 			ret = -EINVAL;
 			goto out;
 		}
-		if (extent_map_is_compressed(em)) {
+
+		if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
 			btrfs_warn(fs_info, "swapfile must not be compressed");
 			ret = -EINVAL;
 			goto out;
 		}
 
-		logical_block_start = extent_map_block_start(em) + (start - em->start);
-		len = min(len, em->len - (start - em->start));
-		free_extent_map(em);
-		em = NULL;
+		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
+		if (disk_bytenr == 0) {
+			btrfs_warn(fs_info, "swapfile must not have holes");
+			ret = -EINVAL;
+			goto out;
+		}
 
-		ret = can_nocow_extent(inode, start, &len, NULL, false, true);
+		logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei);
+		extent_gen = btrfs_file_extent_generation(leaf, ei);
+		prev_extent_end = btrfs_file_extent_end(path);
+
+		if (prev_extent_end > isize)
+			len = isize - key.offset;
+		else
+			len = btrfs_file_extent_num_bytes(leaf, ei);
+
+		backref_ctx->curr_leaf_bytenr = leaf->start;
+
+		/*
+		 * Don't need the path anymore, release to avoid deadlocks when
+		 * calling btrfs_is_data_extent_shared() because when joining a
+		 * transaction it can block waiting for the current one's commit
+		 * which in turn may be trying to lock the same leaf to flush
+		 * delayed items for example.
+		 */
+		btrfs_release_path(path);
+
+		ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr,
+						  extent_gen, backref_ctx);
 		if (ret < 0) {
 			goto out;
-		} else if (ret) {
-			ret = 0;
-		} else {
+		} else if (ret > 0) {
 			btrfs_warn(fs_info,
 				   "swapfile must not be copy-on-write");
 			ret = -EINVAL;
@@ -9969,7 +10029,6 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
 		physical_block_start = (map->stripes[0].physical +
 					(logical_block_start - map->start));
-		len = min(len, map->chunk_len - (logical_block_start - map->start));
 		btrfs_free_chunk_map(map);
 		map = NULL;
@@ -10010,20 +10069,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 				if (ret)
 					goto out;
 			}
-			bsi.start = start;
+			bsi.start = key.offset;
 			bsi.block_start = physical_block_start;
 			bsi.block_len = len;
 		}
 
-		start += len;
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			goto out;
+		}
+
+		cond_resched();
 	}
 
 	if (bsi.block_len)
 		ret = btrfs_add_swap_extent(sis, &bsi);
 
 out:
-	if (!IS_ERR_OR_NULL(em))
-		free_extent_map(em);
-
 	if (!IS_ERR_OR_NULL(map))
 		btrfs_free_chunk_map(map);
@@ -10036,6 +10098,10 @@ out:
 	btrfs_exclop_finish(fs_info);
 
+out_unlock_mmap:
+	up_write(&BTRFS_I(inode)->i_mmap_lock);
+	btrfs_free_backref_share_ctx(backref_ctx);
+	btrfs_free_path(path);
 	if (ret)
 		return ret;
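
Two of the swapfile fixes meet in the loop tail above: the fatal-signal check makes a long activation killable, and cond_resched() keeps a badly fragmented file from monopolizing a CPU. The per-iteration etiquette, condensed from the hunk:

	while (prev_extent_end < isize) {
		/* ... look up and validate one file extent item ... */

		if (fatal_signal_pending(current)) {	/* activation is killable */
			ret = -EINTR;
			goto out;
		}
		cond_resched();		/* yield the CPU once per extent */
	}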

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

@@ -1121,6 +1121,7 @@ int btrfs_quota_enable(struct btrfs_fs_info *fs_info,
 	fs_info->qgroup_flags = BTRFS_QGROUP_STATUS_FLAG_ON;
 	if (simple) {
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
+		btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
 		btrfs_set_qgroup_status_enable_gen(leaf, ptr, trans->transid);
 	} else {
 		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
@@ -1254,8 +1255,6 @@ out_add_root:
 	spin_lock(&fs_info->qgroup_lock);
 	fs_info->quota_root = quota_root;
 	set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
-	if (simple)
-		btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);
 	spin_unlock(&fs_info->qgroup_lock);
 
 	/* Skip rescan for simple qgroups. */
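
The two qgroup hunks are a single move: the SIMPLE_QUOTA incompat bit is now set inside btrfs_quota_enable()'s transaction, next to the new status item, instead of after out_add_root where that transaction may already have committed. A crash in the old window could leave simple-quota items on disk without the incompat flag that tells older kernels to refuse the filesystem. Condensed from the hunks, the ordering rule is:

	/* Inside the transaction that creates the new on-disk items: */
	if (simple) {
		fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_SIMPLE_MODE;
		btrfs_set_fs_incompat(fs_info, SIMPLE_QUOTA);	/* before any commit */
	}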

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c

@@ -2902,6 +2902,7 @@ static int relocate_one_folio(struct reloc_control *rc,
 	const bool use_rst = btrfs_need_stripe_tree_update(fs_info, rc->block_group->flags);
 
 	ASSERT(index <= last_index);
+again:
 	folio = filemap_lock_folio(inode->i_mapping, index);
 	if (IS_ERR(folio)) {
@@ -2937,6 +2938,11 @@ static int relocate_one_folio(struct reloc_control *rc,
 			ret = -EIO;
 			goto release_folio;
 		}
+		if (folio->mapping != inode->i_mapping) {
+			folio_unlock(folio);
+			folio_put(folio);
+			goto again;
+		}
 	}
 
 	/*

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

@@ -5280,6 +5280,7 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
 		unsigned cur_len = min_t(unsigned, len,
 					 PAGE_SIZE - pg_offset);
 
+again:
 		folio = filemap_lock_folio(mapping, index);
 		if (IS_ERR(folio)) {
 			page_cache_sync_readahead(mapping,
@@ -5312,6 +5313,11 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
 			ret = -EIO;
 			break;
 		}
+		if (folio->mapping != mapping) {
+			folio_unlock(folio);
+			folio_put(folio);
+			goto again;
+		}
 	}
 
 	memcpy_from_folio(sctx->send_buf + sctx->send_size, folio,
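
relocate_one_folio() and put_file_data() receive the same fix: while a folio is unlocked (for read-in or readahead), truncation or reclaim can detach it from its mapping, so after re-locking it the mapping must be re-verified and the lookup retried. As a stand-alone helper, the shared pattern would look roughly like this (hypothetical function name, real pagemap API):

	#include <linux/pagemap.h>

	static struct folio *lock_attached_folio(struct address_space *mapping,
						 pgoff_t index)
	{
		struct folio *folio;
	again:
		folio = filemap_lock_folio(mapping, index);
		if (IS_ERR(folio))
			return folio;
		/* Truncation may have detached the folio while it was unlocked. */
		if (folio->mapping != mapping) {
			folio_unlock(folio);
			folio_put(folio);
			goto again;
		}
		return folio;
	}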

diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c

@@ -1118,7 +1118,7 @@ static ssize_t btrfs_nodesize_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 
-	return sysfs_emit(buf, "%u\n", fs_info->super_copy->nodesize);
+	return sysfs_emit(buf, "%u\n", fs_info->nodesize);
 }
 
 BTRFS_ATTR(, nodesize, btrfs_nodesize_show);
@@ -1128,7 +1128,7 @@ static ssize_t btrfs_sectorsize_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 
-	return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
+	return sysfs_emit(buf, "%u\n", fs_info->sectorsize);
 }
 
 BTRFS_ATTR(, sectorsize, btrfs_sectorsize_show);
@@ -1180,7 +1180,7 @@ static ssize_t btrfs_clone_alignment_show(struct kobject *kobj,
 {
 	struct btrfs_fs_info *fs_info = to_fs_info(kobj);
 
-	return sysfs_emit(buf, "%u\n", fs_info->super_copy->sectorsize);
+	return sysfs_emit(buf, "%u\n", fs_info->sectorsize);
 }
 
 BTRFS_ATTR(, clone_alignment, btrfs_clone_alignment_show);
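
The sysfs hunks fix an endianness bug: `fs_info->super_copy` holds the super block in its on-disk, little-endian layout, so reading members like `nodesize` directly returns byte-swapped values on big-endian machines. The fix reads the native-endian copies cached in `fs_info` instead; the alternative would be an explicit conversion. A sketch of the distinction, assuming a hypothetical `__le32` on-disk member:

	#include <linux/types.h>
	#include <asm/byteorder.h>

	struct demo_super {		/* hypothetical on-disk structure */
		__le32 nodesize;	/* stored little-endian on every arch */
	};

	static u32 demo_nodesize(const struct demo_super *sb)
	{
		/* A bare sb->nodesize would be correct only on little-endian CPUs. */
		return le32_to_cpu(sb->nodesize);
	}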