mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-01-16 18:08:20 +00:00
96c36eaa77
[BUG]
Test case btrfs/124 fails if larger metadata folios are enabled. The dying message looks like this:

 BTRFS error (device dm-2): bad tree block start, mirror 2 want 31686656 have 0
 BTRFS info (device dm-2): read error corrected: ino 0 off 31686656 (dev /dev/mapper/test-scratch2 sector 20928)
 BUG: kernel NULL pointer dereference, address: 0000000000000020
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 CPU: 6 PID: 350881 Comm: btrfs Tainted: G OE 6.7.0-rc3-custom+ #128
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS unknown 2/2/2022
 RIP: 0010:btrfs_read_extent_buffer+0x106/0x180 [btrfs]
 PKRU: 55555554
 Call Trace:
 <TASK>
 read_tree_block+0x33/0xb0 [btrfs]
 read_block_for_search+0x23e/0x340 [btrfs]
 btrfs_search_slot+0x2f9/0xe60 [btrfs]
 btrfs_lookup_csum+0x75/0x160 [btrfs]
 btrfs_lookup_bio_sums+0x21a/0x560 [btrfs]
 btrfs_submit_chunk+0x152/0x680 [btrfs]
 btrfs_submit_bio+0x1c/0x50 [btrfs]
 submit_one_bio+0x40/0x80 [btrfs]
 submit_extent_page+0x158/0x390 [btrfs]
 btrfs_do_readpage+0x330/0x740 [btrfs]
 extent_readahead+0x38d/0x6c0 [btrfs]
 read_pages+0x94/0x2c0
 page_cache_ra_unbounded+0x12d/0x190
 relocate_file_extent_cluster+0x7c1/0x9d0 [btrfs]
 relocate_block_group+0x2d3/0x560 [btrfs]
 btrfs_relocate_block_group+0x2c7/0x4b0 [btrfs]
 btrfs_relocate_chunk+0x4c/0x1a0 [btrfs]
 btrfs_balance+0x925/0x13c0 [btrfs]
 btrfs_ioctl+0x19f1/0x25d0 [btrfs]
 __x64_sys_ioctl+0x90/0xd0
 do_syscall_64+0x3f/0xf0
 entry_SYSCALL_64_after_hwframe+0x6e/0x76

[CAUSE]
The dying line is at the btrfs_repair_io_failure() call inside btrfs_repair_eb_io_failure(). The function still relies on the extent buffer using page sized folios. When the extent buffer is using a larger folio, we go into the 2nd slot of folios[] and trigger the NULL pointer dereference.

[FIX]
Migrate btrfs_repair_io_failure() to folio interfaces, so that when we hit a larger folio, we just submit the whole folio in one go.
This also affects the data repair path through btrfs_end_repair_bio(). Thankfully data is still fully page based, so we can just add an ASSERT() and use page_folio() to convert the page to a folio.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
112 lines
3.0 KiB
C
112 lines
3.0 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* Copyright (C) 2007 Oracle. All rights reserved.
|
|
* Copyright (C) 2022 Christoph Hellwig.
|
|
*/
|
|
|
|
#ifndef BTRFS_BIO_H
|
|
#define BTRFS_BIO_H
|
|
|
|
#include <linux/bio.h>
|
|
#include <linux/workqueue.h>
|
|
#include "tree-checker.h"
|
|
|
|
/* Forward declarations for the pointer types used below. */
struct btrfs_bio;
struct btrfs_fs_info;

/* Number of u8 slots in the csum_inline[] array of struct btrfs_bio. */
#define BTRFS_BIO_INLINE_CSUM_SIZE	64

/*
 * Upper bound on the sectors carried by one bio, which in turn bounds the
 * size of the checksum array.  The value equals the number of bio_vecs per
 * bio and therefore the I/O size used for buffered I/O.
 */
#define BTRFS_MAX_BIO_SECTORS		(256)

/* Signature of the end_io callback stored in struct btrfs_bio. */
typedef void (*btrfs_bio_end_io_t)(struct btrfs_bio *bbio);
|
|
|
|
/*
|
|
* Highlevel btrfs I/O structure. It is allocated by btrfs_bio_alloc and
|
|
* passed to btrfs_submit_bio for mapping to the physical devices.
|
|
*/
|
|
struct btrfs_bio {
|
|
/*
|
|
* Inode and offset into it that this I/O operates on.
|
|
* Only set for data I/O.
|
|
*/
|
|
struct btrfs_inode *inode;
|
|
u64 file_offset;
|
|
|
|
union {
|
|
/*
|
|
* For data reads: checksumming and original I/O information.
|
|
* (for internal use in the btrfs_submit_bio machinery only)
|
|
*/
|
|
struct {
|
|
u8 *csum;
|
|
u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
|
|
struct bvec_iter saved_iter;
|
|
};
|
|
|
|
/*
|
|
* For data writes:
|
|
* - ordered extent covering the bio
|
|
* - pointer to the checksums for this bio
|
|
* - original physical address from the allocator
|
|
* (for zone append only)
|
|
*/
|
|
struct {
|
|
struct btrfs_ordered_extent *ordered;
|
|
struct btrfs_ordered_sum *sums;
|
|
u64 orig_physical;
|
|
};
|
|
|
|
/* For metadata reads: parentness verification. */
|
|
struct btrfs_tree_parent_check parent_check;
|
|
};
|
|
|
|
/* End I/O information supplied to btrfs_bio_alloc */
|
|
btrfs_bio_end_io_t end_io;
|
|
void *private;
|
|
|
|
/* For internal use in read end I/O handling */
|
|
unsigned int mirror_num;
|
|
atomic_t pending_ios;
|
|
struct work_struct end_io_work;
|
|
|
|
/* File system that this I/O operates on. */
|
|
struct btrfs_fs_info *fs_info;
|
|
|
|
/*
|
|
* This member must come last, bio_alloc_bioset will allocate enough
|
|
* bytes for entire btrfs_bio but relies on bio being last.
|
|
*/
|
|
struct bio bio;
|
|
};
|
|
|
|
static inline struct btrfs_bio *btrfs_bio(struct bio *bio)
|
|
{
|
|
return container_of(bio, struct btrfs_bio, bio);
|
|
}
|
|
|
|
int __init btrfs_bioset_init(void);
|
|
void __cold btrfs_bioset_exit(void);
|
|
|
|
void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
|
|
btrfs_bio_end_io_t end_io, void *private);
|
|
struct btrfs_bio *btrfs_bio_alloc(unsigned int nr_vecs, blk_opf_t opf,
|
|
struct btrfs_fs_info *fs_info,
|
|
btrfs_bio_end_io_t end_io, void *private);
|
|
void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status);
|
|
|
|
/* Submit using blkcg_punt_bio_submit. */
|
|
#define REQ_BTRFS_CGROUP_PUNT REQ_FS_PRIVATE
|
|
|
|
void btrfs_submit_bio(struct btrfs_bio *bbio, int mirror_num);
|
|
void btrfs_submit_repair_write(struct btrfs_bio *bbio, int mirror_num, bool dev_replace);
|
|
int btrfs_repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
|
|
u64 length, u64 logical, struct folio *folio,
|
|
unsigned int folio_offset, int mirror_num);
|
|
|
|
#endif
|