2018-04-03 19:16:55 +02:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
2017-10-09 01:51:02 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) Qu Wenruo 2017. All rights reserved.
|
|
|
|
*/
|
|
|
|
|
2018-04-03 19:16:55 +02:00
|
|
|
#ifndef BTRFS_TREE_CHECKER_H
|
|
|
|
#define BTRFS_TREE_CHECKER_H
|
2017-10-09 01:51:02 +00:00
|
|
|
|
2024-01-27 03:19:56 +01:00
|
|
|
#include <linux/types.h>
|
2022-11-15 10:44:04 +01:00
|
|
|
#include <uapi/linux/btrfs_tree.h>
|
|
|
|
|
|
|
|
struct extent_buffer;
|
btrfs: validate system chunk array at btrfs_validate_super()
Currently btrfs_validate_super() only does a very basic check on the
array chunk size (not larger than the available space, and not too
small to contain any chunk).
The more comprehensive checks (the regular chunk checks and size check
inside the system chunk array) are all done inside btrfs_read_sys_array().
It's not a big deal, but it also means we do not do any validation on
the system chunk array at super block writeback time either.
Do the following modification to centralize the system chunk array
checks into btrfs_validate_super():
- Make chunk_err() helper accept stack chunk pointer
If the @leaf parameter is NULL, then the @chunk pointer will be a
pointer to the chunk item, rather than the offset inside the leaf.
And since @leaf can be NULL, add a new @fs_info parameter for that
case.
- Make btrfs_check_chunk_valid() handle stack chunk pointer
The same as chunk_err(): add a new @fs_info parameter, and if @leaf is
NULL, then @chunk will be a pointer to a stack chunk.
If @leaf is NULL, then all needed btrfs_chunk members will be read
using the stack helpers instead of the leaf helpers.
This means we need to read out all the needed members at the beginning
of the function.
Furthermore, at super block read time, fs_info->sectorsize is not yet
initialized, so we need one extra @sectorsize parameter to grab the
correct sectorsize.
- Introduce a helper validate_sys_chunk_array()
* Validate the disk key.
* Validate the size before we access the full chunk items.
* Do the full chunk item validation.
- Call validate_sys_chunk_array() at btrfs_validate_super()
- Simplify the checks inside btrfs_read_sys_array()
Now the checks will be converted to an ASSERT().
- Simplify the checks inside read_one_chunk()
Now that all chunk items inside system chunk array and chunk tree are
verified, there is no need to verify them again inside read_one_chunk().
This change has the following advantages:
- More comprehensive checks at write time
And unlike the sys_chunk_array read routine, this time we do not need
to allocate a dummy extent buffer to do the check.
All the checks done here require no new memory allocation.
- Slightly improved readability when iterating the system chunk array
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2024-12-21 16:15:19 +10:30
|
|
|
struct btrfs_fs_info;
|
2022-11-15 10:44:04 +01:00
|
|
|
struct btrfs_chunk;
|
2024-01-27 03:19:56 +01:00
|
|
|
struct btrfs_key;
|
2022-11-15 10:44:04 +01:00
|
|
|
|
|
|
|
/*
 * All the extra info needed to verify the parentness of a tree block.
 *
 * Bundles the expected owner root, transid, first key and level of a tree
 * block into one structure; a pointer to it is what
 * btrfs_verify_level_key() takes as its @check argument.
 * Some of the checks can be skipped, as described on each member below.
 */
|
|
|
|
struct btrfs_tree_parent_check {
|
|
|
|
/*
|
|
|
|
* The owner check against the tree block.
|
|
|
|
*
|
|
|
|
* Can be 0 to skip the owner check.
|
|
|
|
*/
|
|
|
|
u64 owner_root;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Expected transid, can be 0 to skip the check, but such skip
|
2023-12-05 19:26:39 +01:00
|
|
|
* should only be utilized for backref walk related code.
|
2022-11-15 10:44:04 +01:00
|
|
|
*/
|
|
|
|
u64 transid;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The expected first key.
|
|
|
|
*
|
|
|
|
* This check can be skipped if @has_first_key is false, such skip
|
|
|
|
* can happen for case where we don't have the parent node key,
|
|
|
|
* e.g. reading the tree root, doing backref walk.
|
|
|
|
*/
|
|
|
|
struct btrfs_key first_key;
|
|
|
|
/* False means @first_key is not to be checked (see the comment above). */
bool has_first_key;
|
|
|
|
|
|
|
|
/* The expected level. Should always be set. */
|
|
|
|
u8 level;
|
|
|
|
};
|
2017-10-09 01:51:02 +00:00
|
|
|
|
2023-04-29 16:07:13 -04:00
|
|
|
/*
 * Status codes for a checked tree block. This is the return type of
 * __btrfs_check_leaf() and __btrfs_check_node() declared in this header.
 *
 * BTRFS_TREE_BLOCK_CLEAN is the first enumerator and therefore has value 0.
 * NOTE(review): the remaining enumerators presumably each name one distinct
 * reason for rejecting a block; the exact condition behind each code is in
 * the implementation, not in this header -- confirm there.
 */
enum btrfs_tree_block_status {
|
|
|
|
BTRFS_TREE_BLOCK_CLEAN,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_NRITEMS,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_PARENT_KEY,
|
|
|
|
BTRFS_TREE_BLOCK_BAD_KEY_ORDER,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_LEVEL,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_FREE_SPACE,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_OFFSETS,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_BLOCKPTR,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_ITEM,
|
|
|
|
BTRFS_TREE_BLOCK_INVALID_OWNER,
|
btrfs: make sure that WRITTEN is set on all metadata blocks
We previously would call btrfs_check_leaf() if we had the check
integrity code enabled, which meant that we could only run the extended
leaf checks if we had WRITTEN set on the header flags.
This leaves a gap in our checking, because we could end up with
corruption on disk where WRITTEN isn't set on the leaf, and then the
extended leaf checks don't get run which we rely on to validate all of
the item pointers to make sure we don't access memory outside of the
extent buffer.
However, since 732fab95abe2 ("btrfs: check-integrity: remove
CONFIG_BTRFS_FS_CHECK_INTEGRITY option") we no longer call
btrfs_check_leaf() from btrfs_mark_buffer_dirty(), which means we only
ever call it on blocks that are being written out, and thus have WRITTEN
set, or that are being read in, which should have WRITTEN set.
Add checks to make sure we have WRITTEN set appropriately, and then make
sure __btrfs_check_leaf() always does the item checking. This will
protect us from file systems that have been corrupted and no longer have
WRITTEN set on some of the blocks.
This was hit on a crafted image tweaking the WRITTEN bit and reported by
KASAN as out-of-bound access in the eb accessors. The example is a dir
item at the end of an eb.
[2.042] BTRFS warning (device loop1): bad eb member start: ptr 0x3fff start 30572544 member offset 16410 size 2
[2.040] general protection fault, probably for non-canonical address 0xe0009d1000000003: 0000 [#1] PREEMPT SMP KASAN NOPTI
[2.537] KASAN: maybe wild-memory-access in range [0x0005088000000018-0x000508800000001f]
[2.729] CPU: 0 PID: 2587 Comm: mount Not tainted 6.8.2 #1
[2.729] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[2.621] RIP: 0010:btrfs_get_16+0x34b/0x6d0
[2.621] RSP: 0018:ffff88810871fab8 EFLAGS: 00000206
[2.621] RAX: 0000a11000000003 RBX: ffff888104ff8720 RCX: ffff88811b2288c0
[2.621] RDX: dffffc0000000000 RSI: ffffffff81dd8aca RDI: ffff88810871f748
[2.621] RBP: 000000000000401a R08: 0000000000000001 R09: ffffed10210e3ee9
[2.621] R10: ffff88810871f74f R11: 205d323430333737 R12: 000000000000001a
[2.621] R13: 000508800000001a R14: 1ffff110210e3f5d R15: ffffffff850011e8
[2.621] FS: 00007f56ea275840(0000) GS:ffff88811b200000(0000) knlGS:0000000000000000
[2.621] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[2.621] CR2: 00007febd13b75c0 CR3: 000000010bb50000 CR4: 00000000000006f0
[2.621] Call Trace:
[2.621] <TASK>
[2.621] ? show_regs+0x74/0x80
[2.621] ? die_addr+0x46/0xc0
[2.621] ? exc_general_protection+0x161/0x2a0
[2.621] ? asm_exc_general_protection+0x26/0x30
[2.621] ? btrfs_get_16+0x33a/0x6d0
[2.621] ? btrfs_get_16+0x34b/0x6d0
[2.621] ? btrfs_get_16+0x33a/0x6d0
[2.621] ? __pfx_btrfs_get_16+0x10/0x10
[2.621] ? __pfx_mutex_unlock+0x10/0x10
[2.621] btrfs_match_dir_item_name+0x101/0x1a0
[2.621] btrfs_lookup_dir_item+0x1f3/0x280
[2.621] ? __pfx_btrfs_lookup_dir_item+0x10/0x10
[2.621] btrfs_get_tree+0xd25/0x1910
Reported-by: lei lu <llfamsec@gmail.com>
CC: stable@vger.kernel.org # 6.7+
Reviewed-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
[ copy more details from report ]
Signed-off-by: David Sterba <dsterba@suse.com>
2024-04-29 09:03:35 -04:00
|
|
|
BTRFS_TREE_BLOCK_WRITTEN_NOT_SET,
|
2023-04-29 16:07:13 -04:00
|
|
|
};
|
|
|
|
|
2023-04-29 16:07:15 -04:00
|
|
|
/*
|
|
|
|
* Exported simply for btrfs-progs which wants to have the
|
|
|
|
* btrfs_tree_block_status return codes.
|
|
|
|
*
* Takes the leaf in @leaf and returns an enum btrfs_tree_block_status
* instead of an int; compare btrfs_check_leaf() declared later in this
* header.
* NOTE(review): presumably BTRFS_TREE_BLOCK_CLEAN means no problem was
* found -- confirm against the implementation.
*/
|
|
|
|
enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf);
|
2023-04-29 16:07:16 -04:00
|
|
|
/*
 * Node counterpart of __btrfs_check_leaf(): takes the tree block in @node
 * and returns an enum btrfs_tree_block_status rather than an int.
 * NOTE(review): presumably BTRFS_TREE_BLOCK_CLEAN means no problem was
 * found -- confirm against the implementation.
 */
enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node);
|
2023-04-29 16:07:15 -04:00
|
|
|
|
2023-04-29 16:07:12 -04:00
|
|
|
/*
 * Check the leaf in @leaf and report the outcome as an int rather than as
 * an enum btrfs_tree_block_status (compare __btrfs_check_leaf()).
 * NOTE(review): presumably returns 0 when the leaf is valid and a negative
 * errno otherwise -- confirm against the implementation.
 */
int btrfs_check_leaf(struct extent_buffer *leaf);
|
2019-03-20 16:25:00 +01:00
|
|
|
/*
 * Check the tree block in @node and report the outcome as an int rather
 * than as an enum btrfs_tree_block_status (compare __btrfs_check_node()).
 * NOTE(review): presumably returns 0 when the node is valid and a negative
 * errno otherwise -- confirm against the implementation.
 */
int btrfs_check_node(struct extent_buffer *node);
|
2017-10-09 01:51:02 +00:00
|
|
|
|
btrfs: validate system chunk array at btrfs_validate_super()
Currently btrfs_validate_super() only does a very basic check on the
array chunk size (not larger than the available space, and not too
small to contain any chunk).
The more comprehensive checks (the regular chunk checks and size check
inside the system chunk array) are all done inside btrfs_read_sys_array().
It's not a big deal, but it also means we do not do any validation on
the system chunk array at super block writeback time either.
Do the following modification to centralize the system chunk array
checks into btrfs_validate_super():
- Make chunk_err() helper accept stack chunk pointer
If the @leaf parameter is NULL, then the @chunk pointer will be a
pointer to the chunk item, rather than the offset inside the leaf.
And since @leaf can be NULL, add a new @fs_info parameter for that
case.
- Make btrfs_check_chunk_valid() handle stack chunk pointer
The same as chunk_err(): add a new @fs_info parameter, and if @leaf is
NULL, then @chunk will be a pointer to a stack chunk.
If @leaf is NULL, then all needed btrfs_chunk members will be read
using the stack helpers instead of the leaf helpers.
This means we need to read out all the needed members at the beginning
of the function.
Furthermore, at super block read time, fs_info->sectorsize is not yet
initialized, so we need one extra @sectorsize parameter to grab the
correct sectorsize.
- Introduce a helper validate_sys_chunk_array()
* Validate the disk key.
* Validate the size before we access the full chunk items.
* Do the full chunk item validation.
- Call validate_sys_chunk_array() at btrfs_validate_super()
- Simplify the checks inside btrfs_read_sys_array()
Now the checks will be converted to an ASSERT().
- Simplify the checks inside read_one_chunk()
Now that all chunk items inside system chunk array and chunk tree are
verified, there is no need to verify them again inside read_one_chunk().
This change has the following advantages:
- More comprehensive checks at write time
And unlike the sys_chunk_array read routine, this time we do not need
to allocate a dummy extent buffer to do the check.
All the checks done here require no new memory allocation.
- Slightly improved readability when iterating the system chunk array
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2024-12-21 16:15:19 +10:30
|
|
|
/*
 * Validate a chunk item, either one stored in a leaf or a stack chunk.
 *
 * @fs_info:    filesystem info; needed because @leaf may be NULL
 * @leaf:       extent buffer holding the chunk item, or NULL when @chunk
 *              points to a stack chunk
 * @chunk:      with a non-NULL @leaf this is the offset of the chunk item
 *              inside the leaf; with a NULL @leaf it is a pointer to the
 *              chunk itself
 * @logical:    NOTE(review): presumably the logical start address of the
 *              chunk -- confirm against the implementation
 * @sectorsize: sector size to use for the checks; passed explicitly because
 *              fs_info->sectorsize is not yet initialized at super block
 *              read time
 *
 * NOTE(review): presumably returns 0 for a valid chunk and a negative errno
 * otherwise -- confirm against the implementation.
 */
int btrfs_check_chunk_valid(const struct btrfs_fs_info *fs_info,
|
|
|
|
const struct extent_buffer *leaf,
|
|
|
|
const struct btrfs_chunk *chunk, u64 logical,
|
|
|
|
u32 sectorsize);
|
btrfs: tree-checker: check extent buffer owner against owner rootid
Btrfs doesn't check whether the tree block respects the root owner.
This means that if a tree block is referred to by a parent in the extent
tree but has an owner of 5, btrfs can still continue reading the tree
block, as long as it doesn't trigger other sanity checks.
Normally this is fine, but combined with the empty tree check in
check_leaf(), if we hit an empty extent tree whose root node has the
csum tree as owner, we can let such an extent buffer sneak in.
Shrink the hole by:
- Do extra eb owner check at tree read time
- Make sure the root owner extent buffer exactly matches the root id.
Unfortunately we can't yet completely patch the hole, as there are
several call sites that can't pass all the info we need:
- For reloc/log trees
Their owner is key::offset, not key::objectid.
We need the full root key to do that accurate check.
For now, we just skip the ownership check for those trees.
- For add_data_references() of relocation
That call site doesn't have any parent/ownership info, as all the
bytenrs are from btrfs_find_all_leafs().
- For direct backref items walk
Direct backref items record the parent bytenr directly, thus unlike
indirect backref items, we don't do a full tree search.
Thus in that case, we don't have the full parent owner to check.
For the latter two cases, they all pass 0 as @owner_root, thus we can
skip those cases if @owner_root is 0.
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
2022-03-16 08:05:58 +08:00
|
|
|
/*
 * Check the owner of extent buffer @eb against the root id @root_owner.
 * NOTE(review): call sites without ownership info reportedly pass 0 to skip
 * the check (see also btrfs_tree_parent_check::owner_root) -- confirm
 * against the implementation.
 * NOTE(review): presumably returns 0 on a match (or skipped check) and a
 * negative errno on mismatch -- confirm against the implementation.
 */
int btrfs_check_eb_owner(const struct extent_buffer *eb, u64 root_owner);
|
2024-10-16 10:57:48 +01:00
|
|
|
/*
 * Verify extent buffer @eb against the expectations carried in @check
 * (see struct btrfs_tree_parent_check).
 * NOTE(review): going by the name, this presumably compares the level and,
 * when check->has_first_key is set, the first key -- confirm against the
 * implementation.
 * NOTE(review): presumably returns 0 when @eb matches and a negative errno
 * otherwise -- confirm against the implementation.
 */
int btrfs_verify_level_key(struct extent_buffer *eb,
|
|
|
|
const struct btrfs_tree_parent_check *check);
|
2019-03-20 13:16:42 +08:00
|
|
|
|
2017-10-09 01:51:02 +00:00
|
|
|
#endif
|