mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
synced 2024-12-29 09:12:07 +00:00
Xarray: do not return sibling entries from xas_find_marked()
Patch series "Fixes and cleanups to xarray", v3.
This series contains some assorted fixes and cleanups to xarray. Patches 1-2
are fixes and patches 3-5 are cleanups. More details can be found in
the respective patches.
This patch (of 5):
Similar to the issue fixed in commit cbc0285433
("XArray: Do not return
sibling entries from xa_load()"), we may return sibling entries from
xas_find_marked() as follows:
Thread A: Thread B:
xa_store_range(xa, entry, 6, 7, gfp);
xa_set_mark(xa, 6, mark)
XA_STATE(xas, xa, 6);
xas_find_marked(&xas, 7, mark);
offset = xas_find_chunk(xas, advance, mark);
[offset is 6 which points to a valid entry]
xa_store_range(xa, entry, 4, 7, gfp);
entry = xa_entry(xa, node, 6);
[entry is a sibling of 4]
if (!xa_is_node(entry))
return entry;
Skip sibling entries, as xas_find() does, so that callers of
xas_find_marked() never see a sibling entry; otherwise a caller may treat
the sibling entry as a valid entry and crash the kernel.
In addition, the load_race() test is modified to catch the issue described
above; the modified load_race() only passes once this fix is applied.
Here is an example of how this bug could, in theory, be triggered in nfs,
which enables large folios in its mapping:
Let's take a look at the racers involved:
1. How pages could be created and dirtied in nfs.
write
ksys_write
vfs_write
new_sync_write
nfs_file_write
generic_perform_write
nfs_write_begin
fgf_set_order
__filemap_get_folio
nfs_write_end
nfs_update_folio
nfs_writepage_setup
nfs_mark_request_dirty
filemap_dirty_folio
__folio_mark_dirty
__xa_set_mark
2. How dirty pages could be deleted in nfs.
ioctl
do_vfs_ioctl
file_ioctl
ioctl_preallocate
vfs_fallocate
nfs42_fallocate
nfs42_proc_deallocate
truncate_pagecache_range
truncate_inode_pages_range
truncate_inode_folio
filemap_remove_folio
page_cache_delete
xas_store(&xas, NULL);
3. How dirty pages could be searched locklessly.
sync_file_range
ksys_sync_file_range
__filemap_fdatawrite_range
filemap_fdatawrite_wbc
do_writepages
writeback_use_writepage
writeback_iter
writeback_get_folio
filemap_get_folios_tag
find_get_entry
folio = xas_find_marked()
folio_try_get(folio)
In theory, the kernel will crash as follows:
1.Create 2.Search 3.Delete
/* write page 2,3 */
write
...
nfs_write_begin
fgf_set_order
__filemap_get_folio
...
/* index = 2, order = 1 */
xa_store(&xas, folio)
nfs_write_end
...
__folio_mark_dirty
/* sync page 2 and page 3 */
sync_file_range
...
find_get_entry
folio = xas_find_marked()
/* offset will be 2 */
offset = xas_find_chunk()
/* delete page 2 and page 3 */
ioctl
...
xas_store(&xas, NULL);
/* write page 0-3 */
write
...
nfs_write_begin
fgf_set_order
__filemap_get_folio
...
/* index = 0, order = 2 */
xa_store(&xas, folio)
nfs_write_end
...
__folio_mark_dirty
/* get sibling entry from offset 2 */
entry = xa_entry(.., 2)
/* use sibling entry as folio and crash kernel */
folio_try_get(folio)
Link: https://lkml.kernel.org/r/20241218154613.58754-2-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20241213122523.12764-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20241213122523.12764-2-shikemeng@huaweicloud.com
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
649d7ad103
commit
e6b6abc540
@ -1387,6 +1387,8 @@ void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
|
||||
entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
|
||||
if (!entry && !(xa_track_free(xas->xa) && mark == XA_FREE_MARK))
|
||||
continue;
|
||||
if (xa_is_sibling(entry))
|
||||
continue;
|
||||
if (!xa_is_node(entry))
|
||||
return entry;
|
||||
xas->xa_node = xa_to_node(entry);
|
||||
|
@ -227,6 +227,7 @@ static void *load_creator(void *ptr)
|
||||
unsigned long index = (3 << RADIX_TREE_MAP_SHIFT) -
|
||||
(1 << order);
|
||||
item_insert_order(tree, index, order);
|
||||
xa_set_mark(tree, index, XA_MARK_1);
|
||||
item_delete_rcu(tree, index);
|
||||
}
|
||||
}
|
||||
@ -242,8 +243,11 @@ static void *load_worker(void *ptr)
|
||||
|
||||
rcu_register_thread();
|
||||
while (!stop_iteration) {
|
||||
unsigned long find_index = (2 << RADIX_TREE_MAP_SHIFT) + 1;
|
||||
struct item *item = xa_load(ptr, index);
|
||||
assert(!xa_is_internal(item));
|
||||
item = xa_find(ptr, &find_index, index, XA_MARK_1);
|
||||
assert(!xa_is_internal(item));
|
||||
}
|
||||
rcu_unregister_thread();
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user