mirror of
https://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
f238b8c33c
Commit d0637c505f8a1 ("arm64: enable THP_SWAP for arm64") brings up THP_SWAP on ARM64, but it doesn't enable THP_SWAP on hardware with MTE, because the MTE code assumes that tag save/restore always handles a folio with only one page. This limitation should be removed: more and more ARM64 SoCs ship with MTE, so the co-existence of MTE and THP_SWAP becomes increasingly important.

This patch makes MTE tag saving support large folios, so we no longer need to split large folios into base pages for swap-out on ARM64 SoCs with MTE.

arch_prepare_to_swap() now takes a folio rather than a page as its parameter, because we support THP swap-out as a whole: it saves the tags for all pages in a large folio.

As we now restore tags based on folios, arch_swap_restore() may add some extra loop iterations and early exits when refaulting a large folio that is still in the swapcache in do_swap_page(). If a large folio has nr pages, do_swap_page() only sets the PTE of the particular page that caused the fault, so do_swap_page() runs nr times, and each time arch_swap_restore() loops nr times over the subpages of the folio. The algorithmic complexity therefore becomes O(nr^2) for now; a small counting sketch after this message makes the cost concrete. Once we support mapping large folios in do_swap_page(), the extra loops and early exits will decrease, though they cannot be removed entirely, because a large folio might end up only partially tagged in corner cases such as:

1. a large folio in the swapcache can be partially unmapped, which invalidates the MTE tags of the unmapped pages;
2. users might use mprotect() to set MTE on only part of a large folio.

arch_thp_swp_supported() is dropped, since ARM64 MTE was the only user that needed it.

Link: https://lkml.kernel.org/r/20240322114136.61386-2-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
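To make the refault cost concrete, below is a small, self-contained C sketch. It is illustrative only, not kernel code: it merely counts the subpage visits described above for a folio of nr pages, with nr = 512 assumed (a 2MB THP built from 4KB base pages).

#include <stdio.h>

int main(void)
{
        /* assumption: a 2MB THP made of 512 x 4KB base pages */
        long nr = 512;
        long visits = 0;

        /* do_swap_page() faults once per subpage while the folio is in swapcache */
        for (long fault = 0; fault < nr; fault++)
                /* each fault makes arch_swap_restore() walk all nr subpages */
                for (long i = 0; i < nr; i++)
                        visits++;

        printf("subpage visits: %ld, nr^2 = %ld\n", visits, nr * nr);
        return 0;
}

Compiled with any C99 compiler, this prints 262144 for both counts; in practice the inner walk exits early for subpages whose tags are already restored, so nr^2 is an upper bound.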
131 lines
2.5 KiB
C
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/pagemap.h>
#include <linux/xarray.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <asm/mte.h>

static DEFINE_XARRAY(mte_pages);

void *mte_allocate_tag_storage(void)
{
        /* tags granule is 16 bytes, 2 tags stored per byte */
        return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);
}

void mte_free_tag_storage(char *storage)
{
        kfree(storage);
}

int mte_save_tags(struct page *page)
{
        void *tag_storage, *ret;

        if (!page_mte_tagged(page))
                return 0;

        tag_storage = mte_allocate_tag_storage();
        if (!tag_storage)
                return -ENOMEM;

        mte_save_page_tags(page_address(page), tag_storage);

        /* lookup the swap entry.val from the page */
        ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
                       GFP_KERNEL);
        if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
                mte_free_tag_storage(tag_storage);
                return xa_err(ret);
        } else if (ret) {
                /* Entry is being replaced, free the old entry */
                mte_free_tag_storage(ret);
        }

        return 0;
}

void mte_restore_tags(swp_entry_t entry, struct page *page)
{
        void *tags = xa_load(&mte_pages, entry.val);

        if (!tags)
                return;

        if (try_page_mte_tagging(page)) {
                mte_restore_page_tags(page_address(page), tags);
                set_page_mte_tagged(page);
        }
}

void mte_invalidate_tags(int type, pgoff_t offset)
{
        swp_entry_t entry = swp_entry(type, offset);
        void *tags = xa_erase(&mte_pages, entry.val);

        mte_free_tag_storage(tags);
}

static inline void __mte_invalidate_tags(struct page *page)
{
        swp_entry_t entry = page_swap_entry(page);

        mte_invalidate_tags(swp_type(entry), swp_offset(entry));
}

void mte_invalidate_tags_area(int type)
{
        swp_entry_t entry = swp_entry(type, 0);
        swp_entry_t last_entry = swp_entry(type + 1, 0);
        void *tags;

        XA_STATE(xa_state, &mte_pages, entry.val);

        xa_lock(&mte_pages);
        xas_for_each(&xa_state, tags, last_entry.val - 1) {
                __xa_erase(&mte_pages, xa_state.xa_index);
                mte_free_tag_storage(tags);
        }
        xa_unlock(&mte_pages);
}

int arch_prepare_to_swap(struct folio *folio)
{
        long i, nr;
        int err;

        if (!system_supports_mte())
                return 0;

        nr = folio_nr_pages(folio);

        /* save the tags of every subpage before the folio goes to swap */
        for (i = 0; i < nr; i++) {
                err = mte_save_tags(folio_page(folio, i));
                if (err)
                        goto out;
        }
        return 0;

out:
        /* unwind: drop the tags already saved for earlier subpages */
        while (i--)
                __mte_invalidate_tags(folio_page(folio, i));
        return err;
}

void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
        long i, nr;

        if (!system_supports_mte())
                return;

        nr = folio_nr_pages(folio);

        /* swap entries for a folio's subpages are consecutive */
        for (i = 0; i < nr; i++) {
                mte_restore_tags(entry, folio_page(folio, i));
                entry.val++;
        }
}
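For orientation, a hypothetical caller-side sketch follows; swap_out_folio_sketch() is an invented name and not part of this file (the real hook is invoked from the core mm swap-out path), but it shows the folio-granular contract established above: save tags for the whole folio before it is written out, and treat failure as "do not swap this folio".

/*
 * Hypothetical sketch, not part of this file: a swap-out path driving
 * the folio-granular hook above. Swapcache, locking and writeback
 * details are omitted.
 */
static int swap_out_folio_sketch(struct folio *folio)
{
        int err;

        /* save MTE tags for every subpage before the folio is written out */
        err = arch_prepare_to_swap(folio);
        if (err)
                return err;     /* caller keeps the folio in memory */

        /* ... write the folio to the swap device ... */
        return 0;
}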