linux-next/arch/arm64/mm/mteswap.c
Barry Song f238b8c33c arm64: mm: swap: support THP_SWAP on hardware with MTE
Commit d0637c505f8a1 ("arm64: enable THP_SWAP for arm64") brings up
THP_SWAP on ARM64, but it doesn't enable THP_SWAP on hardware with MTE, as
the MTE code assumes that tag save/restore always handles a folio with
only one page.

The limitation should be removed as more and more ARM64 SoCs have this
feature.  Co-existence of MTE and THP_SWAP becomes more and more
important.

This patch makes MTE tag saving support large folios, so we no longer need
to split large folios into base pages for swapping out on ARM64 SoCs with
MTE.

arch_prepare_to_swap() should take folio rather than page as parameter
because we support THP swap-out as a whole.  It saves tags for all pages
in a large folio.

As we are now restoring tags per folio in arch_swap_restore(), we may
incur some extra loops and early exits while refaulting a large folio
which is still in the swapcache in do_swap_page().  If a large folio has
nr pages, do_swap_page() will only set the PTE of the particular page
which is causing the page fault.  Thus do_swap_page() runs nr times, and
each time arch_swap_restore() will loop nr times over the subpages of
the folio.  So right now the algorithmic complexity becomes O(nr^2).

Once we support mapping large folios in do_swap_page(), the extra loops
and early exits will decrease, though not disappear completely, since a
large folio might be partially tagged in corner cases such as: 1. a large
folio in the swapcache can be partially unmapped, so the MTE tags for the
unmapped pages will be invalidated; 2. users might use mprotect() to set
MTE on only a part of a large folio.

arch_thp_swp_supported() is dropped since ARM64 MTE was its only user.

Link: https://lkml.kernel.org/r/20240322114136.61386-2-21cnbao@gmail.com
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Acked-by: Chris Li <chrisl@kernel.org>
Reviewed-by: Ryan Roberts <ryan.roberts@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Kemeng Shi <shikemeng@huaweicloud.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Peter Collingbourne <pcc@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Lorenzo Stoakes <lstoakes@gmail.com>
Cc: "Mike Rapoport (IBM)" <rppt@kernel.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: "Aneesh Kumar K.V" <aneesh.kumar@linux.ibm.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-04-25 20:56:07 -07:00

131 lines
2.5 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
#include <linux/pagemap.h>
#include <linux/xarray.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <asm/mte.h>
/*
 * Saved tag buffers, indexed by swap entry value (see the xa_store() call
 * in mte_save_tags()).
 */
static DEFINE_XARRAY(mte_pages);
/*
 * Allocate a buffer of MTE_PAGE_TAG_STORAGE bytes for saved tags.
 *
 * Return: the buffer obtained from kmalloc(); callers in this file treat a
 * NULL result as an allocation failure and release the buffer with
 * mte_free_tag_storage().
 */
void *mte_allocate_tag_storage(void)
{
	void *storage;

	/* tags granule is 16 bytes, 2 tags stored per byte */
	storage = kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL);

	return storage;
}
/*
 * Release a tag buffer by handing @storage to kfree().
 *
 * Callers in this file also pass the unchecked result of xa_erase() and
 * xa_store() here.
 */
void mte_free_tag_storage(char *storage)
{
	kfree(storage);
}
int mte_save_tags(struct page *page)
{
void *tag_storage, *ret;
if (!page_mte_tagged(page))
return 0;
tag_storage = mte_allocate_tag_storage();
if (!tag_storage)
return -ENOMEM;
mte_save_page_tags(page_address(page), tag_storage);
/* lookup the swap entry.val from the page */
ret = xa_store(&mte_pages, page_swap_entry(page).val, tag_storage,
GFP_KERNEL);
if (WARN(xa_is_err(ret), "Failed to store MTE tags")) {
mte_free_tag_storage(tag_storage);
return xa_err(ret);
} else if (ret) {
/* Entry is being replaced, free the old entry */
mte_free_tag_storage(ret);
}
return 0;
}
/*
 * Copy the tags recorded in mte_pages for @entry back onto @page.
 *
 * Nothing is done when no buffer is recorded for @entry, or when
 * try_page_mte_tagging() returns false for @page.
 */
void mte_restore_tags(swp_entry_t entry, struct page *page)
{
	void *saved = xa_load(&mte_pages, entry.val);

	/* Short-circuit: try_page_mte_tagging() only runs if tags exist */
	if (saved && try_page_mte_tagging(page)) {
		mte_restore_page_tags(page_address(page), saved);
		set_page_mte_tagged(page);
	}
}
/*
 * Remove the tag buffer recorded for the swap entry built from @type and
 * @offset, and free it.
 */
void mte_invalidate_tags(int type, pgoff_t offset)
{
	swp_entry_t swp = swp_entry(type, offset);
	void *stale;

	/* The result of xa_erase() is passed on to the free helper as-is */
	stale = xa_erase(&mte_pages, swp.val);
	mte_free_tag_storage(stale);
}
static inline void __mte_invalidate_tags(struct page *page)
{
swp_entry_t entry = page_swap_entry(page);
mte_invalidate_tags(swp_type(entry), swp_offset(entry));
}
/*
 * Erase and free every saved tag buffer found in mte_pages from index
 * swp_entry(type, 0).val up to swp_entry(type + 1, 0).val - 1 (the bound
 * handed to xas_for_each()).
 */
void mte_invalidate_tags_area(int type)
{
/* first index to visit: offset 0 of the given swap type */
swp_entry_t entry = swp_entry(type, 0);
/* offset 0 of the next type; one below its value bounds the walk */
swp_entry_t last_entry = swp_entry(type + 1, 0);
void *tags;
XA_STATE(xa_state, &mte_pages, entry.val);
/* the whole walk runs between xa_lock() and xa_unlock() */
xa_lock(&mte_pages);
xas_for_each(&xa_state, tags, last_entry.val - 1) {
/* drop the entry at the current index, then free its buffer */
__xa_erase(&mte_pages, xa_state.xa_index);
mte_free_tag_storage(tags);
}
xa_unlock(&mte_pages);
}
/*
 * Save the tags of every page of @folio.
 *
 * If saving fails for one page, the entries of all pages before it are
 * invalidated again, in descending order, and the error is returned.
 *
 * Return: 0 when the system does not support MTE or all pages were
 * handled, otherwise the error from mte_save_tags().
 */
int arch_prepare_to_swap(struct folio *folio)
{
	long idx, nr_pages;
	int ret;

	if (!system_supports_mte())
		return 0;

	nr_pages = folio_nr_pages(folio);

	for (idx = 0; idx < nr_pages; idx++) {
		ret = mte_save_tags(folio_page(folio, idx));
		if (!ret)
			continue;

		/* Roll back pages idx - 1 down to 0; page idx is left alone */
		for (idx--; idx >= 0; idx--)
			__mte_invalidate_tags(folio_page(folio, idx));

		return ret;
	}

	return 0;
}
/*
 * Restore saved tags for every page of @folio.
 *
 * @entry is used for the first page; its value is incremented by one for
 * each following page.  Does nothing when the system does not support MTE.
 */
void arch_swap_restore(swp_entry_t entry, struct folio *folio)
{
	long idx, nr_pages;

	if (!system_supports_mte())
		return;

	nr_pages = folio_nr_pages(folio);

	/* Page idx of the folio pairs with the original entry value + idx */
	for (idx = 0; idx < nr_pages; idx++, entry.val++)
		mte_restore_tags(entry, folio_page(folio, idx));
}