Ryan Roberts 5a00bfd6a5 arm64/mm: new ptep layer to manage contig bit
Create a new layer for the in-table PTE manipulation APIs.  For now, The
existing API is prefixed with double underscore to become the arch-private
API and the public API is just a simple wrapper that calls the private
API.

The public API implementation will subsequently be used to transparently
manipulate the contiguous bit where appropriate.  But since there are
already some contig-aware users (e.g.  hugetlb, kernel mapper), we must
first ensure those users use the private API directly so that the future
contig-bit manipulations in the public API do not interfere with those
existing uses.

The following APIs are treated this way:

 - ptep_get
 - set_pte
 - set_ptes
 - pte_clear
 - ptep_get_and_clear
 - ptep_test_and_clear_young
 - ptep_clear_flush_young
 - ptep_set_wrprotect
 - ptep_set_access_flags

Link: https://lkml.kernel.org/r/20240215103205.2607016-11-ryan.roberts@arm.com
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Tested-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Barry Song <21cnbao@gmail.com>
Cc: Borislav Petkov (AMD) <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morse <james.morse@arm.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Will Deacon <will@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
2024-02-22 15:27:18 -08:00

219 lines
5.9 KiB
C

// SPDX-License-Identifier: GPL-2.0-only
/*
* Extensible Firmware Interface
*
* Based on Extensible Firmware Interface Specification version 2.4
*
* Copyright (C) 2013, 2014 Linaro Ltd.
*/
#include <linux/efi.h>
#include <linux/init.h>
#include <linux/screen_info.h>
#include <asm/efi.h>
#include <asm/stacktrace.h>
static bool region_is_misaligned(const efi_memory_desc_t *md)
{
if (PAGE_SIZE == EFI_PAGE_SIZE)
return false;
return !PAGE_ALIGNED(md->phys_addr) ||
!PAGE_ALIGNED(md->num_pages << EFI_PAGE_SHIFT);
}
/*
* Only regions of type EFI_RUNTIME_SERVICES_CODE need to be
* executable, everything else can be mapped with the XN bits
* set. Also take the new (optional) RO/XP bits into account.
*/
static __init pteval_t create_mapping_protection(efi_memory_desc_t *md)
{
u64 attr = md->attribute;
u32 type = md->type;
if (type == EFI_MEMORY_MAPPED_IO)
return PROT_DEVICE_nGnRE;
if (region_is_misaligned(md)) {
static bool __initdata code_is_misaligned;
/*
* Regions that are not aligned to the OS page size cannot be
* mapped with strict permissions, as those might interfere
* with the permissions that are needed by the adjacent
* region's mapping. However, if we haven't encountered any
* misaligned runtime code regions so far, we can safely use
* non-executable permissions for non-code regions.
*/
code_is_misaligned |= (type == EFI_RUNTIME_SERVICES_CODE);
return code_is_misaligned ? pgprot_val(PAGE_KERNEL_EXEC)
: pgprot_val(PAGE_KERNEL);
}
/* R-- */
if ((attr & (EFI_MEMORY_XP | EFI_MEMORY_RO)) ==
(EFI_MEMORY_XP | EFI_MEMORY_RO))
return pgprot_val(PAGE_KERNEL_RO);
/* R-X */
if (attr & EFI_MEMORY_RO)
return pgprot_val(PAGE_KERNEL_ROX);
/* RW- */
if (((attr & (EFI_MEMORY_RP | EFI_MEMORY_WP | EFI_MEMORY_XP)) ==
EFI_MEMORY_XP) ||
type != EFI_RUNTIME_SERVICES_CODE)
return pgprot_val(PAGE_KERNEL);
/* RWX */
return pgprot_val(PAGE_KERNEL_EXEC);
}
int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md)
{
pteval_t prot_val = create_mapping_protection(md);
bool page_mappings_only = (md->type == EFI_RUNTIME_SERVICES_CODE ||
md->type == EFI_RUNTIME_SERVICES_DATA);
/*
* If this region is not aligned to the page size used by the OS, the
* mapping will be rounded outwards, and may end up sharing a page
* frame with an adjacent runtime memory region. Given that the page
* table descriptor covering the shared page will be rewritten when the
* adjacent region gets mapped, we must avoid block mappings here so we
* don't have to worry about splitting them when that happens.
*/
if (region_is_misaligned(md))
page_mappings_only = true;
create_pgd_mapping(mm, md->phys_addr, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
__pgprot(prot_val | PTE_NG), page_mappings_only);
return 0;
}
struct set_perm_data {
const efi_memory_desc_t *md;
bool has_bti;
};
static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data)
{
struct set_perm_data *spd = data;
const efi_memory_desc_t *md = spd->md;
pte_t pte = __ptep_get(ptep);
if (md->attribute & EFI_MEMORY_RO)
pte = set_pte_bit(pte, __pgprot(PTE_RDONLY));
if (md->attribute & EFI_MEMORY_XP)
pte = set_pte_bit(pte, __pgprot(PTE_PXN));
else if (system_supports_bti_kernel() && spd->has_bti)
pte = set_pte_bit(pte, __pgprot(PTE_GP));
__set_pte(ptep, pte);
return 0;
}
int __init efi_set_mapping_permissions(struct mm_struct *mm,
efi_memory_desc_t *md,
bool has_bti)
{
struct set_perm_data data = { md, has_bti };
BUG_ON(md->type != EFI_RUNTIME_SERVICES_CODE &&
md->type != EFI_RUNTIME_SERVICES_DATA);
if (region_is_misaligned(md))
return 0;
/*
* Calling apply_to_page_range() is only safe on regions that are
* guaranteed to be mapped down to pages. Since we are only called
* for regions that have been mapped using efi_create_mapping() above
* (and this is checked by the generic Memory Attributes table parsing
* routines), there is no need to check that again here.
*/
return apply_to_page_range(mm, md->virt_addr,
md->num_pages << EFI_PAGE_SHIFT,
set_permissions, &data);
}
/*
* UpdateCapsule() depends on the system being shutdown via
* ResetSystem().
*/
bool efi_poweroff_required(void)
{
return efi_enabled(EFI_RUNTIME_SERVICES);
}
asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
{
pr_err_ratelimited(FW_BUG "register x18 corrupted by EFI %s\n", f);
return s;
}
static DEFINE_RAW_SPINLOCK(efi_rt_lock);
void arch_efi_call_virt_setup(void)
{
efi_virtmap_load();
__efi_fpsimd_begin();
raw_spin_lock(&efi_rt_lock);
}
void arch_efi_call_virt_teardown(void)
{
raw_spin_unlock(&efi_rt_lock);
__efi_fpsimd_end();
efi_virtmap_unload();
}
asmlinkage u64 *efi_rt_stack_top __ro_after_init;
asmlinkage efi_status_t __efi_rt_asm_recover(void);
bool efi_runtime_fixup_exception(struct pt_regs *regs, const char *msg)
{
/* Check whether the exception occurred while running the firmware */
if (!current_in_efi() || regs->pc >= TASK_SIZE_64)
return false;
pr_err(FW_BUG "Unable to handle %s in EFI runtime service\n", msg);
add_taint(TAINT_FIRMWARE_WORKAROUND, LOCKDEP_STILL_OK);
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
regs->regs[0] = EFI_ABORTED;
regs->regs[30] = efi_rt_stack_top[-1];
regs->pc = (u64)__efi_rt_asm_recover;
if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK))
regs->regs[18] = efi_rt_stack_top[-2];
return true;
}
/* EFI requires 8 KiB of stack space for runtime services */
static_assert(THREAD_SIZE >= SZ_8K);
static int __init arm64_efi_rt_init(void)
{
void *p;
if (!efi_enabled(EFI_RUNTIME_SERVICES))
return 0;
p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
NUMA_NO_NODE, &&l);
l: if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
return -ENOMEM;
}
efi_rt_stack_top = p + THREAD_SIZE;
return 0;
}
core_initcall(arm64_efi_rt_init);