From 49ccf2c3cafb5774a4562d61294afcf492bed487 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Tue, 19 Nov 2024 12:09:14 -0800 Subject: [PATCH 01/14] arm64: mte: set VM_MTE_ALLOWED for hugetlbfs at correct place The commit 5de195060b2e ("mm: resolve faulty mmap_region() error path behaviour") moved vm flags validation before fop->mmap for file mappings. But when commit 25c17c4b55de ("hugetlb: arm64: add mte support") was rebased on top of it, the hugetlbfs part was missed. Mmapping hugetlbfs file may not have MAP_HUGETLB set. Fixes: 25c17c4b55de ("hugetlb: arm64: add mte support") Signed-off-by: Yang Shi Reviewed-by: Catalin Marinas Link: https://lore.kernel.org/r/20241119200914.1145249-1-yang@os.amperecomputing.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/mman.h | 3 ++- fs/hugetlbfs/inode.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h index 1d53022fc7e1..21df8bbd2668 100644 --- a/arch/arm64/include/asm/mman.h +++ b/arch/arm64/include/asm/mman.h @@ -7,6 +7,7 @@ #ifndef BUILD_VDSO #include #include +#include #include #include @@ -44,7 +45,7 @@ static inline unsigned long arch_calc_vm_flag_bits(struct file *file, if (system_supports_mte()) { if (flags & (MAP_ANONYMOUS | MAP_HUGETLB)) return VM_MTE_ALLOWED; - if (shmem_file(file)) + if (shmem_file(file) || is_file_hugepages(file)) return VM_MTE_ALLOWED; } diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a4441fb77f7c..90f883d6b8fd 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -113,7 +113,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) * way when do_mmap unwinds (may be important on powerpc * and ia64). */ - vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND | VM_MTE_ALLOWED); + vm_flags_set(vma, VM_HUGETLB | VM_DONTEXPAND); vma->vm_ops = &hugetlb_vm_ops; ret = seal_check_write(info->seals, vma); From 56a708742a8bf127eb66798bfc9c9516c61f9930 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Mon, 25 Nov 2024 09:16:50 -0800 Subject: [PATCH 02/14] arm64: mm: Fix zone_dma_limit calculation Commit ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") and subsequent patches changed how zone_dma_limit is calculated to allow a reduced ZONE_DMA even when RAM starts above 4GB. Commit 122c234ef4e1 ("arm64: mm: keep low RAM dma zone") further fixed this to ensure ZONE_DMA remains below U32_MAX if RAM starts below 4GB, especially on platforms that do not have IORT or DT description of the device DMA ranges. While zone boundaries calculation was fixed by the latter commit, zone_dma_limit, used to determine the GFP_DMA flag in the core code, was not updated. This results in excessive use of GFP_DMA and unnecessary ZONE_DMA allocations on some platforms. Update zone_dma_limit to match the actual upper bound of ZONE_DMA. Fixes: ba0fb44aed47 ("dma-mapping: replace zone_dma_bits by zone_dma_limit") Cc: # 6.12.x Reported-by: Yutang Jiang Tested-by: Yutang Jiang Signed-off-by: Yang Shi Link: https://lore.kernel.org/r/20241125171650.77424-1-yang@os.amperecomputing.com [catalin.marinas@arm.com: some tweaking of the commit log] Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d21f67d67cf5..ccdef53872a0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -117,15 +117,6 @@ static void __init arch_reserve_crashkernel(void) static phys_addr_t __init max_zone_phys(phys_addr_t zone_limit) { - /** - * Information we get from firmware (e.g. DT dma-ranges) describe DMA - * bus constraints. Devices using DMA might have their own limitations. - * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM - * DMA zone on platforms that have RAM there. - */ - if (memblock_start_of_DRAM() < U32_MAX) - zone_limit = min(zone_limit, U32_MAX); - return min(zone_limit, memblock_end_of_DRAM() - 1) + 1; } @@ -141,6 +132,14 @@ static void __init zone_sizes_init(void) acpi_zone_dma_limit = acpi_iort_dma_get_max_cpu_address(); dt_zone_dma_limit = of_dma_get_max_cpu_address(NULL); zone_dma_limit = min(dt_zone_dma_limit, acpi_zone_dma_limit); + /* + * Information we get from firmware (e.g. DT dma-ranges) describe DMA + * bus constraints. Devices using DMA might have their own limitations. + * Some of them rely on DMA zone in low 32-bit memory. Keep low RAM + * DMA zone on platforms that have RAM there. + */ + if (memblock_start_of_DRAM() < U32_MAX) + zone_dma_limit = min(zone_dma_limit, U32_MAX); arm64_dma_phys_limit = max_zone_phys(zone_dma_limit); max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); #endif From 8d09e2d569f6e34301387f24433b42062517ca85 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 2 Dec 2024 17:03:59 +0000 Subject: [PATCH 03/14] arm64: patching: avoid early page_to_phys() When arm64 is configured with CONFIG_DEBUG_VIRTUAL=y, a warning is printed from the patching code because patch_map(), e.g. | ------------[ cut here ]------------ | WARNING: CPU: 0 PID: 0 at arch/arm64/kernel/patching.c:45 patch_map.constprop.0+0x120/0xd00 | CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.13.0-rc1-00002-ge1a5d6c6be55 #1 | Hardware name: linux,dummy-virt (DT) | pstate: 800003c5 (Nzcv DAIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--) | pc : patch_map.constprop.0+0x120/0xd00 | lr : patch_map.constprop.0+0x120/0xd00 | sp : ffffa9bb312a79a0 | x29: ffffa9bb312a79a0 x28: 0000000000000001 x27: 0000000000000001 | x26: 0000000000000000 x25: 0000000000000000 x24: 00000000000402e8 | x23: ffffa9bb2c94c1c8 x22: ffffa9bb2c94c000 x21: ffffa9bb222e883c | x20: 0000000000000002 x19: ffffc1ffc100ba40 x18: ffffa9bb2cf0f21c | x17: 0000000000000006 x16: 0000000000000000 x15: 0000000000000004 | x14: 1ffff5376625b4ac x13: ffff753766a67fb8 x12: ffff753766919cd1 | x11: 0000000000000003 x10: 1ffff5376625b4c3 x9 : 1ffff5376625b4af | x8 : ffff753766254f0a x7 : 0000000041b58ab3 x6 : ffff753766254f18 | x5 : ffffa9bb312d9bc0 x4 : 0000000000000000 x3 : ffffa9bb29bd90e4 | x2 : 0000000000000002 x1 : ffffa9bb312d9bc0 x0 : 0000000000000000 | Call trace: | patch_map.constprop.0+0x120/0xd00 (P) | patch_map.constprop.0+0x120/0xd00 (L) | __aarch64_insn_write+0xa8/0x120 | aarch64_insn_patch_text_nosync+0x4c/0xb8 | arch_jump_label_transform_queue+0x7c/0x100 | jump_label_update+0x154/0x460 | static_key_enable_cpuslocked+0x1d8/0x280 | static_key_enable+0x2c/0x48 | early_randomize_kstack_offset+0x104/0x168 | do_early_param+0xe4/0x148 | parse_args+0x3a4/0x838 | parse_early_options+0x50/0x68 | parse_early_param+0x58/0xe0 | setup_arch+0x78/0x1f0 | start_kernel+0xa0/0x530 | __primary_switched+0x8c/0xa0 | irq event stamp: 0 | hardirqs last enabled at (0): [<0000000000000000>] 0x0 | hardirqs last disabled at (0): [<0000000000000000>] 0x0 | softirqs last enabled at (0): [<0000000000000000>] 0x0 | softirqs last disabled at (0): [<0000000000000000>] 0x0 | ---[ end trace 0000000000000000 ]--- The warning has been produced since commit: 3e25d5a49f99b75b ("asm-generic: add an optional pfn_valid check to page_to_phys") ... which added a pfn_valid() check into page_to_phys(), and at this point in boot pfn_valid() will always return false because the vmemmap has not yet been initialized and there are no valid mem_sections yet. Before that commit, the arithmetic performed by page_to_phys() would give the expected physical address, though it is somewhat dubious to use vmemmap addresses before the vmemmap has been initialized. Aside from kernel image addresses, all executable code should be allocated from execmem (where all allocations will fall within the vmalloc area), and so there's no need for the fallback case when CONFIG_EXECMEM=n. Simplify patch_map() accordingly, directly converting kernel image addresses and removing the redundant fallback case. Fixes: 3e25d5a49f99 ("asm-generic: add an optional pfn_valid check to page_to_phys") Signed-off-by: Mark Rutland Cc: Arnd Bergmann Cc: Christoph Hellwig Cc: Mike Rapoport Cc: Thomas Huth Cc: Will Deacon Link: https://lore.kernel.org/r/20241202170359.1475019-1-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/patching.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/patching.c b/arch/arm64/kernel/patching.c index 7f99723fbb8c..1041bc67a3ee 100644 --- a/arch/arm64/kernel/patching.c +++ b/arch/arm64/kernel/patching.c @@ -30,20 +30,17 @@ static bool is_image_text(unsigned long addr) static void __kprobes *patch_map(void *addr, int fixmap) { - unsigned long uintaddr = (uintptr_t) addr; - bool image = is_image_text(uintaddr); - struct page *page; + phys_addr_t phys; - if (image) - page = phys_to_page(__pa_symbol(addr)); - else if (IS_ENABLED(CONFIG_EXECMEM)) - page = vmalloc_to_page(addr); - else - return addr; + if (is_image_text((unsigned long)addr)) { + phys = __pa_symbol(addr); + } else { + struct page *page = vmalloc_to_page(addr); + BUG_ON(!page); + phys = page_to_phys(page) + offset_in_page(addr); + } - BUG_ON(!page); - return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + - (uintaddr & ~PAGE_MASK)); + return (void *)set_fixmap_offset(fixmap, phys); } static void __kprobes patch_unmap(int fixmap) From d44679fb954ffea961036ed1aeb7d65035f78489 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 2 Dec 2024 14:57:29 +0000 Subject: [PATCH 04/14] drivers/virt: pkvm: Don't fail ioremap() call if MMIO_GUARD fails Calling the MMIO_GUARD hypercall from guests which have not been enrolled (e.g. because they are running without pvmfw) results in -EINVAL being returned. In this case, MMIO_GUARD is not active and so we can simply proceed with the normal ioremap() routine. Don't fail ioremap() if MMIO_GUARD fails; instead WARN_ON_ONCE() to highlight that the pvm environment is slightly wonky. Fixes: 0f1269495800 ("drivers/virt: pkvm: Intercept ioremap using pKVM MMIO_GUARD hypercall") Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20241202145731.6422-2-will@kernel.org Signed-off-by: Catalin Marinas --- drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c index 56a3859dda8a..4230b817a80b 100644 --- a/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c +++ b/drivers/virt/coco/pkvm-guest/arm-pkvm-guest.c @@ -87,12 +87,8 @@ static int mmio_guard_ioremap_hook(phys_addr_t phys, size_t size, while (phys < end) { const int func_id = ARM_SMCCC_VENDOR_HYP_KVM_MMIO_GUARD_FUNC_ID; - int err; - - err = arm_smccc_do_one_page(func_id, phys); - if (err) - return err; + WARN_ON_ONCE(arm_smccc_do_one_page(func_id, phys)); phys += PAGE_SIZE; } From 92230596252ab6155f2d7f7ff9fa61425800a13f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 2 Dec 2024 14:57:30 +0000 Subject: [PATCH 05/14] MAINTAINERS: Add CCA and pKVM CoCO guest support to the ARM64 entry Commits 7999edc484ca ("virt: arm-cca-guest: TSM_REPORT support for realm") and a06c3fad49a5 ("drivers/virt: pkvm: Add initial support for running as a protected guest") added arm64 guest-side support for running in CCA and pKVM confidential computing environments respectively. Unfortunately, these changes were not accompanied by a MAINTAINERS entry and so aren't automatically picked up by the get_maintainer.pl script. Since the initial support was merged via the arm64 tree, extend the ARM64 entry to cover the two new directories. Cc: Marc Zyngier Cc: Oliver Upton Cc: Suzuki K Poulose Signed-off-by: Will Deacon Acked-by: Suzuki K Poulose Acked-by: Catalin Marinas Link: https://lore.kernel.org/r/20241202145731.6422-3-will@kernel.org Signed-off-by: Catalin Marinas --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 1e930c7a58b1..9378c0c4bbc9 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3376,6 +3376,8 @@ S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux.git F: Documentation/arch/arm64/ F: arch/arm64/ +F: drivers/virt/coco/arm-cca-guest/ +F: drivers/virt/coco/pkvm-guest/ F: tools/testing/selftests/arm64/ X: arch/arm64/boot/dts/ From c2b46ae022704a2d845e59461fa24431ad627022 Mon Sep 17 00:00:00 2001 From: Qinxin Xia Date: Thu, 5 Dec 2024 09:33:31 +0800 Subject: [PATCH 06/14] ACPI/IORT: Add PMCG platform information for HiSilicon HIP09A HiSilicon HIP09A platforms using the same SMMU PMCG with HIP09 and thus suffers the same erratum. List them in the PMCG platform information list without introducing a new SMMU PMCG Model. Update the silicon-errata.rst as well. Reviewed-by: Yicong Yang Acked-by: Hanjun Guo Signed-off-by: Qinxin Xia Link: https://lore.kernel.org/r/20241205013331.1484017-1-xiaqinxin@huawei.com Signed-off-by: Catalin Marinas --- Documentation/arch/arm64/silicon-errata.rst | 5 +++-- drivers/acpi/arm64/iort.c | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index 77db10e944f0..b42fea07c5ce 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -255,8 +255,9 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A | +----------------+-----------------+-----------------+-----------------------------+ -| Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | -| | ,11} SMMU PMCG | | | +| Hisilicon | Hip{08,09,09A,10| #162001900 | N/A | +| | ,10C,11} | | | +| | SMMU PMCG | | | +----------------+-----------------+-----------------+-----------------------------+ | Hisilicon | Hip09 | #162100801 | HISILICON_ERRATUM_162100801 | +----------------+-----------------+-----------------+-----------------------------+ diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 1f7e4c691d9e..98759d6199d3 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1716,6 +1716,8 @@ static struct acpi_platform_list pmcg_plat_info[] __initdata = { /* HiSilicon Hip09 Platform */ {"HISI ", "HIP09 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, + {"HISI ", "HIP09A ", 0, ACPI_SIG_IORT, greater_than_or_equal, + "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, /* HiSilicon Hip10/11 Platform uses the same SMMU IP with Hip09 */ {"HISI ", "HIP10 ", 0, ACPI_SIG_IORT, greater_than_or_equal, "Erratum #162001900", IORT_SMMU_V3_PMCG_HISI_HIP09}, From c0900d15d31c2597dd9f634c8be2b71762199890 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 3 Dec 2024 15:19:41 +0000 Subject: [PATCH 07/14] arm64: Ensure bits ASID[15:8] are masked out when the kernel uses 8-bit ASIDs Linux currently sets the TCR_EL1.AS bit unconditionally during CPU bring-up. On an 8-bit ASID CPU, this is RES0 and ignored, otherwise 16-bit ASIDs are enabled. However, if running in a VM and the hypervisor reports 8-bit ASIDs (ID_AA64MMFR0_EL1.ASIDBits == 0) on a 16-bit ASIDs CPU, Linux uses bits 8 to 63 as a generation number for tracking old process ASIDs. The bottom 8 bits of this generation end up being written to TTBR1_EL1 and also used for the ASID-based TLBI operations as the upper 8 bits of the ASID. Following an ASID roll-over event we can have threads of the same application with the same 8-bit ASID but different generation numbers running on separate CPUs. Both TLB caching and the TLBI operations will end up using different actual 16-bit ASIDs for the same process. A similar scenario can happen in a big.LITTLE configuration if the boot CPU only uses 8-bit ASIDs while secondary CPUs have 16-bit ASIDs. Ensure that the ASID generation is only tracked by bits 16 and up, leaving bits 15:8 as 0 if the kernel uses 8-bit ASIDs. Note that clearing TCR_EL1.AS is not sufficient since the architecture requires that the top 8 bits of the ASID passed to TLBI instructions are 0 rather than ignored in such configuration. Cc: stable@vger.kernel.org Cc: Will Deacon Cc: Mark Rutland Cc: Marc Zyngier Cc: James Morse Acked-by: Mark Rutland Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20241203151941.353796-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 188197590fc9..b2ac06246327 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -32,9 +32,9 @@ static unsigned long nr_pinned_asids; static unsigned long *pinned_asid_map; #define ASID_MASK (~GENMASK(asid_bits - 1, 0)) -#define ASID_FIRST_VERSION (1UL << asid_bits) +#define ASID_FIRST_VERSION (1UL << 16) -#define NUM_USER_ASIDS ASID_FIRST_VERSION +#define NUM_USER_ASIDS (1UL << asid_bits) #define ctxid2asid(asid) ((asid) & ~ASID_MASK) #define asid2ctxid(asid, genid) ((asid) | (genid)) From cf3b16dae4cab9dfa2436d76ce010ad4dd55b53a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 4 Dec 2024 17:50:04 +0000 Subject: [PATCH 08/14] arm64: mte: Fix copy_highpage() warning on hugetlb folios Commit 25c17c4b55de ("hugetlb: arm64: add mte support") improved the copy_highpage() function to update the tags in the destination hugetlb folio. However, when the source folio isn't tagged, the code takes the non-hugetlb path where try_page_mte_tagging() warns as the destination is a hugetlb folio: WARNING: CPU: 0 PID: 363 at arch/arm64/include/asm/mte.h:58 copy_highpage+0x1d4/0x2d8 [...] pc : copy_highpage+0x1d4/0x2d8 lr : copy_highpage+0x78/0x2d8 [...] Call trace: copy_highpage+0x1d4/0x2d8 (P) copy_highpage+0x78/0x2d8 (L) copy_user_highpage+0x20/0x48 copy_user_large_folio+0x1bc/0x268 hugetlb_wp+0x190/0x860 hugetlb_fault+0xa28/0xc10 handle_mm_fault+0x2a0/0x2c0 do_page_fault+0x12c/0x578 do_mem_abort+0x4c/0xa8 el0_da+0x44/0xb0 el0t_64_sync_handler+0xc4/0x138 el0t_64_sync+0x198/0x1a0 Change the check for the tagged status of the source folio so that it does not fall through the non-hugetlb case. In addition, only perform the copy (for the full folio) if the source page is the folio head and warn if the destination folio is already tagged, for symmetry with the non-hugetlb case. Fixes: 25c17c4b55de ("hugetlb: arm64: add mte support") Reported-by: Sasha Levin Cc: Yang Shi Cc: David Hildenbrand Cc: Will Deacon Link: https://lore.kernel.org/r/Z0STR6VLt2MCalnY@sashalap Link: https://lore.kernel.org/r/20241204175004.906754-1-catalin.marinas@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/copypage.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 87b3f1a25535..a86c897017df 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -30,11 +30,13 @@ void copy_highpage(struct page *to, struct page *from) if (!system_supports_mte()) return; - if (folio_test_hugetlb(src) && - folio_test_hugetlb_mte_tagged(src)) { - if (!folio_try_hugetlb_mte_tagging(dst)) + if (folio_test_hugetlb(src)) { + if (!folio_test_hugetlb_mte_tagged(src) || + from != folio_page(src, 0)) return; + WARN_ON_ONCE(!folio_try_hugetlb_mte_tagging(dst)); + /* * Populate tags for all subpages. * From 16d5306629c06ec2653c12794f344168a0eea809 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Thu, 5 Dec 2024 14:36:34 +0000 Subject: [PATCH 09/14] coco: virt: arm64: Do not enable cca guest driver by default As per the guidelines, new drivers may not be set to default on. An expert user can always select it. Reported-by: Dan Williams Cc: Will Deacon Cc: Steven Price Cc: Sami Mujawar Link: https://lore.kernel.org/r/6750c695194cd_2508129427@dwillia2-xfh.jf.intel.com.notmuch Link: https://lore.kernel.org/r/20241205143634.306114-1-suzuki.poulose@arm.com Signed-off-by: Suzuki K Poulose Reviewed-by: Steven Price Signed-off-by: Catalin Marinas --- drivers/virt/coco/arm-cca-guest/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/virt/coco/arm-cca-guest/Kconfig b/drivers/virt/coco/arm-cca-guest/Kconfig index 9dd27c3ee215..3f0f013f03f1 100644 --- a/drivers/virt/coco/arm-cca-guest/Kconfig +++ b/drivers/virt/coco/arm-cca-guest/Kconfig @@ -1,7 +1,6 @@ config ARM_CCA_GUEST tristate "Arm CCA Guest driver" depends on ARM64 - default m select TSM_REPORTS help The driver provides userspace interface to request and From f00b53f1614f7be554fd28b9594ef4e63e2686c5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 5 Dec 2024 13:48:10 +0000 Subject: [PATCH 10/14] arm64: cpufeature: Add GCS to cpucap_is_possible() Since system_supports_gcs() ends up referring to cpucap_is_possible(), teach the latter about GCS for consistency with similar features. Signed-off-by: Robin Murphy Acked-by: Mark Rutland Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/416c7369fcdce4ebb2a8f12daae234507be27e38.1733406275.git.robin.murphy@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpucaps.h | 2 ++ arch/arm64/include/asm/cpufeature.h | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 201a46efd918..cbbf70e0f204 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,6 +44,8 @@ cpucap_is_possible(const unsigned int cap) return IS_ENABLED(CONFIG_ARM64_TLB_RANGE); case ARM64_HAS_S1POE: return IS_ENABLED(CONFIG_ARM64_POE); + case ARM64_HAS_GCS: + return IS_ENABLED(CONFIG_ARM64_GCS); case ARM64_UNMAP_KERNEL_AT_EL0: return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0); case ARM64_WORKAROUND_843419: diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b64e49bd9d10..8b4e5a3cd24c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -847,8 +847,7 @@ static inline bool system_supports_poe(void) static inline bool system_supports_gcs(void) { - return IS_ENABLED(CONFIG_ARM64_GCS) && - alternative_has_cap_unlikely(ARM64_HAS_GCS); + return alternative_has_cap_unlikely(ARM64_HAS_GCS); } static inline bool system_supports_haft(void) From ca62d90085f4af36de745883faab9f8a7cbb45d3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:52 +0000 Subject: [PATCH 11/14] arm64: ptrace: fix partial SETREGSET for NT_ARM_TAGGED_ADDR_CTRL Currently tagged_addr_ctrl_set() doesn't initialize the temporary 'ctrl' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently tagged_addr_ctrl_set() will consume an arbitrary value, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. As set_tagged_addr_ctrl() only accepts values where bits [63:4] zero and rejects other values, a partial SETREGSET attempt will randomly succeed or fail depending on the value of the uninitialized value, and the exposure is significantly limited. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing value of the tagged address ctrl will be retained. The NT_ARM_TAGGED_ADDR_CTRL regset is only visible in the user_aarch64_view used by a native AArch64 task to manipulate another native AArch64 task. As get_tagged_addr_ctrl() only returns an error value when called for a compat task, tagged_addr_ctrl_get() and tagged_addr_ctrl_set() should never observe an error value from get_tagged_addr_ctrl(). Add a WARN_ON_ONCE() to both to indicate that such an error would be unexpected, and error handlnig is not missing in either case. Fixes: 2200aa7154cb ("arm64: mte: ptrace: Add NT_ARM_TAGGED_ADDR_CTRL regset") Cc: # 5.10.x Signed-off-by: Mark Rutland Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e4437f62a2cd..8525d03f0fb4 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1427,7 +1427,7 @@ static int tagged_addr_ctrl_get(struct task_struct *target, { long ctrl = get_tagged_addr_ctrl(target); - if (IS_ERR_VALUE(ctrl)) + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) return ctrl; return membuf_write(&to, &ctrl, sizeof(ctrl)); @@ -1441,6 +1441,10 @@ static int tagged_addr_ctrl_set(struct task_struct *target, const struct int ret; long ctrl; + ctrl = get_tagged_addr_ctrl(target); + if (WARN_ON_ONCE(IS_ERR_VALUE(ctrl))) + return ctrl; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; From f5d71291841aecfe5d8435da2dfa7f58ccd18bc8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:53 +0000 Subject: [PATCH 12/14] arm64: ptrace: fix partial SETREGSET for NT_ARM_FPMR Currently fpmr_set() doesn't initialize the temporary 'fpmr' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently an arbitrary value will be written back to target->thread.uw.fpmr, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing contents of FPMR will be retained. Before this patch: | # ./fpmr-test | Attempting to write NT_ARM_FPMR::fpmr = 0x900d900d900d900d | SETREGSET(nt=0x40e, len=8) wrote 8 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0x900d900d900d900d | | Attempting to write NT_ARM_FPMR (zero length) | SETREGSET(nt=0x40e, len=0) wrote 0 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0xffff800083963d50 After this patch: | # ./fpmr-test | Attempting to write NT_ARM_FPMR::fpmr = 0x900d900d900d900d | SETREGSET(nt=0x40e, len=8) wrote 8 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0x900d900d900d900d | | Attempting to write NT_ARM_FPMR (zero length) | SETREGSET(nt=0x40e, len=0) wrote 0 bytes | | Attempting to read NT_ARM_FPMR::fpmr | GETREGSET(nt=0x40e, len=8) read 8 bytes | Read NT_ARM_FPMR::fpmr = 0x900d900d900d900d Fixes: 4035c22ef7d4 ("arm64/ptrace: Expose FPMR via ptrace") Cc: # 6.9.x Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 8525d03f0fb4..85c862dc845b 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -720,6 +720,8 @@ static int fpmr_set(struct task_struct *target, const struct user_regset *regset if (!system_supports_fpmr()) return -EINVAL; + fpmr = target->thread.uw.fpmr; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &fpmr, 0, count); if (ret) return ret; From 594bfc4947c4fcabba1318d8384c61a29a6b89fb Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:54 +0000 Subject: [PATCH 13/14] arm64: ptrace: fix partial SETREGSET for NT_ARM_POE Currently poe_set() doesn't initialize the temporary 'ctrl' variable, and a SETREGSET call with a length of zero will leave this uninitialized. Consequently an arbitrary value will be written back to target->thread.por_el0, potentially leaking up to 64 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length write, the existing contents of POR_EL1 will be retained. Before this patch: | # ./poe-test | Attempting to write NT_ARM_POE::por_el0 = 0x900d900d900d900d | SETREGSET(nt=0x40f, len=8) wrote 8 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0x900d900d900d900d | | Attempting to write NT_ARM_POE (zero length) | SETREGSET(nt=0x40f, len=0) wrote 0 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0xffff8000839c3d50 After this patch: | # ./poe-test | Attempting to write NT_ARM_POE::por_el0 = 0x900d900d900d900d | SETREGSET(nt=0x40f, len=8) wrote 8 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0x900d900d900d900d | | Attempting to write NT_ARM_POE (zero length) | SETREGSET(nt=0x40f, len=0) wrote 0 bytes | | Attempting to read NT_ARM_POE::por_el0 | GETREGSET(nt=0x40f, len=8) read 8 bytes | Read NT_ARM_POE::por_el0 = 0x900d900d900d900d Fixes: 175198199262 ("arm64/ptrace: add support for FEAT_POE") Cc: # 6.12.x Signed-off-by: Mark Rutland Cc: Joey Gouly Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 85c862dc845b..f17810030fa0 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1478,6 +1478,8 @@ static int poe_set(struct task_struct *target, const struct if (!system_supports_poe()) return -EINVAL; + ctrl = target->thread.por_el0; + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &ctrl, 0, -1); if (ret) return ret; From d60624f72d15862a96965b945f6ddfee9a1359e7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 5 Dec 2024 12:16:55 +0000 Subject: [PATCH 14/14] arm64: ptrace: fix partial SETREGSET for NT_ARM_GCS Currently gcs_set() doesn't initialize the temporary 'user_gcs' variable, and a SETREGSET call with a length of 0, 8, or 16 will leave some portion of this uninitialized. Consequently some arbitrary uninitialized values may be written back to the relevant fields in task struct, potentially leaking up to 192 bits of memory from the kernel stack. The read is limited to a specific slot on the stack, and the issue does not provide a write mechanism. As gcs_set() rejects cases where user_gcs::features_enabled has bits set other than PR_SHADOW_STACK_SUPPORTED_STATUS_MASK, a SETREGSET call with a length of zero will randomly succeed or fail depending on the value of the uninitialized value, it isn't possible to leak the full 192 bits. With a length of 8 or 16, user_gcs::features_enabled can be initialized to an accepted value, making it practical to leak 128 or 64 bits. Fix this by initializing the temporary value before copying the regset from userspace, as for other regsets (e.g. NT_PRSTATUS, NT_PRFPREG, NT_ARM_SYSTEM_CALL). In the case of a zero-length or partial write, the existing contents of the fields which are not written to will be retained. To ensure that the extraction and insertion of fields is consistent across the GETREGSET and SETREGSET calls, new task_gcs_to_user() and task_gcs_from_user() helpers are added, matching the style of pac_address_keys_to_user() and pac_address_keys_from_user(). Before this patch: | # ./gcs-test | Attempting to write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | SETREGSET(nt=0x410, len=24) wrote 24 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | | Attempting partial write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x1de7ec7edbadc0de, | .gcspr_el0 = 0x1de7ec7edbadc0de, | } | SETREGSET(nt=0x410, len=8) wrote 8 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x000000000093e780, | .gcspr_el0 = 0xffff800083a63d50, | } After this patch: | # ./gcs-test | Attempting to write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | SETREGSET(nt=0x410, len=24) wrote 24 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } | | Attempting partial write NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x1de7ec7edbadc0de, | .gcspr_el0 = 0x1de7ec7edbadc0de, | } | SETREGSET(nt=0x410, len=8) wrote 8 bytes | | Attempting to read NT_ARM_GCS::user_gcs | GETREGSET(nt=0x410, len=24) read 24 bytes | Read NT_ARM_GCS::user_gcs = { | .features_enabled = 0x0000000000000000, | .features_locked = 0x0000000000000000, | .gcspr_el0 = 0x900d900d900d900d, | } Fixes: 7ec3b57cb29f ("arm64/ptrace: Expose GCS via ptrace and core files") Signed-off-by: Mark Rutland Cc: Mark Brown Cc: Will Deacon Reviewed-by: Mark Brown Link: https://lore.kernel.org/r/20241205121655.1824269-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f17810030fa0..f79b0d5f71ac 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1491,6 +1491,22 @@ static int poe_set(struct task_struct *target, const struct #endif #ifdef CONFIG_ARM64_GCS +static void task_gcs_to_user(struct user_gcs *user_gcs, + const struct task_struct *target) +{ + user_gcs->features_enabled = target->thread.gcs_el0_mode; + user_gcs->features_locked = target->thread.gcs_el0_locked; + user_gcs->gcspr_el0 = target->thread.gcspr_el0; +} + +static void task_gcs_from_user(struct task_struct *target, + const struct user_gcs *user_gcs) +{ + target->thread.gcs_el0_mode = user_gcs->features_enabled; + target->thread.gcs_el0_locked = user_gcs->features_locked; + target->thread.gcspr_el0 = user_gcs->gcspr_el0; +} + static int gcs_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) @@ -1503,9 +1519,7 @@ static int gcs_get(struct task_struct *target, if (target == current) gcs_preserve_current_state(); - user_gcs.features_enabled = target->thread.gcs_el0_mode; - user_gcs.features_locked = target->thread.gcs_el0_locked; - user_gcs.gcspr_el0 = target->thread.gcspr_el0; + task_gcs_to_user(&user_gcs, target); return membuf_write(&to, &user_gcs, sizeof(user_gcs)); } @@ -1521,6 +1535,8 @@ static int gcs_set(struct task_struct *target, const struct if (!system_supports_gcs()) return -EINVAL; + task_gcs_to_user(&user_gcs, target); + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, &user_gcs, 0, -1); if (ret) return ret; @@ -1528,9 +1544,7 @@ static int gcs_set(struct task_struct *target, const struct if (user_gcs.features_enabled & ~PR_SHADOW_STACK_SUPPORTED_STATUS_MASK) return -EINVAL; - target->thread.gcs_el0_mode = user_gcs.features_enabled; - target->thread.gcs_el0_locked = user_gcs.features_locked; - target->thread.gcspr_el0 = user_gcs.gcspr_el0; + task_gcs_from_user(target, &user_gcs); return 0; }